In [1]:
import gym
from gym import spaces

import warnings
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.deepq.policies import MlpPolicy as DQN_MlpPolicy
from stable_baselines import A2C, PPO2, DQN
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv

from sklearn.model_selection import ParameterGrid

from aequam_env import AequamEnv
from fpdf import FPDF
from backtest_utils import *

In [2]:
# Mail credentials handed to `send_report`. They are read from the environment
# so that no secret is stored in the notebook or its version history; the
# password that used to be hard-coded here should be treated as leaked and
# rotated.
#   GMAIL_USER      sender account (falls back to the previous default address)
#   GMAIL_PASSWORD  account / app password (deliberately has no default)
gmail_user = os.environ.get('GMAIL_USER', 'dorian.lagadec@aequamcapital.com')
gmail_password = os.environ.get('GMAIL_PASSWORD')
if not gmail_password:
    # Surface the problem up front rather than at the end of a training run.
    warnings.warn('GMAIL_PASSWORD is not set; sending the report by e-mail is expected to fail.')

In [3]:
def rescale(df, start_int = 0, base = 100):
    """Rebase every column of `df` so that the row at position `start_int` equals `base`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are rescaled.
    start_int : int
        Positional index (``iloc``) of the reference row.
    base : number
        Value each column takes on the reference row after rescaling.

    Returns
    -------
    pandas.DataFrame
        ``df`` divided column-wise by its reference row, multiplied by `base`.
    """
    # Plain ndarray so the division is positional (one divisor per column)
    # instead of being aligned on labels.
    reference_row = np.array(df.iloc[start_int, :])
    rebased = df / reference_row
    return rebased * base

In [4]:
def _read_observations(csv_path):
    """Read one observation CSV: first column as a parsed date index,
    every column kept except 'Equi_weighted'."""
    return pd.read_csv(csv_path, index_col = 0, parse_dates=True,
                       usecols = lambda column: column not in ['Equi_weighted'])

# The seven candidate observation datasets, all loaded the same way.
df_obs_1 = _read_observations('data/dataset1.csv')
df_obs_2 = _read_observations('data/dataset2.csv')
df_obs_3 = _read_observations('data/dataset3.csv')
df_obs_4 = _read_observations('data/dataset4.csv')
df_obs_5 = _read_observations('data/dataset5.csv')
df_obs_6 = _read_observations('data/dataset6.csv')
df_obs_7 = _read_observations('data/dataset7.csv')

def propagate_index(from_df, to_df):
    """Return the rows of `to_df` selected by the index labels of `from_df`.

    Rows come back in the order of ``from_df.index``; all columns of `to_df`
    are kept. The selection is label-based (``.loc``).
    """
    wanted_labels = from_df.index
    return to_df.loc[wanted_labels, :]

# Price series: the first seven CSV columns, the first of which is the date index.
df_prices_base = pd.read_csv('data/test.csv', index_col = 0, parse_dates=True, usecols = list(range(7)))

# 'all': every base price series, untouched.
df_prices_all = df_prices_base.copy()

# 'on_off': a single series, the row-wise mean of all base series.
df_prices_on_off = df_prices_base.mean(axis=1).to_frame(name='Equally_weighted')

# 'modes': three series -- mean of the offensive group, the equally weighted
# mean, and the mean of the defensive group.
_offensive_mean = df_prices_base[['Value','Momentum','Carry']].mean(axis=1)
_defensive_mean = df_prices_base[['Quality','Profitability','Size']].mean(axis=1)
df_prices_modes = pd.concat([_offensive_mean, df_prices_on_off, _defensive_mean], axis=1)
df_prices_modes.columns = ['Offensive','Equally_weighted','Defensive']

def df_obs_and_prices(num, asset):
    """Pick the observation frame and the price frame for one configuration.

    Parameters
    ----------
    num : int
        Observation dataset number. 1 to 6 select ``df_obs_1`` .. ``df_obs_6``;
        any other value falls back to ``df_obs_7``.
    asset : str
        ``'all'`` selects ``df_prices_all``, ``'on_off'`` selects
        ``df_prices_on_off``; any other value falls back to ``df_prices_modes``.

    Returns
    -------
    tuple
        ``(df_obs, df_prices)``.
    """
    numbered_observations = (
        (1, df_obs_1), (2, df_obs_2), (3, df_obs_3),
        (4, df_obs_4), (5, df_obs_5), (6, df_obs_6),
    )
    # First matching entry wins; dataset 7 is the catch-all.
    df_obs = next((frame for key, frame in numbered_observations if num == key), df_obs_7)

    named_prices = (('all', df_prices_all), ('on_off', df_prices_on_off))
    # Same pattern for prices, with the 'modes' frame as the catch-all.
    df_prices = next((frame for label, frame in named_prices if asset == label), df_prices_modes)

    return df_obs, df_prices

In [5]:
# Default settings for a single run; they are captured as the default
# argument values of `wrapper` when that function is defined.

# Data selection
num_df = 5
asset = 'modes'
n_filter = 5

# Environment
lookback_window = 20
reward_type = 'delayed'
transaction_smoothing = 2

# Training
algo = 'ppo2'
total_timesteps = 2000

# Run control and reporting
random_proportion = 1
prefix = '_'
print_report = True

In [6]:
def wrapper(n_filter = n_filter, lookback_window = lookback_window, reward_type = reward_type, \
            total_timesteps = total_timesteps, transaction_smoothing = transaction_smoothing, \
            algo = algo, num_df = num_df, asset = asset, \
            random_proportion = random_proportion, prefix = prefix, print_report=print_report):
    """Train one agent for a single configuration, replay it and report on it.

    Parameters
    ----------
    n_filter : int
        Sub-sampling step: only every `n_filter`-th row of the observation
        and price frames is given to the environment.
    lookback_window, reward_type, transaction_smoothing
        Forwarded unchanged to ``AequamEnv``.
    total_timesteps : int
        Training budget passed to ``model.learn``. It is multiplied by 10
        when `algo` is ``'ppo2'``, and the multiplied value is the one used
        in the report file name.
    algo : str
        ``'dqn'`` or ``'ppo2'``.
    num_df : int
        Observation dataset selector (see ``df_obs_and_prices``).
    asset : str
        Price frame selector (see ``df_obs_and_prices``).
    random_proportion : float
        The run is executed only when ``random.random() < random_proportion``;
        otherwise the call returns immediately.
    prefix : str
        Forwarded to ``filename`` to build the report name.
    print_report : bool
        Whether to write the PDF report.

    Returns
    -------
    str
        Always ``'Done'``.

    Raises
    ------
    ValueError
        If `algo` is neither ``'dqn'`` nor ``'ppo2'``.
    """
    # Random skip, so that only a share of a large grid is actually executed.
    if not random.random() < random_proportion:
        return 'Done'

    df_obs, df_prices = df_obs_and_prices(num_df, asset)
    # Restrict the *asset-specific* prices to the observation dates. This used
    # to re-index `df_prices_all` regardless of `asset`, which made the
    # `asset` setting irrelevant to training.
    df_prices = propagate_index(from_df=df_obs, to_df=df_prices)

    env = DummyVecEnv([lambda: AequamEnv(df_obs.iloc[::n_filter], df_prices.iloc[::n_filter],\
                                         lookback_window = lookback_window, reward_type = reward_type,\
                                         transaction_smoothing = transaction_smoothing)])

    if algo == 'dqn':
        model = DQN(DQN_MlpPolicy, env, param_noise=True, verbose=0, tensorboard_log='tmp/')
    elif algo == 'ppo2':
        model = PPO2(MlpPolicy, env, verbose=0, tensorboard_log='tmp/')
        total_timesteps *= 10
    else:
        # Explicit error instead of the former deliberate division by zero.
        raise ValueError("Unknown algo {!r}: expected 'dqn' or 'ppo2'".format(algo))

    model.learn(total_timesteps=total_timesteps)

    env.envs[0].play_last_episode(model)

    if print_report:
        env.envs[0].print_pdf_report(filename(prefix, n_filter, lookback_window, reward_type, total_timesteps,\
                                             transaction_smoothing, algo, num_df, asset))
    # NOTE(review): the e-mail is sent even when print_report is False --
    # confirm whether send_report should sit under the guard above.
    send_report(gmail_user, gmail_password)

    return 'Done'

# otypes is given explicitly: without it np.vectorize makes an extra call to
# `wrapper` on the first configuration just to infer the output type, which
# would train (and e-mail) that configuration twice.
vectorized_wrapper = np.vectorize(wrapper, otypes=[str])

In [7]:
# Search space: every combination of the values below is one configuration.
param_space = {
    "n_filter": [1, 5, 10, 20],
    "lookback_window": [5, 10, 20, 30],
    "reward_type": ['delayed', 'daily'],
    "total_timesteps": [1000, 10000, 50000, 100000, 500000],
    "transaction_smoothing": [1, 2, 5, 10, 20],
    "algo": ['dqn','ppo2'],
    "num_df": [1, 2, 3, 4, 5, 6, 7],
    "asset": ['all','on_off','modes'],
}
grid = ParameterGrid(param_space)

# One row per configuration, one column per hyper-parameter.
grid_df = pd.DataFrame(grid)

# Run-level settings for the sweep. `wrapper` captured its defaults when it
# was defined, so these values only take effect where they are passed
# explicitly to the call.
random_proportion = 1
prefix = 'wrapper_test'

In [None]:
# Silence Python warnings from here on (the sweep is very chatty otherwise).
warnings.filterwarnings('ignore')

# One array per swept hyper-parameter, taken column-wise from the grid; the
# remaining scalar settings are broadcast to every configuration.
swept_columns = ('n_filter', 'lookback_window', 'reward_type', 'total_timesteps',
                 'transaction_smoothing', 'algo', 'num_df', 'asset')
sweep_arguments = {column: grid_df[column].values for column in swept_columns}

vectorized_wrapper(**sweep_arguments,
                   random_proportion = random_proportion, prefix = prefix, print_report=print_report)

W0904 18:26:50.579475 36804 deprecation_wrapper.py:119] From C:\Users\Avisia\Anaconda3\envs\env_dorian\lib\site-packages\stable_baselines\common\tf_util.py:98: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W0904 18:26:50.582468 36804 deprecation_wrapper.py:119] From C:\Users\Avisia\Anaconda3\envs\env_dorian\lib\site-packages\stable_baselines\common\tf_util.py:107: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W0904 18:26:50.586459 36804 deprecation_wrapper.py:119] From C:\Users\Avisia\Anaconda3\envs\env_dorian\lib\site-packages\stable_baselines\deepq\dqn.py:123: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.

W0904 18:26:50.587455 36804 deprecation_wrapper.py:119] From C:\Users\Avisia\Anaconda3\envs\env_dorian\lib\site-packages\stable_baselines\deepq\build_graph.py:358: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

W0904

Email sent!
Email sent!
Email sent!
Email sent!


In [None]:
# Display the hyper-parameter grid (one row per configuration).
grid_df

In [None]:
# warnings.filterwarnings('ignore')

# wrapper(total_timesteps=100)

In [None]:
# obs = env.reset()
# for i in range(env.envs[0].total_window -env.envs[0].lookback_window):
#     action, _states = model.predict(obs, deterministic=False)
#     obs, rewards, done, info = env.step(action)

In [None]:
# env.envs[0].play_last_episode(model)

In [None]:
# env.envs[0].play_last_episode(model)

# env.envs[0].print_pdf_report()

In [None]:
# env.reset()
# for i in range(len(df_obs.iloc[::n_filter]) -lookback_window-1):
# #     i += 1
# #     print(i)
#     action, _states = model.predict(obs)
#     obs, rewards, done, info = env.step(action)

In [None]:
# env.envs[0].plot_last_episode(save=False)

In [None]:
# env.envs[0].plot_positions(save=False)

In [None]:
# Show the last rows of the first environment's `df_render` frame.
# NOTE(review): in the visible cells `env` is only assigned inside `wrapper`,
# so this likely raises NameError after Restart & Run All -- confirm where a
# notebook-level `env` is supposed to come from.
env.envs[0].df_render.tail()