In [1]:
import os

# Step up one directory so the relative paths used by later cells
# ('./dataset/...') resolve (presumably the notebook sits one level below the
# project root -- confirm).
# `os.chdir('..')` is relative to the *current* directory, so an unguarded
# re-run of this cell would climb one level further every time. The sentinel
# makes the cell a no-op after its first execution in a kernel session.
if '_project_root' not in globals():
    os.chdir('..')
    _project_root = os.getcwd()

# Import Files

In [None]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import pickle
import pandas as pd
import pandas_market_calendars as mcal
from tqdm import tqdm
import torch
torch.set_float32_matmul_precision('high')
from gymnasium import spaces
from mmd.env import GenLSTM, MMDSimulator, load_generator
from mmd.train import start_writer, get_params_from_events, get_params_dicts, get_robustq_params_dicts, train_robustdqn, training_info
from mmd.evaluation import simulate_agent_spx, generate_graph
from agent.q import QFunc
from agent.DQN import PORDQN

In [3]:
# Read the pickled object stored in the same folder as the LSTM events and
# keep it as `ma_model_params`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('./dataset/lstm/ma_params.pkl', mode='rb') as f:
    ma_model_params = pickle.loads(f.read())

In [4]:
# Location handed to `get_params_from_events` (and later to `load_generator`).
events_path = './dataset/lstm/'
params = get_params_from_events(events_path)
# `params` is walked as a dict of dicts: every inner (name, value) pair is
# published as a notebook-level variable unless that name already exists.
for key, value in params.items():
    # NOTE(review): the inner loop rebinds `key`/`value`, shadowing the outer
    # pair. It works because `value.items()` is evaluated before the rebinding,
    # but since `key` and `value` are themselves globals here, a parameter
    # literally named 'key' or 'value' would be skipped by the check below.
    for key, value in value.items():
        if key in globals(): continue # skip if already in globals
        globals()[key] = value
# At cell level `vars()` is the notebook namespace, so this snapshot holds
# everything defined so far, including the names injected by the loop above.
data_params, model_params, train_params = get_params_dicts(vars().copy())

# Parameters

In [5]:
# --- Seeds and evaluation size ---
seed = 0  # the paper's results used seeds 0, 1, 2, 3 and 4
eval_seed = 12345
eval_batch_size = 1000

# --- Sequence lengths ---
burn_in, state_len = 500, 60
total_length = 560  # NOTE(review): equals burn_in + state_len -- confirm this is intended

# --- Trading calendar and the schedule between the two dates ---
trading_calendar = 'NYSE'
cal_start_date, cal_end_date = '1995-01-01', '2007-12-31'
calendar = mcal.get_calendar(trading_calendar)
schedule = calendar.schedule(start_date=cal_start_date, end_date=cal_end_date)

# --- Rate and cost settings ---
int_rate = 0.024
trans_cost = 5e-4  # standard cost = 0.0005

In [6]:
# Construct the generator and pass it straight through `load_generator`
# together with `events_path`; only the returned object is kept.
# NOTE(review): the meaning of the constructor arguments (4, 1, 60) is not
# visible in this notebook -- confirm against `GenLSTM`.
generator = load_generator(GenLSTM(4, 1, 60), events_path)

# LSTM Environment

In [7]:
# Seed torch's global RNG from the notebook-level `seed`.
torch.manual_seed(seed)
batch_size = 8
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Single source of truth for the size of the discrete action set: the
# gymnasium space and the grid of action values are both derived from it, so
# they cannot drift apart when the count is changed.
num_actions = 9
action_space = spaces.Discrete(num_actions)
action_values = torch.linspace(-1., 1., num_actions, device=device)  # evenly spaced over [-1, 1]

# Settings for `nu`; `nu_scale` and `nu_df` are later passed to the agent.
nu_dist = 't'
nu_scale = 0.03
nu_df = 2

# Observation width = `state_len` plus one entry per name in `other_state_vars`.
other_state_vars = ['log_wealth', 'positions', 'dt']
obs_dim = state_len + len(other_state_vars)

In [None]:
# RUN IF TRAINING FROM SCRATCH
# --- Agent / replay-buffer hyperparameters ---
discount = 0.99
eps_greedy = 0.1 # epsilon greedy parameter
buffer_max_length = int(1e5)
clone_steps = 50
train_steps = 1
agent_batch_size = 128
n_batches = 1
n_epochs = 1
robustq_lr = 1e-4
architecture = [64, 64]
pre_train_Q = False
n_episodes = 3

# Build the Q-network from the same `obs_dim` / `num_actions` that are passed
# to the agent, instead of recomputing them here, so the network and the agent
# cannot disagree on input/output sizes.
robustq = QFunc(obs_dim, architecture, num_actions).to(device)

# --- Sinkhorn / lambda-optimisation hyperparameters ---
delta = 1e-4 # regularisation parameter for Sinkhorn distance
epsilon = 0.003 # Sinkhorn distance
norm_ord = 1
lamda_init = 0. # initial lambda
lamda_max_iter = 100
lamda_step_size = 10 # step size for learning rate scheduler
lamda_gamma = 10 # gamma for learning rate scheduler
lamda_lr = 0.02 # learning rate for lambda
n_outer = 1 # not used in this algorithm but used in logging by writer
n_inner = 1000 # number of samples from nu to calc inner expectations

# Snapshot the notebook namespace and let the helper split it into the two
# parameter dicts. NOTE: this rebinds `model_params`, replacing the value
# produced earlier by `get_params_dicts`.
simulator_params, model_params = get_robustq_params_dicts(vars().copy())
writer = start_writer(simulator_params, model_params, model_name='PORDQN')

In [1]:
# DISABLED: builds the generator-based environment (`MMDSimulator`) and trains
# the agent on it. Uncomment to run.
# NOTE(review): the cells in the Comparison section read `orig_env_output`,
# which is not assigned in any visible cell -- it presumably has to be captured
# from `orig_env` after running this block; confirm before relying on them.
# orig_env = MMDSimulator(generator, ma_model_params, trading_calendar, cal_start_date, cal_end_date, state_len, burn_in,int_rate, trans_cost, batch_size, action_space, action_values, device)
# robustdqn_agent = PORDQN(obs_dim, num_actions, discount, nu_scale, nu_df, action_values, epsilon, delta, n_inner, lamda_init,lamda_lr, lamda_max_iter, lamda_step_size, lamda_gamma, norm_ord, robustq, eps_greedy, buffer_max_length, clone_steps, train_steps, agent_batch_size, n_batches, n_epochs, robustq_lr, device=device, seed=seed, writer=writer)
# robustdqn_agent = train_robustdqn(robustdqn_agent, orig_env, writer, simulator_params, model_params)

# Bootstrap Environment

In [None]:
# Initialize Environment
# Read the SPX csv; only its 'log_return' column is handed to the environment.
spx_file_loc = './dataset/spx.csv'
spx_df = pd.read_csv(spx_file_loc)

from src.new_env import PortfolioEnv

# Use the notebook-level `batch_size` rather than repeating the literal 8, so
# the environment stays in sync with the batch size configured earlier.
env = PortfolioEnv(spx_df['log_return'], batch_size=batch_size)

# Build the agent (same positional order as before, one group per line) and
# train it on the bootstrap environment. Requires the "training from scratch"
# cell to have been run, since it defines `robustq`, `writer` and the
# hyperparameters used here.
robustdqn_agent = PORDQN(
    obs_dim, num_actions, discount,
    nu_scale, nu_df, action_values,
    epsilon, delta, n_inner,
    lamda_init, lamda_lr, lamda_max_iter, lamda_step_size, lamda_gamma,
    norm_ord, robustq,
    eps_greedy, buffer_max_length, clone_steps, train_steps,
    agent_batch_size, n_batches, n_epochs, robustq_lr,
    device=device, seed=seed, writer=writer,
)
robustdqn_agent = train_robustdqn(robustdqn_agent, env, writer, simulator_params, model_params)

Episode 1: 100%|██████████| 2712/2712 [07:34<00:00,  5.97it/s]


Episode 1 mean of summed rewards: -0.263


Episode 2: 100%|██████████| 2712/2712 [08:49<00:00,  5.12it/s]


Episode 2 mean of summed rewards: -0.079


In [None]:
# Keep a handle on the environment's `env_step_check` attribute and show it as
# the cell result (the comparison cells index it by key).
new_env_output = env.env_step_check
new_env_output

{'next_state': tensor([[ 2.6206e-03,  1.1571e-03, -5.5451e-03,  6.7089e-03,  4.7473e-03,
          -6.6281e-03,  5.3968e-03, -3.7764e-03,  1.7316e-03,  4.5625e-03,
           5.5875e-03, -7.7000e-03,  9.4749e-04,  7.0741e-03,  7.3924e-04,
          -7.4006e-03, -7.6677e-03, -2.6862e-03,  6.7223e-03, -1.8289e-03,
           7.0371e-03, -6.0125e-03,  2.8244e-03, -1.1846e-02,  7.3785e-04,
          -1.0003e-03,  5.6582e-04,  1.6548e-03, -3.3944e-03, -6.5920e-03,
           2.2957e-03, -6.0146e-03,  5.9404e-03, -3.1974e-03,  9.8158e-03,
           1.0320e-03, -2.9267e-03,  1.2540e-02,  2.4051e-03, -3.8006e-03,
           7.9332e-03, -7.4126e-04, -7.5737e-03, -3.2867e-03,  4.6434e-04,
           8.2576e-03, -5.5814e-03, -7.8945e-03, -9.1630e-03,  3.6457e-03,
           5.5150e-04,  2.7970e-04,  2.4702e-05,  1.0113e-03,  8.8278e-03,
           9.2002e-04, -1.7187e-03, -1.0020e-02, -1.4999e-02, -4.1041e-03,
          -1.1254e-02,  0.0000e+00,  8.2192e-03],
         [ 2.5628e-03, -9.9503e-04, 

# Comparison

In [None]:
# Print the shape of the 'next_state' entry from each environment's output.
# NOTE(review): `orig_env_output` is not assigned in any visible cell (the cell
# that builds `orig_env` is commented out), so this raises NameError on a
# fresh kernel unless it is captured first.
info = 'next_state'
orig_shape, new_shape = orig_env_output[info].shape, new_env_output[info].shape
print(f"ORIGINAL: {orig_shape}\nNEW: {new_shape}")

ORIGINAL: torch.Size([8, 63])
NEW: torch.Size([8, 63])


In [None]:
# Print the shape of the 'done' entry from each environment's output.
# NOTE(review): depends on `orig_env_output`, which no visible cell assigns.
info = 'done'
orig_shape, new_shape = orig_env_output[info].shape, new_env_output[info].shape
print(f"ORIGINAL: {orig_shape}\nNEW: {new_shape}")

ORIGINAL: torch.Size([8, 1])
NEW: torch.Size([8, 1])


In [None]:
# For the 'info' entry, print the container type from each environment and
# then the shape of its first element.
# NOTE(review): depends on `orig_env_output`, which no visible cell assigns.
info = 'info'
orig_info, new_info = orig_env_output[info], new_env_output[info]
print(f"ORIGINAL: {type(orig_info)}\nNEW: {type(new_info)}")
print(f"ORIGINAL: {orig_info[0].shape}\nNEW: {new_info[0].shape}")

ORIGINAL: <class 'tuple'>
NEW: <class 'tuple'>
ORIGINAL: torch.Size([8])
NEW: torch.Size([8])
