In [1]:
import os

# The cells below import the project code as the `src` package, which is
# resolved relative to the working directory.  Step up one level only when
# `src` is not already visible from here, so that re-running this cell on a
# live kernel does not keep climbing the directory tree.
# NOTE(review): presumably the notebook sits one level below the repository
# root — confirm the layout.
if not os.path.isdir('src'):
    os.chdir('..')

# Import Files

In [2]:
from src.agent_interface import *
from src.agent import *
from src.env import *
from src.price_data import *
from src.prior_measure import *
from src.q import *
from src.robust import *
from src.util import *
from src.train import *

# Initialize Environment

In [3]:
# Date range shared by the price-data download and the environment.
start_date, end_date = '1995-01-01', '2023-12-31'
# Batch size; also reused by the training controller and the agent settings.
batch_size = 128

# Keyword arguments unpacked into the YahooFinance constructor.
stock_params = dict(
    symbol='^SPX',
    start_date=start_date,
    end_date=end_date,
)

# Keyword arguments unpacked into PortfolioEnv (the return series itself is
# passed separately when the environment is built).
env_params = dict(
    start_date=start_date,
    end_date=end_date,
    rf_rate=0.024,
    trans_cost=0.0025,
    batch_size=batch_size,
    logging=True,
    seed=42,
)

In [None]:
# Build the price-data source from the stock settings and run its pipeline.
yf = YahooFinance(**stock_params)
df = yf.pipeline()

# Take the 'log_return' column, drop missing entries, and convert to an array.
# NOTE(review): assumes `pipeline()` returns a pandas DataFrame — confirm
# against YahooFinance.
log_return_column = df['log_return'].dropna()
asset_log_returns = log_return_column.to_numpy()

# The environment receives the return series plus the shared env settings.
env = PortfolioEnv(
    asset_log_returns=asset_log_returns,
    **env_params,
)

# Initialize Agent

In [None]:
# Import torch explicitly rather than relying on it leaking in through the
# `from src... import *` lines.
import torch

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a usable CUDA runtime (a hard-coded "cuda" device only
# fails later, when tensors or modules are first moved onto it).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Shared params
n_updates, n_epochs = 1, 5
# NOTE(review): 63 is hard-coded — presumably it must match the size of the
# states produced by the environment; confirm against PortfolioEnv.
state_dim = 63
# One output per entry of the environment's action values.
action_dim = env.action_values.shape[0]

# Settings unpacked into TrainingController.
training_controller_params = dict(
    train_steps=1,
    clone_steps=50,
    batch_size=batch_size,
    n_batches=n_updates,
)

# Settings unpacked into DualityHQOperator.
duality_params = dict(
    discount_rate=0.99,
    delta=1e-4,
    sinkhorn_radius=3e-3,
)

# Settings unpacked into QFunc: input size, hidden sizes, and output size.
q_params = dict(
    input_size=state_dim,
    hidden_size=[64, 64],
    output_size=action_dim,
)

# Settings unpacked into the PORDQN agent.
dqn_params = dict(
    state_dim=state_dim,
    action_dim=action_dim,
    batch_size=batch_size,
    n_updates=n_updates,
    network_lr=1e-4,
    hq_lr=0.02,
    clip_gradients=False,
    seed=123,
)

# Settings unpacked into EpsilonGlobalScheduler; the timestep budget is the
# environment's `action_steps` multiplied by the number of epochs.
eps_scheduler_params = dict(
    epsilon_start=0.9,
    total_timesteps=env.action_steps * n_epochs,
    epsilon_min=0.1,
)

# Run-level settings: the model name (used for the progress writer and the
# config path) and the epoch count handed to the training call.
other_params = dict(
    model_name="PORDQN_SPX",
    n_epochs=n_epochs,
)

In [None]:
# Instantiate each collaborator from its parameter dict.  The construction
# order is kept as-is in case any constructor consumes random state.
training_controller = TrainingController(**training_controller_params)
prior_measure = PriorStudentDistribution(device=device)
duality_operator = DualityHQOperator(**duality_params)
q = QFunc(**q_params)
writer = PORDQNProgressWriter(
    other_params['model_name'],
    overwrite_existing_checkpoint_file=True,
)
eps_scheduler = EpsilonGlobalScheduler(**eps_scheduler_params)

# Assemble the agent from the pieces above plus the remaining DQN settings.
agent = PORDQN(
    action_values=env.action_values,
    training_controller=training_controller,
    prior_measure=prior_measure,
    duality_operator=duality_operator,
    epsilon_scheduler=eps_scheduler,
    qfunc=q,
    writer=writer,
    device=device,
    **dqn_params,
)

In [None]:
# Bundle every parameter group used in this run under its own name.
params = dict(
    stock_params=stock_params,
    env_params=env_params,
    training_controller_params=training_controller_params,
    duality_params=duality_params,
    q_params=q_params,
    dqn_params=dqn_params,
    eps_scheduler_params=eps_scheduler_params,
    other_params=other_params,
)

# The config file sits in the run directory named after the model.
config_path = f"./runs/{other_params['model_name']}/config.json"

# NOTE(review): presumably this writes `params` to `config_path` despite the
# "download" name — confirm against Config.download_config.
Config().download_config(params, config_path)

# Training

In [None]:
# Launch training with the configured number of epochs, reporting through the
# shared progress writer.
# NOTE(review): the meaning of the positional `1` is not visible from this
# notebook — confirm against train_agent's signature.
n_training_epochs = other_params['n_epochs']
all_cum_rewards = train_agent(
    env,
    agent,
    1,
    n_training_epochs,
    writer=writer,
)

Episode 1: 100%|██████████| 693/693 [08:56<00:00,  1.29it/s]


Episode 1 mean of summed rewards: -54.9659%


Episode 2:  36%|███▌      | 247/693 [03:29<06:17,  1.18it/s]


KeyboardInterrupt: 