In [1]:
import os
import json
import pandas as pd
from dataclasses import asdict

from datetime import datetime
from utils.config import DRLConfig
from utils.drl_train import training_pipeline

%load_ext autoreload
%autoreload 2

In [2]:
# Dataset selection: every input file is resolved relative to DATA_DIR.
# Active dataset: MSCI World Index.
# To use the S&P 500 sector data instead, set DATA_DIR = "../data/snp_new"
# (the three parquet file names below stay the same).
DATA_DIR = "../data/msci"

# Full paths of the daily returns, prices and volatility files.
RETURNS_PATH, PRICES_PATH, VOLA_PATH = (
    os.path.join(DATA_DIR, filename)
    for filename in ("returns_1d.parquet", "prices_1d.parquet", "vola_1d.parquet")
)

# Read the three parquet files in the same order: returns, prices, volatility.
df_ret, df_prices, df_vol = (
    pd.read_parquet(parquet_path)
    for parquet_path in (RETURNS_PATH, PRICES_PATH, VOLA_PATH)
)

In [3]:
# Viewing the training logs in TensorBoard:
#   1. Open a terminal or command prompt.
#   2. Change into the directory that *contains* the `logs` directory (i.e., the root of this repository).
#   3. Run `tensorboard --logdir logs/`.
#   4. Open the URL TensorBoard prints (usually http://localhost:6006/) in your web browser.

# Experiment-level knobs; the trailing comments record the values used in the paper.
N_WINDOWS = 10  # 10 in paper
N_AGENTS = 5  # 5 in paper
START_YR = 2006  # 2006 in paper
TOTAL_STEPS = 7_500_000  # 7_500_000 in paper
SEED_POLICY = False

# The run timestamp is embedded in the output folder name and in the config file name,
# so every execution of this cell writes to a fresh location.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
FOLDER_NAME = f"{timestamp}_train_start={START_YR}_best_seed={SEED_POLICY}"

# Keyword arguments for DRLConfig, grouped by topic.
window_kwargs = dict(
    n_windows=N_WINDOWS,
    agents_per_window=N_AGENTS,
    base_start_year=START_YR,
    seed_policy=SEED_POLICY,
)
env_kwargs = dict(
    env_window_size=60,
    transaction_cost=0.0,
    initial_balance=100_000,
    reward_scaling=1.0,
    eta_dsr=1 / 252,
)
training_kwargs = dict(
    n_envs=10,
    total_timesteps_per_round=TOTAL_STEPS,
    n_steps_per_env=252 * 3,
    batch_size=1260,
    n_epochs=16,
    gamma=0.9,
    gae_lambda=0.9,
    clip_range=0.25,
    log_std_init=-1.0,
)
lr_kwargs = dict(
    initial_lr=3e-4,
    final_lr=1e-5,
)
path_kwargs = dict(
    data_dir=DATA_DIR,
    model_save_dir=f"../models/{FOLDER_NAME}",
    tensorboard_log_dir=f"../logs/{FOLDER_NAME}",
    # Optional, currently disabled:
    # prev_best_model_dir="../models/full_random_run/agent_6-1_seed=25_test=2017_valrew=43.20.zip",
)

# Assemble the configuration object from the grouped settings.
config = DRLConfig(
    **window_kwargs,
    **env_kwargs,
    **training_kwargs,
    **lr_kwargs,
    **path_kwargs,
)

# Write the configuration as JSON into the model directory, creating the directory if needed.
os.makedirs(config.model_save_dir, exist_ok=True)
config_json_path = os.path.join(config.model_save_dir, f"config_{timestamp}.json")
config_dict = asdict(config)
with open(config_json_path, "w") as config_file:
    json.dump(config_dict, config_file, indent=4)
print(f"\nConfiguration saved to: {config_json_path}")


Configuration saved to: ../models/20250702_205602_train_start=2013_best_seed=False/config_20250702_205602.json


In [4]:
# Launch the training pipeline with the configuration and the three loaded data frames.
# Its two return values are unpacked into `results_df` and `backtest_portfolio`.
results_df, backtest_portfolio = training_pipeline(
    df_ret=df_ret,
    df_prices=df_prices,
    df_vol=df_vol,
    drl_config=config,
)

--- Starting Window 1/1 (Train Year Start: 2013) ---
  Train Period: 2013-01-01 to 2017-12-31
  Val Period  : 2018-01-01 to 2018-12-31
  Test Period : 2019-01-01 to 2019-12-31
  Starting with fresh random initialization
  Training Agent 1/2 with seed 38531...


Output()


Training complete. Trained for 3500000 timesteps.
TensorBoard logs for experiment 'PPO_Seed=38531' saved in directory: ../logs/20250702_205602_train_start=2013_best_seed=False
    Evaluating agent on validation set...
    Validation Reward: -44.13468237
    Agent saved to: ../models/20250702_205602_train_start=2013_best_seed=False/agent_1-1_seed=38531_test=2019_valrew=-44.13.zip

  Training Agent 2/2 with seed 27845...


Output()


Training complete. Trained for 3500000 timesteps.
TensorBoard logs for experiment 'PPO_Seed=27845' saved in directory: ../logs/20250702_205602_train_start=2013_best_seed=False
    Evaluating agent on validation set...
    Validation Reward: -46.08051953
    Agent saved to: ../models/20250702_205602_train_start=2013_best_seed=False/agent_1-2_seed=27845_test=2019_valrew=-46.08.zip

best_agent_path: ../models/20250702_205602_train_start=2013_best_seed=False/agent_1-1_seed=38531_test=2019_valrew=-44.13.zip
    Running backtest evaluation...

Saving backtest portfolio: Portfolio with 8 assets, initial value: $100,000.00, current value: $127,042.55



In [5]:
# Print a caption, then leave `results_df` (returned by `training_pipeline`) as the
# cell's last expression so the notebook renders it as the cell output.
print("Final Results DataFrame:")
results_df

Final Results DataFrame:


Unnamed: 0,window,best_agent_path,n_eval_episodes,val_reward,std_reward,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,...,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk (95%),Avg Annual Turnover (in %),final_portfolio_value_first_episode,val_reward_mean,val_reward_std
0,1,agent_1-1_seed=38531_test=2019_valrew=-44,1,11.832245,0.0,0.27286,0.270425,0.112971,2.193221,3.777494,...,1.453871,2.86022,-0.426346,2.56769,1.006886,-0.011112,50.656707,127043,-45.107601,0.972919
