In [10]:
import json
import os
from dataclasses import asdict
from datetime import datetime

import pandas as pd

from utils.config import DRLConfig
from utils.drl_train import training_pipeline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Input dataset: three parquet files that live side by side in DATA_DIR.
DATA_DIR = "../data/snp_new"

# Resolve the file locations up front so the paths stay available by name.
RETURNS_PATH, PRICES_PATH, VOLA_PATH = (
    os.path.join(DATA_DIR, parquet_name)
    for parquet_name in ("returns_1d.parquet", "prices_1d.parquet", "vola_1d.parquet")
)

# Load each file into its own DataFrame, in the same order as the paths above
# (returns, prices, volatility - going by the file names; the contents are not
# inspected here).
df_ret, df_prices, df_vol = (
    pd.read_parquet(parquet_path)
    for parquet_path in (RETURNS_PATH, PRICES_PATH, VOLA_PATH)
)

In [12]:
# Viewing the training logs in TensorBoard:
#   1. Open a terminal or command prompt.
#   2. cd into the directory that *contains* `logs/` (the repository root).
#   3. Run: `tensorboard --logdir logs/`
#   4. Open the URL TensorBoard prints (usually http://localhost:6006/).

# One timestamp per run; it namespaces both the model and the log directory
# so repeated runs never overwrite each other.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"

# Full run configuration. Values marked "paper" note where this notebook
# deviates from (or matches) the reference setup.
config = DRLConfig(
    # --- Output locations (unique per run via the timestamp) ---
    model_save_dir=f"../models/{timestamp}",
    tensorboard_log_dir=f"../logs/{timestamp}",

    # --- Rolling-window setup ---
    base_start_year=2006,
    n_windows=2,  # paper: 10
    agents_per_window=2,  # paper: 5
    use_previous_best_seed=False,

    # --- Environment ---
    initial_balance=100_000,
    env_window_size=60,
    transaction_cost=0.0,
    reward_scaling=1.0,
    eta_dsr=1 / 252,

    # --- Training ---
    total_timesteps_per_round=7_500_000,  # same as paper
    n_envs=10,
    n_steps_per_env=252 * 3,
    # NOTE(review): n_envs * n_steps_per_env = 7560 = 6 * batch_size; presumably
    # the rollout is meant to split evenly into minibatches - confirm in utils.
    batch_size=1260,
    n_epochs=16,
    gamma=0.9,
    gae_lambda=0.9,
    clip_range=0.25,
    log_std_init=-1.0,

    # --- Learning-rate range ---
    initial_lr=3e-4,
    final_lr=1e-5,
)

In [13]:
# Persist the exact run configuration next to the models it produces, so a
# saved agent can always be traced back to the settings that trained it.
# `config` is a dataclass instance, so `asdict` flattens it into a plain dict.
config_dict = asdict(config)

# Create the run directory before building on it; exist_ok keeps re-runs safe.
os.makedirs(config.model_save_dir, exist_ok=True)
config_json_path = os.path.join(config.model_save_dir, f"config_{timestamp}.json")

# Explicit encoding so the file does not depend on the platform default.
with open(config_json_path, "w", encoding="utf-8") as f:
    json.dump(config_dict, f, indent=4)
print(f"\nConfiguration saved to: {config_json_path}")


Configuration saved to: ../models/20250629_183415/config_20250629_183415.json


In [14]:
# Launch the full training pipeline with the run configuration and the three
# input frames. It hands back two objects: `results`, which is later turned
# into a summary DataFrame, and `backtest_portfolio`, a mapping whose values
# expose `get_history()` and are exported to CSV further down.
results, backtest_portfolio = training_pipeline(
    drl_config=config,
    df_prices=df_prices,
    df_ret=df_ret,
    df_vol=df_vol,
)

--- Starting Window 1/2 (Train Year Start: 2006) ---
  Train Period: 2006-01-01 to 2010-12-31
  Val Period  : 2011-01-01 to 2011-12-31
  Test Period : 2012-01-01 to 2012-12-31
  Starting with fresh random initialization
  Training Agent 1/2 with seed 0...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs for experiment 'PPO_Seed0' saved in directory: ../logs/20250629_183415
    Evaluating agent on validation set...
    Validation Mean Reward: -10.1788
    Agent saved to: ../models/20250629_183415/agent_seed0_valrew-10.18.zip
  Training Agent 2/2 with seed 1...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs for experiment 'PPO_Seed1' saved in directory: ../logs/20250629_183415
    Evaluating agent on validation set...
    Validation Mean Reward: -9.6778
    Agent saved to: ../models/20250629_183415/agent_seed1_valrew-9.68.zip
    Running backtest evaluation...
--- Starting Window 2/2 (Train Year Start: 2007) ---
  Train Period: 2007-01-01 to 2011-12-31
  Val Period  : 2012-01-01 to 2012-12-31
  Test Period : 2013-01-01 to 2013-12-31
  Starting with fresh random initialization
  Training Agent 1/2 with seed 2...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs for experiment 'PPO_Seed2' saved in directory: ../logs/20250629_183415
    Evaluating agent on validation set...
    Validation Mean Reward: -24.9470
    Agent saved to: ../models/20250629_183415/agent_seed2_valrew-24.95.zip
  Training Agent 2/2 with seed 3...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs for experiment 'PPO_Seed3' saved in directory: ../logs/20250629_183415
    Evaluating agent on validation set...
    Validation Mean Reward: -23.0979
    Agent saved to: ../models/20250629_183415/agent_seed3_valrew-23.10.zip
    Running backtest evaluation...


In [17]:
# Write the history of every backtest portfolio to "<key>_portfolio.csv"
# inside this run's model directory.
for portfolio_key, portfolio in backtest_portfolio.items():
    history_csv_path = os.path.join(
        config.model_save_dir, f"{portfolio_key}_portfolio.csv"
    )
    portfolio.get_history().to_csv(history_csv_path)

In [18]:
# Tabulate the backtest results returned by the pipeline.
results_df = pd.DataFrame(results)

# Store the summary (without the index column) alongside the run's models.
results_filename = f"backtest_results_summary_{timestamp}.csv"
results_save_path = os.path.join(config.model_save_dir, results_filename)
results_df.to_csv(results_save_path, index=False)

print(f"\nBacktest results summary saved to: {results_save_path}")
print("\nFinal Results DataFrame:")
# NOTE(review): in the recorded output, best_agent_path reads
# "agent_seed1_valrew-9" while the file saved during training is
# "agent_seed1_valrew-9.68.zip" - the name looks cut at the first "." somewhere
# upstream; confirm in the training pipeline.
results_df.head()


Backtest results summary saved to: ../models/20250629_183415/backtest_results_summary_20250629_183415.csv

Final Results DataFrame:


Unnamed: 0,window,best_agent_path,n_eval_episodes,mean_reward,std_reward,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk (95%),Portfolio turnover (in %),final_portfolio_value_first_episode
0,1,agent_seed1_valrew-9,1,-20.386796,0.0,0.087672,0.086222,0.100542,0.886285,1.233339,0.908643,-0.071085,1.160334,1.34381,-0.105075,0.645236,1.150395,-0.009774,43.623225,108622
1,2,agent_seed3_valrew-23,1,5.568448,0.0,0.217487,0.215587,0.086533,2.318357,5.084843,0.920359,-0.042772,1.451697,3.26902,-0.507074,0.837528,0.833654,-0.009975,42.000596,121559
