In [1]:
import pandas as pd
import numpy as np
import torch # Ensure torch is imported for policy_kwargs if needed by DRLAgentJules
import os
from datetime import datetime

# Assuming utils are in parent directory or PYTHONPATH is set
from utils.portfolio_env import PortfolioEnv 
from utils.drl_agent_jules import DRLAgent # Import the modified agent

# For learning rate schedule
from typing import Callable

# %load_ext autoreload
# %autoreload 2

In [2]:
# --- Main Configuration ---
N_WINDOWS = 10
AGENTS_PER_WINDOW = 5 
BASE_START_YEAR = 2006

# Data paths
PRICE_DATA_PATH = "../data/prices.parquet"
RETURNS_DATA_PATH = "../data/returns.parquet"
VOLA_DATA_PATH = "../data/vola.parquet"
MODEL_SAVE_DIR = "../models/sliding_window_jules/"

# Ensure model save directory exists
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# --- DRL Agent Hyperparameters (from paper) ---
N_ENVS = 10
TOTAL_TIMESTEPS_PER_ROUND = 7_500_000 # 7.5M
N_STEPS_PER_ENV = 252 * 3 # n_steps = 252 * 3 * n_envs (this is per env for PPO buffer)
# total buffer size before update = N_STEPS_PER_ENV * N_ENVS

BATCH_SIZE = 1260
N_EPOCHS = 16
GAMMA = 0.9
GAE_LAMBDA = 0.9
CLIP_RANGE = 0.25
LOG_STD_INIT = -1.0
POLICY_KWARGS = dict(
    activation_fn=torch.nn.Tanh,
    net_arch=[64, 64], # Shared layers for policy and value networks
    log_std_init=LOG_STD_INIT
)

# Learning rate schedule: linear decay from 3e-4 to 1e-5
INITIAL_LR = 3e-4
FINAL_LR = 1e-5

def linear_schedule(initial_value: float, final_value: float) -> Callable[[float], float]:
    """
    Linear learning rate schedule.
    :param initial_value: Initial learning rate.
    :param final_value: Final learning rate.
    :return: schedule that computes current learning rate depending on progress remaining (1.0 -> 0.0)
    """
    def func(progress_remaining: float) -> float:
        """
        Progress will decrease from 1.0 to 0.0
        """
        return final_value + progress_remaining * (initial_value - final_value)
    return func

LEARNING_RATE_SCHEDULE = linear_schedule(INITIAL_LR, FINAL_LR)

# --- PortfolioEnv Parameters ---
ENV_WINDOW_SIZE = 60 # Lookback window for features in PortfolioEnv
TRANSACTION_COST = 0.0 # As per paper (or can be adjusted)
INITIAL_BALANCE = 100_000
REWARD_SCALING = 1.0
ETA_DSR = 1 / 252 # For Differential Sharpe Ratio in PortfolioEnv

In [3]:
# Load the full datasets once
try:
    print("Loading data...")
    prices_df_full = pd.read_parquet(PRICE_DATA_PATH)
    returns_df_full = pd.read_parquet(RETURNS_DATA_PATH)
    vola_df_full = pd.read_parquet(VOLA_DATA_PATH)
    print("Data loaded successfully.")
    
    # Ensure DataFrames have DateTimeIndex
    for df in [prices_df_full, returns_df_full, vola_df_full]:
        if not isinstance(df.index, pd.DatetimeIndex):
            df.index = pd.to_datetime(df.index)
            
    # print("\nPrice Data Head:")
    # print(prices_df_full.head())
    # print("\nReturns Data Head:")
    # print(returns_df_full.head())
    # print("\nVolatility Data Head:")
    # print(vola_df_full.head())
    
except FileNotFoundError as e:
    print(f"ERROR: Data file not found. {e}")
    print("Please ensure data is generated and paths are correct in Cell 2.")
    # Stop execution or raise error if data is critical for notebook to run
    raise

Loading data...
Data loaded successfully.

Price Data Head:
Ticker            XLF        XLK        XLV        XLY        XLP        XLE  \
Date                                                                           
2006-01-03  17.763966  16.536201  23.101501  25.836199  14.134378  30.129393   
2006-01-04  17.747419  16.730202  23.331730  25.859682  14.182621  30.215170   
2006-01-05  17.808100  16.807791  23.274168  25.883167  14.110260  29.832109   
2006-01-06  17.912922  17.071632  23.425253  26.094555  14.170568  30.563917   
2006-01-09  17.962572  17.125944  23.533169  26.360754  14.236897  30.529608   

Ticker            XLI        XLU        XLB  XLRE  XLC  
Date                                                    
2006-01-03  21.745033  16.358213  20.255630   NaN  NaN  
2006-01-04  21.800028  16.383776  20.380095   NaN  NaN  
2006-01-05  21.765644  16.276424  20.347343   NaN  NaN  
2006-01-06  21.848143  16.450228  20.563517   NaN  NaN  
2006-01-09  22.026896  16.347998  20.

In [4]:
def slice_data(year_start, num_train_years, num_val_years, num_test_years, prices_df, returns_df, vol_df):
    """Slices data for a given window configuration."""
    
    train_start_date = pd.to_datetime(f"{year_start}-01-01")
    train_end_date = pd.to_datetime(f"{year_start + num_train_years -1}-12-31")
    
    val_start_date = pd.to_datetime(f"{year_start + num_train_years}-01-01")
    val_end_date = pd.to_datetime(f"{year_start + num_train_years + num_val_years - 1}-12-31")
    
    test_start_date = pd.to_datetime(f"{year_start + num_train_years + num_val_years}-01-01")
    test_end_date = pd.to_datetime(f"{year_start + num_train_years + num_val_years + num_test_years - 1}-12-31")
    
    print(f"  Train Period: {train_start_date.date()} to {train_end_date.date()}")
    print(f"  Val Period  : {val_start_date.date()} to {val_end_date.date()}")
    print(f"  Test Period : {test_start_date.date()} to {test_end_date.date()}")

    # Slicing (ensure index is datetime)
    train_prices = prices_df[train_start_date:train_end_date]
    train_returns = returns_df[train_start_date:train_end_date]
    train_vola = vol_df[train_start_date:train_end_date]
    
    val_prices = prices_df[val_start_date:val_end_date]
    val_returns = returns_df[val_start_date:val_end_date]
    val_vola = vol_df[val_start_date:val_end_date]
    
    test_prices = prices_df[test_start_date:test_end_date]
    test_returns = returns_df[test_start_date:test_end_date]
    test_vola = vol_df[test_start_date:test_end_date]
    
    # Basic check for empty slices which can halt env creation
    if train_prices.empty or val_prices.empty or test_prices.empty:
        print("WARNING: One or more data slices are empty. Check date ranges and data availability.")
        # Potentially raise an error or handle as per requirements
    
    return (train_prices, train_returns, train_vola), \
           (val_prices, val_returns, val_vola), \
           (test_prices, test_returns, test_vola)



In [5]:
all_backtest_results = []
best_agent_paths_per_window = [] # To store path of the best agent for each window

# --- Main Loop for Sliding Windows ---
for i_window in range(N_WINDOWS):
    current_start_year = BASE_START_YEAR + i_window
    print(f"--- Starting Window {i_window+1}/{N_WINDOWS} (Train Year Start: {current_start_year}) ---")

    # 1. Slice Data for the current window
    # 5 years train, 1 year validation, 1 year test
    train_data, val_data, test_data = slice_data(
        year_start=current_start_year,
        num_train_years=5,
        num_val_years=1,
        num_test_years=1,
        prices_df=prices_df_full,
        returns_df=returns_df_full,
        vol_df=vola_df_full
    )
    
    # Unpack data
    (train_prices, train_returns, train_vola) = train_data
    (val_prices, val_returns, val_vola) = val_data
    (test_prices, test_returns, test_vola) = test_data

    # Check if any crucial dataframe is too short (e.g., shorter than ENV_WINDOW_SIZE)
    # PortfolioEnv requires at least `window_size` days of data to start.
    min_data_len = ENV_WINDOW_SIZE + 1 # Need at least window_size + 1 for one step
    if len(train_prices) < min_data_len or len(val_prices) < min_data_len or len(test_prices) < min_data_len:
        print(f"SKIPPING Window {i_window+1} due to insufficient data length for one or more periods.")
        print(f"  Train length: {len(train_prices)}, Val length: {len(val_prices)}, Test length: {len(test_prices)}")
        print(f"  Required minimum: {min_data_len}")
        best_agent_paths_per_window.append(None) # Mark as skipped
        all_backtest_results.append({"window": i_window+1, "status": "skipped_insufficient_data", "metrics": {}})
        continue


    # 2. Create Training and Validation Environments
    # These envs are re-created for each agent to ensure fresh state and correct data.
    # However, the data slice itself is per-window.

    best_agent_for_window = None
    best_val_reward = -np.inf
    
    # --- Inner Loop for Training AGENTS_PER_WINDOW Agents ---
    for i_agent in range(AGENTS_PER_WINDOW):
        agent_seed = (i_window * AGENTS_PER_WINDOW) + i_agent # Unique seed for each agent run
        print(f"  Training Agent {i_agent+1}/{AGENTS_PER_WINDOW} with seed {agent_seed}...")

        # Create environments for this specific agent
        # Training Env
        env_train_config = {
            'returns_df': train_returns, 'prices_df': train_prices, 'vol_df': train_vola,
            'window_size': ENV_WINDOW_SIZE, 'transaction_cost': TRANSACTION_COST,
            'initial_balance': INITIAL_BALANCE, 'reward_scaling': REWARD_SCALING, 'eta': ETA_DSR
        }
        # The DRLAgent class will use this first env to understand structure for SubprocVecEnv
        # This single_env_for_init is just for the DRLAgent constructor to get parameters.
        # The actual training will use N_ENVS created by DRLAgent.
        single_env_for_init_train = PortfolioEnv(**env_train_config)

        # Validation Env (single, not vectorized for evaluation)
        env_val_config = {
            'returns_df': val_returns, 'prices_df': val_prices, 'vol_df': val_vola,
            'window_size': ENV_WINDOW_SIZE, 'transaction_cost': TRANSACTION_COST,
            'initial_balance': INITIAL_BALANCE, 'reward_scaling': REWARD_SCALING, 'eta': ETA_DSR
        }
        env_val = PortfolioEnv(**env_val_config)
        
        # Instantiate DRL Agent
        agent = DRLAgent(
            env=single_env_for_init_train, # Pass the sample env for DRLAgent to clone
            n_envs=N_ENVS,
            policy_kwargs=POLICY_KWARGS,
            n_steps=N_STEPS_PER_ENV, # n_steps per environment for PPO
            batch_size=BATCH_SIZE,
            n_epochs=N_EPOCHS,
            learning_rate=LEARNING_RATE_SCHEDULE,
            gamma=GAMMA,
            gae_lambda=GAE_LAMBDA,
            clip_range=CLIP_RANGE,
            seed=agent_seed
        )

        # Agent Seeding: Load previous window's best agent if not the first window
        if i_window > 0 and best_agent_paths_per_window[i_window-1] is not None:
            previous_best_agent_path = best_agent_paths_per_window[i_window-1]
            print(f"    Seeding agent from: {previous_best_agent_path}")
            # The env for load_from_file should match the new training env structure
            # DRLAgent's load_from_file uses its internal self.env by default if env=None.
            # This self.env is already configured with N_ENVS and the new train_data.
            agent.load_from_file(path=previous_best_agent_path, env=None) 
                                   
        # Train the agent
        print(f"    Starting training for {TOTAL_TIMESTEPS_PER_ROUND} timesteps...")
        # Note: Training can be very long. For testing, reduce TOTAL_TIMESTEPS_PER_ROUND.
        # Example: agent.train(total_timesteps=10000, tb_log_name=f"ppo_win{i_window}_agent{i_agent}")
        agent.train(
            total_timesteps=TOTAL_TIMESTEPS_PER_ROUND, 
            tb_log_name=f"PPO_Window{i_window+1}_Agent{i_agent+1}_Seed{agent_seed}"
        )
        
        # Evaluate the agent on the validation set
        print("    Evaluating agent on validation set...")
        # The evaluate method in DRLAgentJules is designed for a single eval_env
        val_metrics = agent.evaluate(eval_env=env_val, n_eval_episodes=1) # Use 1 episode for validation speed
        current_val_reward = val_metrics.get("mean_reward", -np.inf)
        print(f"    Validation Mean Reward: {current_val_reward:.4f}")
        
        # Save this agent
        current_agent_model_name = f"agent_win{i_window+1}_seed{agent_seed}_valrew{current_val_reward:.2f}.zip"
        current_agent_save_path = os.path.join(MODEL_SAVE_DIR, current_agent_model_name)
        agent.save(current_agent_save_path)
        print(f"    Agent saved to: {current_agent_save_path}")

        if current_val_reward > best_val_reward:
            best_val_reward = current_val_reward
            best_agent_for_window_path = current_agent_save_path 
            print(f"    New best agent for this window with validation reward: {best_val_reward:.4f}")

        # Clean up to free memory if needed, though Python's GC should handle agent and envs
        del agent
        del single_env_for_init_train
        del env_val
        torch.cuda.empty_cache() # If using GPU

    best_agent_paths_per_window.append(best_agent_for_window_path if 'best_agent_for_window_path' in locals() and best_agent_for_window_path is not None else None)
    
    if best_agent_paths_per_window[-1] is None:
        print(f"  No best agent found or saved for window {i_window+1}. Skipping backtest.")
        all_backtest_results.append({"window": i_window+1, "status": "no_best_agent", "metrics": {}})
        continue

    # 3. Backtest the best agent of the window
    print(f"  Backtesting best agent for Window {i_window+1} ({best_agent_paths_per_window[-1]})" )
    
    # Create Backtesting Environment
    env_test_config = {
        'returns_df': test_returns, 'prices_df': test_prices, 'vol_df': test_vola,
        'window_size': ENV_WINDOW_SIZE, 'transaction_cost': TRANSACTION_COST,
        'initial_balance': INITIAL_BALANCE, 'reward_scaling': REWARD_SCALING, 'eta': ETA_DSR
    }
    env_test = PortfolioEnv(**env_test_config)
    
    # Load the best agent for this window
    # For loading, we need a sample env. We can create a dummy one or use env_test.
    # The DRLAgent needs an env instance for its constructor to derive parameters for make_env.
    # So, we pass a temporary env instance here.
    # The actual self.env for the loaded model will be set by PPO.load(env=env_test)
    
    # Create a temporary env instance for DRLAgent initialization before loading the model.
    # This env should reflect the structure the agent was trained on (e.g. observation/action space from PortfolioEnv)
    # but the actual data doesn't matter as much for just loading.
    # However, to be safe, use a structure similar to what it was trained on.
    # The DRLAgent.load method sets the environment for the loaded PPO model.
    
    # Simplified: Create a DRLAgent shell, then load into it.
    # The DRLAgent constructor needs an 'env' to setup its internal SubprocVecEnv, even if we immediately load.
    # We can pass the test_env for this, but DRLAgent will make it a VecEnv.
    # For loading for evaluation, the internal self.env of DRLAgent is less critical
    # if the PPO.load() correctly associates the model with the new eval_env.
    
    # Let's use the DRLAgent.load() method which takes an env.
    # We need to initialize DRLAgent first with *some* env that has the right structure.
    # The `single_env_for_init_train` used earlier has the correct structure.
    # It is important that the observation and action spaces match.
    
    # Re-create a template env for agent initialization before loading
    # This is just to satisfy DRLAgent's __init__ requirement for an env instance.
    # The actual environment for the loaded model will be `env_test`.
    temp_env_for_load_init = PortfolioEnv(
        returns_df=train_returns.iloc[:ENV_WINDOW_SIZE+5], # minimal data for init
        prices_df=train_prices.iloc[:ENV_WINDOW_SIZE+5],
        vol_df=train_vola.iloc[:ENV_WINDOW_SIZE+5],
        window_size=ENV_WINDOW_SIZE, 
        initial_balance=INITIAL_BALANCE
    )

    best_agent_loaded = DRLAgent(
        env=temp_env_for_load_init, # Template env
        n_envs=1, # For eval, n_envs=1 is fine for the DRLAgent wrapper
        policy_kwargs=POLICY_KWARGS 
        # Other params don't matter as much as we are loading a pre-trained model
    )
    
    print(f"    Loading model from: {best_agent_paths_per_window[-1]}")
    # Pass the actual test_env to PPO.load via DRLAgent.load method
    best_agent_loaded.load(path=best_agent_paths_per_window[-1], env=env_test) 
                                   
    print("    Running backtest evaluation...")
    backtest_metrics = best_agent_loaded.evaluate(eval_env=env_test, n_eval_episodes=1) # n_eval_episodes for backtest
    
    print(f"    Backtest Metrics for Window {i_window+1}:")
    for key, value in backtest_metrics.items():
        print(f"      {key}: {value}")
    
    all_backtest_results.append({
        "window": i_window+1, 
        "best_agent_path": best_agent_paths_per_window[-1],
        "status": "completed",
        "metrics": backtest_metrics
    })
    
    del best_agent_loaded
    del temp_env_for_load_init
    del env_test
    torch.cuda.empty_cache() # If using GPU

print("\n--- All Windows Processed ---")
print("Summary of Best Agent Paths:")
for i, path in enumerate(best_agent_paths_per_window):
    print(f"Window {i+1}: {path}")

print("\nSummary of Backtest Results:")
for result in all_backtest_results:
    print(f"Window {result['window']} ({result['status']}):")
    if result['status'] == 'completed':
        # print(f"  Agent: {result['best_agent_path']}")
        for k, v in result['metrics'].items():
            if isinstance(v, float): print(f"    {k}: {v:.4f}")
            else: print(f"    {k}: {v}")

--- Starting Window 1/10 (Train Year Start: 2006) ---
  Train Period: 2006-01-01 to 2010-12-31
  Val Period  : 2011-01-01 to 2011-12-31
  Test Period : 2012-01-01 to 2012-12-31
  Training Agent 1/5 with seed 0...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window1_Agent1_Seed0
    Evaluating agent on validation set...
    Validation Mean Reward: -0.7204
    Agent saved to: ../models/sliding_window_jules/agent_win1_seed0_valrew-0.72.zip
    New best agent for this window with validation reward: -0.7204
  Training Agent 2/5 with seed 1...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window1_Agent2_Seed1
    Evaluating agent on validation set...
    Validation Mean Reward: -0.9013
    Agent saved to: ../models/sliding_window_jules/agent_win1_seed1_valrew-0.90.zip
  Training Agent 3/5 with seed 2...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window1_Agent3_Seed2
    Evaluating agent on validation set...
    Validation Mean Reward: -0.6334
    Agent saved to: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    New best agent for this window with validation reward: -0.6334
  Training Agent 4/5 with seed 3...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window1_Agent4_Seed3
    Evaluating agent on validation set...
    Validation Mean Reward: -1.2657
    Agent saved to: ../models/sliding_window_jules/agent_win1_seed3_valrew-1.27.zip
  Training Agent 5/5 with seed 4...


Output()

    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window1_Agent5_Seed4
    Evaluating agent on validation set...
    Validation Mean Reward: -1.0242
    Agent saved to: ../models/sliding_window_jules/agent_win1_seed4_valrew-1.02.zip
  Backtesting best agent for Window 1 (../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip)
    Loading model from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    Running backtest evaluation...
    Backtest Metrics for Window 1:
      Annual return: -0.010818746062720264
      Cumulative returns: -0.00812508230809228
      Annual volatility: 0.11686190880372624
      Sharpe ratio: -0.20582213093808885
      Calmar ratio: -0.19944280349705118
      Stability: 0.8953658389792367
      Max drawdown: -0.054244855532630046
      Omega ratio: 0.9666915346397733
      Sortino ratio: -0.332271223451118
      Skew: 0.12954004367889269
      Kurtosis: 0.4449310411152507
      Tail ratio: 1.211

Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
Model loaded from ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window2_Agent1_Seed5
    Evaluating agent on validation set...
    Validation Mean Reward: 2.1022
    Agent saved to: ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    New best agent for this window with validation reward: 2.1022
  Training Agent 2/5 with seed 6...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
Model loaded from ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window2_Agent2_Seed6
    Evaluating agent on validation set...
    Validation Mean Reward: 2.1022
    Agent saved to: ../models/sliding_window_jules/agent_win2_seed6_valrew2.10.zip
  Training Agent 3/5 with seed 7...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
Model loaded from ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window2_Agent3_Seed7
    Evaluating agent on validation set...
    Validation Mean Reward: 2.1022
    Agent saved to: ../models/sliding_window_jules/agent_win2_seed7_valrew2.10.zip
  Training Agent 4/5 with seed 8...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
Model loaded from ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window2_Agent4_Seed8
    Evaluating agent on validation set...
    Validation Mean Reward: 2.1022
    Agent saved to: ../models/sliding_window_jules/agent_win2_seed8_valrew2.10.zip
  Training Agent 5/5 with seed 9...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
Model loaded from ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window2_Agent5_Seed9
    Evaluating agent on validation set...
    Validation Mean Reward: 2.1022
    Agent saved to: ../models/sliding_window_jules/agent_win2_seed9_valrew2.10.zip
  Backtesting best agent for Window 2 (../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip)
    Loading model from: ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    Running backtest evaluation...
    Backtest Metrics for Window 2:
      Annual return: 0.024056864928895694
      Cumulative returns: 0.01818100826193536
      Annual volatility: 0.10467793205348996
      Sharpe ratio: 0.08843626696866674
      Calmar ratio: 0.43839208865562285
      Stability: 0.9052412209784042
      Max drawdown: -0.054875225971044994
      Omega ratio: 1.0142063848283414
      Sortino ratio: 0.13699957367171992
      Skew: -0.23772409696258504
      Kurtosis: 0.6286364685975867
      Tail ratio: 0.977870154

Output()

Model loaded from ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window3_Agent1_Seed10
    Evaluating agent on validation set...
    Validation Mean Reward: 0.5081
    Agent saved to: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    New best agent for this window with validation reward: 0.5081
  Training Agent 2/5 with seed 11...
    Seeding agent from: ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip


Output()

Model loaded from ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window3_Agent2_Seed11
    Evaluating agent on validation set...
    Validation Mean Reward: 0.5081
    Agent saved to: ../models/sliding_window_jules/agent_win3_seed11_valrew0.51.zip
  Training Agent 3/5 with seed 12...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
Model loaded from ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window3_Agent3_Seed12
    Evaluating agent on validation set...
    Validation Mean Reward: 0.5081
    Agent saved to: ../models/sliding_window_jules/agent_win3_seed12_valrew0.51.zip
  Training Agent 4/5 with seed 13...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
Model loaded from ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window3_Agent4_Seed13
    Evaluating agent on validation set...
    Validation Mean Reward: 0.5081
    Agent saved to: ../models/sliding_window_jules/agent_win3_seed13_valrew0.51.zip
  Training Agent 5/5 with seed 14...
    Seeding agent from: ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip


Output()

Model loaded from ../models/sliding_window_jules/agent_win2_seed5_valrew2.10.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window3_Agent5_Seed14
    Evaluating agent on validation set...
    Validation Mean Reward: 0.5081
    Agent saved to: ../models/sliding_window_jules/agent_win3_seed14_valrew0.51.zip
  Backtesting best agent for Window 3 (../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip)
    Loading model from: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    Running backtest evaluation...
    Backtest Metrics for Window 3:
      Annual return: 0.026011735201471042
      Cumulative returns: 0.019653834167944195
      Annual volatility: 0.09695196519923632
      Sharpe ratio: 0.10706713099985747
      Calmar ratio: 0.4681761679340203
      Stability: 0.9116169456138152
      Max drawdown: -0.05555971658330301
      Omega ratio: 1.0186854631178337
      Sortino ratio: 0.15458837007297252
      Skew: -0.012056520558288045
      Kurtosis: 1.825217537873554
      Tail ratio: 0.934199

Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
Model loaded from ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window4_Agent1_Seed15
    Evaluating agent on validation set...
    Validation Mean Reward: 6.2151
    Agent saved to: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    New best agent for this window with validation reward: 6.2151
  Training Agent 2/5 with seed 16...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
Model loaded from ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window4_Agent2_Seed16
    Evaluating agent on validation set...
    Validation Mean Reward: 6.2151
    Agent saved to: ../models/sliding_window_jules/agent_win4_seed16_valrew6.22.zip
  Training Agent 3/5 with seed 17...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
Model loaded from ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window4_Agent3_Seed17
    Evaluating agent on validation set...
    Validation Mean Reward: 6.2151
    Agent saved to: ../models/sliding_window_jules/agent_win4_seed17_valrew6.22.zip
  Training Agent 4/5 with seed 18...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
Model loaded from ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window4_Agent4_Seed18
    Evaluating agent on validation set...
    Validation Mean Reward: 6.2151
    Agent saved to: ../models/sliding_window_jules/agent_win4_seed18_valrew6.22.zip
  Training Agent 5/5 with seed 19...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
Model loaded from ../models/sliding_window_jules/agent_win3_seed10_valrew0.51.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window4_Agent5_Seed19
    Evaluating agent on validation set...
    Validation Mean Reward: 6.2151
    Agent saved to: ../models/sliding_window_jules/agent_win4_seed19_valrew6.22.zip
  Backtesting best agent for Window 4 (../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip)
    Loading model from: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    Running backtest evaluation...
    Backtest Metrics for Window 4:
      Annual return: 0.011837769436073176
      Cumulative returns: 0.008959485222697072
      Annual volatility: 0.13671205396811442
      Sharpe ratio: 0.00798610794442625
      Calmar ratio: 0.18673640697292032
      Stability: 0.8797302681089109
      Max drawdown: -0.0633929378205817
      Omega ratio: 1.0013142301987856
      Sortino ratio: 0.0139790525255839
      Skew: 0.4196786751401754
      Kurtosis: 1.349186985769717
      Tail ratio: 1.0884118013

Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
Model loaded from ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window5_Agent1_Seed20
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1900
    Agent saved to: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    New best agent for this window with validation reward: 4.1900
  Training Agent 2/5 with seed 21...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
Model loaded from ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window5_Agent2_Seed21
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1900
    Agent saved to: ../models/sliding_window_jules/agent_win5_seed21_valrew4.19.zip
  Training Agent 3/5 with seed 22...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
Model loaded from ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window5_Agent3_Seed22
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1900
    Agent saved to: ../models/sliding_window_jules/agent_win5_seed22_valrew4.19.zip
  Training Agent 4/5 with seed 23...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
Model loaded from ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window5_Agent4_Seed23
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1900
    Agent saved to: ../models/sliding_window_jules/agent_win5_seed23_valrew4.19.zip
  Training Agent 5/5 with seed 24...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
Model loaded from ../models/sliding_window_jules/agent_win4_seed15_valrew6.22.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window5_Agent5_Seed24
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1900
    Agent saved to: ../models/sliding_window_jules/agent_win5_seed24_valrew4.19.zip
  Backtesting best agent for Window 5 (../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip)
    Loading model from: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    Running backtest evaluation...
    Backtest Metrics for Window 5:
      Annual return: 0.025407934526497478
      Cumulative returns: 0.019198995070798475
      Annual volatility: 0.09879013830315057
      Sharpe ratio: 0.10099172201425692
      Calmar ratio: 0.3848461258829601
      Stability: 0.9100918957502557
      Max drawdown: -0.06602102195573244
      Omega ratio: 1.0176042262670864
      Sortino ratio: 0.14446616384098712
      Skew: -0.27705317851611483
      Kurtosis: 2.74516860875174
      Tail ratio: 1.23258730

Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window6_Agent1_Seed25
    Evaluating agent on validation set...
    Validation Mean Reward: -1.2638
    Agent saved to: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    New best agent for this window with validation reward: -1.2638
  Training Agent 2/5 with seed 26...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window6_Agent2_Seed26
    Evaluating agent on validation set...
    Validation Mean Reward: -1.2638
    Agent saved to: ../models/sliding_window_jules/agent_win6_seed26_valrew-1.26.zip
  Training Agent 3/5 with seed 27...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window6_Agent3_Seed27
    Evaluating agent on validation set...
    Validation Mean Reward: -1.2638
    Agent saved to: ../models/sliding_window_jules/agent_win6_seed27_valrew-1.26.zip
  Training Agent 4/5 with seed 28...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window6_Agent4_Seed28
    Evaluating agent on validation set...
    Validation Mean Reward: -1.2638
    Agent saved to: ../models/sliding_window_jules/agent_win6_seed28_valrew-1.26.zip
  Training Agent 5/5 with seed 29...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win5_seed20_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window6_Agent5_Seed29
    Evaluating agent on validation set...
    Validation Mean Reward: -1.2638
    Agent saved to: ../models/sliding_window_jules/agent_win6_seed29_valrew-1.26.zip
  Backtesting best agent for Window 6 (../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip)
    Loading model from: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    Running backtest evaluation...
    Backtest Metrics for Window 6:
      Annual return: 0.02938656077943258
      Cumulative returns: 0.022077399539502718
      Annual volatility: 0.059509005916461105
      Sharpe ratio: 0.18041354975837828
      Calmar ratio: 0.9775385434454213
      Stability: 0.943833411906691
      Max drawdown: -0.03006179242391511
      Omega ratio: 1.032118505000546
      Sortino ratio: 0.2596972176573028
      Skew: -0.23077173984078495
      Kurtosis: 2.0648622507755703
      Tail ratio: 1.23846

Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
Model loaded from ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window7_Agent1_Seed30
    Evaluating agent on validation set...
    Validation Mean Reward: 2.4158
    Agent saved to: ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    New best agent for this window with validation reward: 2.4158
  Training Agent 2/5 with seed 31...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
Model loaded from ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window7_Agent2_Seed31
    Evaluating agent on validation set...
    Validation Mean Reward: 2.4158
    Agent saved to: ../models/sliding_window_jules/agent_win7_seed31_valrew2.42.zip
  Training Agent 3/5 with seed 32...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
Model loaded from ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window7_Agent3_Seed32
    Evaluating agent on validation set...
    Validation Mean Reward: 2.4158
    Agent saved to: ../models/sliding_window_jules/agent_win7_seed32_valrew2.42.zip
  Training Agent 4/5 with seed 33...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
Model loaded from ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window7_Agent4_Seed33
    Evaluating agent on validation set...
    Validation Mean Reward: 2.4158
    Agent saved to: ../models/sliding_window_jules/agent_win7_seed33_valrew2.42.zip
  Training Agent 5/5 with seed 34...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
Model loaded from ../models/sliding_window_jules/agent_win6_seed25_valrew-1.26.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window7_Agent5_Seed34
    Evaluating agent on validation set...
    Validation Mean Reward: 2.4158
    Agent saved to: ../models/sliding_window_jules/agent_win7_seed34_valrew2.42.zip
  Backtesting best agent for Window 7 (../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip)
    Loading model from: ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    Running backtest evaluation...
    Backtest Metrics for Window 7:
      Annual return: -0.014569973569240391
      Cumulative returns: -0.01100510703406865
      Annual volatility: 0.12928222688428345
      Sharpe ratio: -0.20377529860420782
      Calmar ratio: -0.14846417242024793
      Stability: 0.885518231132552
      Max drawdown: -0.09813797720838742
      Omega ratio: 0.9645120073648895
      Sortino ratio: -0.3068261139284563
      Skew: 0.6024237602752441
      Kurtosis: 5.6822793139697225
      Tail ratio: 0.82348

Output()

Model loaded from ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window8_Agent1_Seed35
    Evaluating agent on validation set...
    Validation Mean Reward: -3.9313
    Agent saved to: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    New best agent for this window with validation reward: -3.9313
  Training Agent 2/5 with seed 36...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
Model loaded from ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window8_Agent2_Seed36
    Evaluating agent on validation set...
    Validation Mean Reward: -3.9313
    Agent saved to: ../models/sliding_window_jules/agent_win8_seed36_valrew-3.93.zip
  Training Agent 3/5 with seed 37...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
Model loaded from ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window8_Agent3_Seed37
    Evaluating agent on validation set...
    Validation Mean Reward: -3.9313
    Agent saved to: ../models/sliding_window_jules/agent_win8_seed37_valrew-3.93.zip
  Training Agent 4/5 with seed 38...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
Model loaded from ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window8_Agent4_Seed38
    Evaluating agent on validation set...
    Validation Mean Reward: -3.9313
    Agent saved to: ../models/sliding_window_jules/agent_win8_seed38_valrew-3.93.zip
  Training Agent 5/5 with seed 39...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
Model loaded from ../models/sliding_window_jules/agent_win7_seed30_valrew2.42.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window8_Agent5_Seed39
    Evaluating agent on validation set...
    Validation Mean Reward: -3.9313
    Agent saved to: ../models/sliding_window_jules/agent_win8_seed39_valrew-3.93.zip
  Backtesting best agent for Window 8 (../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip)
    Loading model from: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    Running backtest evaluation...
    Backtest Metrics for Window 8:
      Annual return: 0.1650127303267681
      Cumulative returns: 0.1227276985504806
      Annual volatility: 0.10774941003807444
      Sharpe ratio: 1.2863377743924036
      Calmar ratio: 3.1184709617226645
      Stability: 0.9027312413243614
      Max drawdown: -0.0529146278263287
      Omega ratio: 1.2467454078173812
      Sortino ratio: 1.6161115068003677
      Skew: -0.839421123490313
      Kurtosis: 2.4992786728547722
      Tail ratio: 1.06806099078

Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
Model loaded from ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window9_Agent1_Seed40
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1874
    Agent saved to: ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    New best agent for this window with validation reward: 4.1874
  Training Agent 2/5 with seed 41...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
Model loaded from ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window9_Agent2_Seed41
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1874
    Agent saved to: ../models/sliding_window_jules/agent_win9_seed41_valrew4.19.zip
  Training Agent 3/5 with seed 42...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
Model loaded from ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window9_Agent3_Seed42
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1874
    Agent saved to: ../models/sliding_window_jules/agent_win9_seed42_valrew4.19.zip
  Training Agent 4/5 with seed 43...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
Model loaded from ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window9_Agent4_Seed43
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1874
    Agent saved to: ../models/sliding_window_jules/agent_win9_seed43_valrew4.19.zip
  Training Agent 5/5 with seed 44...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
Model loaded from ../models/sliding_window_jules/agent_win8_seed35_valrew-3.93.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window9_Agent5_Seed44
    Evaluating agent on validation set...
    Validation Mean Reward: 4.1874
    Agent saved to: ../models/sliding_window_jules/agent_win9_seed44_valrew4.19.zip
  Backtesting best agent for Window 9 (../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip)
    Loading model from: ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    Running backtest evaluation...
    Backtest Metrics for Window 9:
      Annual return: 0.45083355031774874
      Cumulative returns: 0.3278139023567641
      Annual volatility: 0.23091277035377625
      Sharpe ratio: 1.641504696574793
      Calmar ratio: 4.896718444554854
      Stability: 0.8124052524961539
      Max drawdown: -0.09206850576003102
      Omega ratio: 1.3269460667052981
      Sortino ratio: 2.376121019905467
      Skew: 0.008333723213018118
      Kurtosis: 3.030767689756988
      Tail ratio: 0.905769410287267

Output()

Model loaded from ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window10_Agent1_Seed45
    Evaluating agent on validation set...
    Validation Mean Reward: 5.1660
    Agent saved to: ../models/sliding_window_jules/agent_win10_seed45_valrew5.17.zip
    New best agent for this window with validation reward: 5.1660
  Training Agent 2/5 with seed 46...
    Seeding agent from: ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip


Output()

Model loaded from ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window10_Agent2_Seed46
    Evaluating agent on validation set...
    Validation Mean Reward: 5.1660
    Agent saved to: ../models/sliding_window_jules/agent_win10_seed46_valrew5.17.zip
  Training Agent 3/5 with seed 47...
    Seeding agent from: ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip


Output()

Model loaded from ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window10_Agent3_Seed47
    Evaluating agent on validation set...
    Validation Mean Reward: 5.1660
    Agent saved to: ../models/sliding_window_jules/agent_win10_seed47_valrew5.17.zip
  Training Agent 4/5 with seed 48...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window10_Agent4_Seed48
    Evaluating agent on validation set...
    Validation Mean Reward: 5.1660
    Agent saved to: ../models/sliding_window_jules/agent_win10_seed48_valrew5.17.zip
  Training Agent 5/5 with seed 49...


Output()

    Seeding agent from: ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
Model loaded from ../models/sliding_window_jules/agent_win9_seed40_valrew4.19.zip
    Starting training for 7500000 timesteps...



Training complete. Trained for 7500000 timesteps.
TensorBoard logs saved to directory: PPO_Window10_Agent5_Seed49
    Evaluating agent on validation set...
    Validation Mean Reward: 5.1660
    Agent saved to: ../models/sliding_window_jules/agent_win10_seed49_valrew5.17.zip
  Backtesting best agent for Window 10 (../models/sliding_window_jules/agent_win10_seed45_valrew5.17.zip)
    Loading model from: ../models/sliding_window_jules/agent_win10_seed45_valrew5.17.zip
    Running backtest evaluation...
    Backtest Metrics for Window 10:
      Annual return: 0.1761548064259022
      Cumulative returns: 0.13012891501868618
      Annual volatility: 0.10786736087810725
      Sharpe ratio: 1.3732459631788394
      Calmar ratio: 3.184562746357209
      Stability: 0.9026351306237503
      Max drawdown: -0.0553152254975676
      Omega ratio: 1.2585403580732544
      Sortino ratio: 1.8779464512663917
      Skew: -0.43804413982076834
      Kurtosis: 0.9055267656195762
      Tail ratio: 1.0922283

In [7]:
results_df = pd.DataFrame(all_backtest_results)

# Expand the 'metrics' dictionary into separate columns
metrics_df = results_df['metrics'].apply(pd.Series)
results_df = pd.concat([results_df.drop('metrics', axis=1), metrics_df], axis=1)

results_filename = f"backtest_results_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
results_save_path = os.path.join(MODEL_SAVE_DIR, results_filename)
results_df.to_csv(results_save_path, index=False)
print(f"\nBacktest results summary saved to: {results_save_path}")
print("\nFinal Results DataFrame:")
results_df.head()


Backtest results summary saved to: ../models/sliding_window_jules/backtest_results_summary_20250531_125621.csv

Final Results DataFrame:


Unnamed: 0,window,best_agent_path,status,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,...,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk (95%),Portfolio turnover,mean_reward,std_reward,n_eval_episodes,final_portfolio_value_first_episode
0,1,../models/sliding_window_jules/agent_win1_seed...,completed,-0.010819,-0.008125,0.116862,-0.205822,-0.199443,0.895366,-0.054245,...,-0.332271,0.12954,0.444931,1.211972,-0.01098,,2.987013,0.0,1.0,99187.491769
1,2,../models/sliding_window_jules/agent_win2_seed...,completed,0.024057,0.018181,0.104678,0.088436,0.438392,0.905241,-0.054875,...,0.137,-0.237724,0.628636,0.97787,-0.010884,,0.563215,0.0,1.0,101818.100826
2,3,../models/sliding_window_jules/agent_win3_seed...,completed,0.026012,0.019654,0.096952,0.107067,0.468176,0.911617,-0.05556,...,0.154588,-0.012057,1.825218,0.9342,-0.009616,,6.209589,0.0,1.0,101965.383417
3,4,../models/sliding_window_jules/agent_win4_seed...,completed,0.011838,0.008959,0.136712,0.007986,0.186736,0.87973,-0.063393,...,0.013979,0.419679,1.349187,1.088412,-0.013299,,3.948851,0.0,1.0,100895.948522
4,5,../models/sliding_window_jules/agent_win5_seed...,completed,0.025408,0.019199,0.09879,0.100992,0.384846,0.910092,-0.066021,...,0.144466,-0.277053,2.745169,1.232587,-0.008878,,-1.788192,0.0,1.0,101919.899507


In [None]:
# PROBLEMS

# P1
# after the first window, the best agent is chosen as starting point for all agents in the next window
# but smth is wrong with the training process, randomization or smth
# all of the 5 agents in the following window are the same, with same rewards and performance etc
# therefore all of the following 9 windows after the first are superfluous
# or at least training 5 agents in the following windows is a waste of time ... smth

# P2
# the performance is getting better, but the final portfolio value is still just 101919 ie +2k$ which is really not good

# P3
# training logs are not saved, or at least the print statement says its saved
# but there is not directory and I cant find the log files anywhere

# P4
# training progress is not visible, only a tqdm bar
# maybe smth like pytorch lightning with live monitoring would be great