In [1]:
import pandas as pd
import numpy as np
import torch # Ensure torch is imported if policy_kwargs might be used or for consistency
import os
from datetime import datetime

# Assuming utils are in parent directory or PYTHONPATH is set
from utils.portfolio_env import PortfolioEnv 
from utils.drl_agent_jules import DRLAgent # Import the modified agent

# %load_ext autoreload
# %autoreload 2

In [2]:
# =============================================================================
# Configuration
# =============================================================================
# Which sliding window's best agent to load and backtest (1-based, 1..10).
# This is the main value a user is expected to change.
WINDOW_TO_ANALYZE = 1

# First training year of window 1; keep identical to the training notebook.
BASE_START_YEAR = 2006

# Input data files; keep identical to the training notebook.
PRICE_DATA_PATH = "../data/prices.parquet"
RETURNS_DATA_PATH = "../data/returns.parquet"
VOLA_DATA_PATH = "../data/vola.parquet"

# Folder (under ../models) into which the training notebook saved its agents.
folder_name = "xxx"
MODEL_SAVE_DIR = f"../models/{folder_name}"

# -----------------------------------------------------------------------------
# PortfolioEnv parameters -- keep consistent with the training environment
# -----------------------------------------------------------------------------
ENV_WINDOW_SIZE = 60
TRANSACTION_COST = 0.0
INITIAL_BALANCE = 100_000
# Typically left at 1.0 for evaluation unless it feeds into state/rewards.
REWARD_SCALING = 1.0
ETA_DSR = 1 / 252

# -----------------------------------------------------------------------------
# DRLAgent parameters -- used to build the agent shell before loading weights
# -----------------------------------------------------------------------------
# Kept for consistency with training; less critical if the saved model is
# fully self-contained.
POLICY_KWARGS = {
    "activation_fn": torch.nn.Tanh,
    "net_arch": [64, 64],
    "log_std_init": -1.0,
}

In [3]:
# Read the three full-history datasets (prices, returns, volatility) and make
# sure each one is indexed by a DatetimeIndex so it can be sliced by date later.
print("Loading full datasets...")
try:
    prices_df_full = pd.read_parquet(PRICE_DATA_PATH)
    returns_df_full = pd.read_parquet(RETURNS_DATA_PATH)
    vola_df_full = pd.read_parquet(VOLA_DATA_PATH)

    for frame in (prices_df_full, returns_df_full, vola_df_full):
        # Already datetime-indexed: nothing to convert.
        if isinstance(frame.index, pd.DatetimeIndex):
            continue
        # Assigning to .index updates the frame object itself, so the
        # *_df_full variables above see the converted index.
        frame.index = pd.to_datetime(frame.index)

    print("Data loaded successfully.")
except FileNotFoundError as e:
    # Give the user a hint, then re-raise so the notebook stops here.
    print(f"ERROR: Data file not found. {e}")
    print("Please ensure data is generated and paths are correct in Cell 2.")
    raise

Loading full datasets...
Data loaded successfully.


In [4]:
# --- Resolve the backtest period for the selected window and slice the data ---

# Number of sliding windows; must stay in sync with the training notebook.
# Named here so the range check and its error message cannot drift apart.
N_WINDOWS = 10

# WINDOW_TO_ANALYZE is 1-based (user-facing); window_index is 0-based.
window_index = WINDOW_TO_ANALYZE - 1
if not (0 <= window_index < N_WINDOWS):
    raise ValueError(f"WINDOW_TO_ANALYZE must be between 1 and {N_WINDOWS}. Got: {WINDOW_TO_ANALYZE}")

# Each window starts one year after the previous one.
train_start_year_for_window = BASE_START_YEAR + window_index

# Year layout of a window, relative to its training start year:
#   Training:   +0 .. +4  (5 years)
#   Validation: +5        (1 year)
#   Backtest:   +6        (1 year)
backtest_period_start_year = train_start_year_for_window + 5 + 1

backtest_start_date = pd.to_datetime(f"{backtest_period_start_year}-01-01")
backtest_end_date = pd.to_datetime(f"{backtest_period_start_year}-12-31")

print(f"Analyzing Window {WINDOW_TO_ANALYZE}")
print(f"  Original training start year for this window's agent: {train_start_year_for_window}")
print(f"  Backtest Period for this agent: {backtest_start_date.date()} to {backtest_end_date.date()}")

# Slice data for the backtest period (label-based, both endpoints inclusive).
backtest_prices = prices_df_full.loc[backtest_start_date:backtest_end_date]
backtest_returns = returns_df_full.loc[backtest_start_date:backtest_end_date]
backtest_vola = vola_df_full.loc[backtest_start_date:backtest_end_date]

# All three slices are passed to the environment later, so an empty one of
# any kind is an error -- not only an empty price slice.
empty_slices = [
    label
    for label, frame in (
        ("prices", backtest_prices),
        ("returns", backtest_returns),
        ("vola", backtest_vola),
    )
    if frame.empty
]
if empty_slices:
    print(f"ERROR: No data found for the backtest period of Window {WINDOW_TO_ANALYZE}.")
    print(f"  Empty slice(s): {', '.join(empty_slices)}")
    print("Check data availability and date calculations.")
    raise ValueError("Empty backtest data slice.")

print(f"  Backtest data slice length: {len(backtest_prices)} days.")

# Non-fatal sanity check: the three frames are expected to cover the same dates.
# NOTE(review): whether PortfolioEnv tolerates differing indices is not visible
# here, so this only warns.
if not (
    backtest_prices.index.equals(backtest_returns.index)
    and backtest_prices.index.equals(backtest_vola.index)
):
    print("WARNING: prices, returns and vola slices do not share an identical date index.")

# Require one row more than the environment's observation window.
# NOTE(review): presumably so at least one step can follow the initial
# window -- confirm against PortfolioEnv.
min_data_len = ENV_WINDOW_SIZE + 1
if len(backtest_prices) < min_data_len:
    # Deliberately a warning only: the environment may still fail to start or
    # to run a full episode with this little data.
    print(f"WARNING: Backtest data length ({len(backtest_prices)}) is less than required minimum ({min_data_len}).")

Analyzing Window 1
  Original training start year for this window's agent: 2006
  Backtest Period for this agent: 2012-01-01 to 2012-12-31
  Backtest data slice length: 250 days.


In [5]:
# --- Locate the saved best-agent model for the selected window ---
# The training notebook is expected to write a summary CSV named
# "backtest_results_summary_<timestamp>.csv" into MODEL_SAVE_DIR, containing a
# 1-based 'window' column and a 'best_agent_path' column. The newest summary
# (lexicographically largest file name) is used to look up the model path.
# If nothing can be resolved, MODEL_PATH_TO_LOAD stays None and the final
# check below raises FileNotFoundError with guidance.

MODEL_PATH_TO_LOAD = None
summary_csv_path = None

# Guard the directory listing: os.listdir on a missing directory would raise
# before any of the guidance below could be printed.
if os.path.isdir(MODEL_SAVE_DIR):
    potential_summaries = sorted(
        (
            os.path.join(MODEL_SAVE_DIR, f)
            for f in os.listdir(MODEL_SAVE_DIR)
            if f.startswith("backtest_results_summary_") and f.endswith(".csv")
        ),
        reverse=True,  # Get the latest summary first
    )
else:
    potential_summaries = []
    print(f"Model directory does not exist: {MODEL_SAVE_DIR}")

if potential_summaries:
    summary_csv_path = potential_summaries[0]
    print(f"Loading best agent path from summary CSV: {summary_csv_path}")
    summary_df = pd.read_csv(summary_csv_path)

    # Report a malformed summary explicitly instead of failing with a KeyError.
    missing_columns = [c for c in ('window', 'best_agent_path') if c not in summary_df.columns]
    if missing_columns:
        print(f"Summary CSV is missing expected column(s): {missing_columns}")
    else:
        agent_path_series = summary_df.loc[summary_df['window'] == WINDOW_TO_ANALYZE, 'best_agent_path']
        if not agent_path_series.empty:
            MODEL_PATH_TO_LOAD = agent_path_series.iloc[0]
            if pd.isna(MODEL_PATH_TO_LOAD):  # Path may be NaN if the window was skipped
                MODEL_PATH_TO_LOAD = None
        else:
            print(f"Could not find best_agent_path for Window {WINDOW_TO_ANALYZE} in summary CSV.")
else:
    print("No summary CSV found. Model path cannot be automatically determined.")
    print(f"Please manually set MODEL_PATH_TO_LOAD if you know the direct path to the .zip file for Window {WINDOW_TO_ANALYZE}'s best agent.")

# Fallback or manual override:
# MODEL_PATH_TO_LOAD = "../models/sliding_window_jules/MANUAL_PATH_TO_BEST_AGENT_FOR_WINDOW_X.zip"

# Stop here unless a usable, existing path was resolved.
if not MODEL_PATH_TO_LOAD or not os.path.exists(MODEL_PATH_TO_LOAD):
    print(f"ERROR: Model path for Window {WINDOW_TO_ANALYZE} not found or is invalid: {MODEL_PATH_TO_LOAD}")
    print("Please ensure the training script ran successfully and saved the models, or specify the path manually.")
    raise FileNotFoundError(f"Best agent model for Window {WINDOW_TO_ANALYZE} not found at {MODEL_PATH_TO_LOAD}")
else:
    print(f"Will load agent model from: {MODEL_PATH_TO_LOAD}")

# --- Backtest environment ---
# Full-length environment over the backtest slice; the loaded agent is
# attached to this one and evaluated on it.
env_backtest_config = dict(
    returns_df=backtest_returns,
    prices_df=backtest_prices,
    vol_df=backtest_vola,
    window_size=ENV_WINDOW_SIZE,
    transaction_cost=TRANSACTION_COST,
    initial_balance=INITIAL_BALANCE,
    reward_scaling=REWARD_SCALING,
    eta=ETA_DSR,
)
env_backtest = PortfolioEnv(**env_backtest_config)

# --- Throwaway environment for constructing the agent shell ---
# Only the first few rows are used: it is needed for its structure
# (observation/action space), not for running episodes.
# `eta` is left at the environment's default here.
init_rows = slice(None, ENV_WINDOW_SIZE + 5)
temp_env_for_load_init = PortfolioEnv(
    returns_df=backtest_returns.iloc[init_rows],
    prices_df=backtest_prices.iloc[init_rows],
    vol_df=backtest_vola.iloc[init_rows],
    window_size=ENV_WINDOW_SIZE,
    initial_balance=INITIAL_BALANCE,
)

# --- Agent shell ---
# A single environment is enough: evaluation runs on one backtest env.
# POLICY_KWARGS is passed for consistency with training.
# NOTE(review): the saved model presumably carries its own policy kwargs --
# confirm in DRLAgent.load.
loaded_agent = DRLAgent(
    env=temp_env_for_load_init,
    n_envs=1,
    policy_kwargs=POLICY_KWARGS,
)

# --- Load the saved model and bind it to the real backtest environment ---
print(f"Loading model into DRLAgent from {MODEL_PATH_TO_LOAD}...")
loaded_agent.load(path=MODEL_PATH_TO_LOAD, env=env_backtest)
print("Model loaded successfully.")

Loading best agent path from summary CSV: ../models/sliding_window_jules/backtest_results_summary_20250531_124739.csv
Will load agent model from: ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip
Loading model into DRLAgent from ../models/sliding_window_jules/agent_win1_seed2_valrew-0.63.zip...
Model loaded successfully.


In [6]:
print("\nStarting backtest evaluation...")

# Run the loaded agent once, deterministically, over the backtest environment.
# A single episode corresponds to one chronological pass through the period.
backtest_metrics, backtest_portfolio = loaded_agent.evaluate(
    eval_env=env_backtest,
    n_eval_episodes=1,
    deterministic=True,
)

print("\n--- Backtest Evaluation Metrics ---")
if not backtest_metrics:
    print("  No metrics returned from evaluation.")
else:
    for metric_name, metric_value in backtest_metrics.items():
        # Floats are shown with four decimals; everything else is printed as-is.
        if isinstance(metric_value, float):
            shown = f"{metric_value:.4f}"
        else:
            shown = f"{metric_value}"
        print(f"  {metric_name}: {shown}")

# Example of reading individual metrics from the returned mapping:
# final_value = backtest_metrics.get('final_portfolio_value_first_episode', INITIAL_BALANCE)
# sharpe = backtest_metrics.get('Sharpe ratio', np.nan)
# print(f"\nFinal portfolio value: ${final_value:,.2f}")
# print(f"Sharpe ratio: {sharpe:.4f}")


Starting backtest evaluation...

--- Backtest Evaluation Metrics ---
  n_eval_episodes: 1
  final_portfolio_value_first_episode: 99187.4918
  mean_reward: 2.9870
  std_reward: 0.0000
  Annual return: -0.0108
  Cumulative returns: -0.0081
  Annual volatility: 0.1169
  Sharpe ratio: -0.2058
  Calmar ratio: -0.1994
  Stability: 0.8954
  Max drawdown: -0.0542
  Omega ratio: 0.9667
  Sortino ratio: -0.3323
  Skew: 0.1295
  Kurtosis: 0.4449
  Tail ratio: 1.2120
  Daily value at risk (95%): -0.0110
  Portfolio turnover: nan


In [13]:
# Ad-hoc check: display the `portfolio_value` attribute of the backtest
# environment. As the last expression in the cell, its value is shown as output.
# NOTE(review): this cell's recorded execution count (13) is out of sequence
# with its neighbours (6 and 8); confirm the notebook under Restart & Run All.
env_backtest.portfolio_value

np.float64(99187.49176919078)

In [8]:
# Optional deeper analysis / plotting (portfolio value over time, allocations, ...).
# A plot is only drawn when the environment exposes a `history` container with a
# 'portfolio_value' entry; otherwise the cell just reports that it was skipped.
# Collecting step-by-step data would require instrumenting the environment or
# the evaluation loop.

history_available = hasattr(env_backtest, 'history') and 'portfolio_value' in env_backtest.history

if not history_available:
    print("\n(Skipping plot: Portfolio history not available in env_backtest or not implemented)")
else:
    import matplotlib.pyplot as plt

    portfolio_value_series = pd.Series(env_backtest.history['portfolio_value'])
    portfolio_value_series.plot(title=f"Portfolio Value - Window {WINDOW_TO_ANALYZE} Backtest")
    plt.show()

print("\nNotebook execution complete.")


(Skipping plot: Portfolio history not available in env_backtest or not implemented)

Notebook execution complete.
