# DRL Model Evaluation

In [63]:
import os
import re
import json
import glob
import numpy as np
import pandas as pd
from datetime import datetime

from utils.config import DRLConfig
from utils.portfolio import Portfolio
from utils.portfolio_env import PortfolioEnv
from utils.drl_agent import DRLAgent

from pprint import pprint

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Load Configuration and Data

In [67]:
# DRL model timestamp
MODEL_TIMESTAMP = "20250603_011024"

model_dir = f"../models/{MODEL_TIMESTAMP}"
config_path = os.path.join(model_dir, f"config_{MODEL_TIMESTAMP}.json")

def extract_seed(filename):
    """Extracts the numeric seed from a filename like agent_seed12_valrew3.57.zip"""
    match = re.search(r'agent_seed(\d+)_valrew', filename)
    return int(match.group(1)) if match else float('inf')  # fallback to push malformed files to end

agent_model_files = glob.glob(os.path.join(model_dir, "agent_seed*_valrew*.zip"))
agent_model_files.sort(key=lambda f: extract_seed(os.path.basename(f)))
print(f"Found {len(agent_model_files)} agent models in {model_dir}:")
for f in agent_model_files:
    print(f"  - {os.path.basename(f)}")

# Path to MVO results CSV for comparison
MVO_RESULTS_CSV_PATH = "../results/20250626_0038_mvo_backtest_[2012-01-01,2022-12-31]_daily/mvo_metrics.csv"

# Evaluation period
EVAL_START_DATE = "2020-01-01"
EVAL_END_DATE = "2022-12-31"

# specify which DRL agents to evaluate from the model timestamp folder
# if None or empty list, all agents (agent_seed*_valrew*.zip) in the folder will be evaluated.
SPECIFIC_AGENTS_TO_EVALUATE = []  # or None

Found 25 agent models in ../models/20250603_011024:
  - agent_seed0_valrew-8.70.zip
  - agent_seed1_valrew-11.50.zip
  - agent_seed2_valrew-11.72.zip
  - agent_seed3_valrew-9.32.zip
  - agent_seed4_valrew-13.12.zip
  - agent_seed5_valrew-25.03.zip
  - agent_seed6_valrew-26.75.zip
  - agent_seed7_valrew-24.21.zip
  - agent_seed8_valrew-24.88.zip
  - agent_seed9_valrew-25.71.zip
  - agent_seed10_valrew1.91.zip
  - agent_seed11_valrew3.37.zip
  - agent_seed12_valrew3.57.zip
  - agent_seed13_valrew6.65.zip
  - agent_seed14_valrew4.47.zip
  - agent_seed15_valrew22.42.zip
  - agent_seed16_valrew20.83.zip
  - agent_seed17_valrew20.07.zip
  - agent_seed18_valrew21.48.zip
  - agent_seed19_valrew21.03.zip
  - agent_seed20_valrew10.82.zip
  - agent_seed21_valrew8.87.zip
  - agent_seed22_valrew8.12.zip
  - agent_seed23_valrew8.86.zip
  - agent_seed24_valrew10.92.zip


In [58]:
with open(config_path, "r") as f:
    config_dict = json.load(f)
drl_config = DRLConfig(**config_dict)
pprint(drl_config)

DRLConfig(n_windows=5,
          agents_per_window=5,
          base_start_year=2006,
          env_window_size=60,
          transaction_cost=0.0,
          initial_balance=100000,
          reward_scaling=1.0,
          eta_dsr=0.003968253968253968,
          n_envs=10,
          total_timesteps_per_round=7500000,
          n_steps_per_env=756,
          batch_size=1260,
          n_epochs=16,
          gamma=0.9,
          gae_lambda=0.9,
          clip_range=0.25,
          log_std_init=-1.0,
          initial_lr=0.0003,
          final_lr=1e-05,
          model_save_dir='../models/20250603_011024',
          tensorboard_log_dir='../logs/20250603_011024')


In [50]:
# Define data paths (consistent with drl_train.ipynb)
DATA_DIR = "../data/snp_new"
RETURNS_PATH = os.path.join(DATA_DIR, "returns_1d.parquet")
PRICES_PATH = os.path.join(DATA_DIR, "prices_1d.parquet")
VOLA_PATH = os.path.join(DATA_DIR, "vola_1d.parquet")

df_ret = pd.read_parquet(RETURNS_PATH)
df_prices = pd.read_parquet(PRICES_PATH)
df_vol = pd.read_parquet(VOLA_PATH)

# Ensure datetime index
df_ret.index = pd.to_datetime(df_ret.index)
df_prices.index = pd.to_datetime(df_prices.index)
df_vol.index = pd.to_datetime(df_vol.index)

print("Data loaded successfully:")
print(f"Returns: {df_ret.shape}, {df_ret.index.min()} to {df_ret.index.max()}")
print(f"Prices: {df_prices.shape}, {df_prices.index.min()} to {df_prices.index.max()}")
print(f"Volatility: {df_vol.shape}, {df_vol.index.min()} to {df_vol.index.max()}")

Data loaded successfully:
Returns: (4278, 11), 2005-01-04 00:00:00 to 2021-12-30 00:00:00
Prices: (4279, 11), 2005-01-03 00:00:00 to 2021-12-30 00:00:00
Volatility: (4278, 3), 2005-01-04 00:00:00 to 2021-12-30 00:00:00


#### Determine Evaluation Period and Slice Data

In [51]:
if EVAL_START_DATE and EVAL_END_DATE:
    eval_start_date = pd.to_datetime(EVAL_START_DATE)
    eval_end_date = pd.to_datetime(EVAL_END_DATE)
    print(
        f"Using user-defined evaluation period: {eval_start_date.date()} to {eval_end_date.date()}"
    )
else:
    eval_end_date = df_prices.index.max()
    eval_start_date = eval_end_date - pd.DateOffset(years=1) + pd.DateOffset(days=1)
    EVAL_START_DATE = eval_start_date.strftime("%Y-%m-%d")
    EVAL_END_DATE = eval_end_date.strftime("%Y-%m-%d")
    print(f"Using default evaluation period: {EVAL_START_DATE} to {EVAL_END_DATE}")

# Slice data according to the evaluation period
eval_df_prices = df_prices.loc[eval_start_date:eval_end_date].copy()
eval_df_ret = df_ret.loc[eval_start_date:eval_end_date].copy()
eval_df_vol = df_vol.loc[eval_start_date:eval_end_date].copy()

# Check for sufficient data for the environment window
if eval_df_prices.empty or len(eval_df_prices) < drl_config.env_window_size:
    raise ValueError(
        f"Insufficient data for the evaluation period {eval_start_date.date()} to {eval_end_date.date()} "
        f"after slicing. Need at least {drl_config.env_window_size} days for the environment observation window. "
        f"Found {len(eval_df_prices)} days. Please check your dates or data source."
    )

print()
print(f"Data shape after slicing ({eval_start_date.date()} to {eval_end_date.date()}):")
print(f"Prices: {eval_df_prices.shape}")
print(f"Returns: {eval_df_ret.shape}")
print(f"Volatility: {eval_df_vol.shape}")

# Ensure data alignment and select common tickers across all three evaluation dataframes
common_tickers = eval_df_ret.columns.intersection(eval_df_prices.columns)
eval_df_ret = eval_df_ret[list(common_tickers)]
eval_df_prices = eval_df_prices[list(common_tickers)]

if eval_df_ret.empty or eval_df_prices.empty or eval_df_vol.empty:
    raise ValueError(
        "Dataframes are empty after aligning common tickers. Check data consistency for the selected period."
    )

Using user-defined evaluation period: 2020-01-01 to 2022-12-31

Data shape after slicing (2020-01-01 to 2022-12-31):
Prices: (504, 11)
Returns: (504, 11)
Volatility: (504, 3)


#### Load DRL Models and Evaluate

In [70]:
if SPECIFIC_AGENTS_TO_EVALUATE and len(SPECIFIC_AGENTS_TO_EVALUATE) > 0:
    agent_model_files = [
        os.path.join(model_dir, fname) for fname in SPECIFIC_AGENTS_TO_EVALUATE
    ]
    agent_model_files = [path for path in agent_model_files if os.path.exists(path)]
    print(f"Using user-specified agent models: {agent_model_files}")
else:
    agent_model_files = glob.glob(os.path.join(model_dir, "agent_seed*_valrew*.zip"))
    print(f"Found {len(agent_model_files)} agent models in {model_dir}:")
    for f in agent_model_files:
        print(f"  - {os.path.basename(f)}")

if not agent_model_files:
    raise FileNotFoundError(
        f"No agent model files found in {model_dir} matching criteria."
    )

evaluated_portfolios = {}  # To store Portfolio objects keyed by agent name
agent_errors = {}

Found 25 agent models in ../models/20250603_011024:
  - agent_seed13_valrew6.65.zip
  - agent_seed2_valrew-11.72.zip
  - agent_seed15_valrew22.42.zip
  - agent_seed1_valrew-11.50.zip
  - agent_seed12_valrew3.57.zip
  - agent_seed9_valrew-25.71.zip
  - agent_seed21_valrew8.87.zip
  - agent_seed11_valrew3.37.zip
  - agent_seed17_valrew20.07.zip
  - agent_seed20_valrew10.82.zip
  - agent_seed10_valrew1.91.zip
  - agent_seed23_valrew8.86.zip
  - agent_seed0_valrew-8.70.zip
  - agent_seed7_valrew-24.21.zip
  - agent_seed19_valrew21.03.zip
  - agent_seed6_valrew-26.75.zip
  - agent_seed16_valrew20.83.zip
  - agent_seed5_valrew-25.03.zip
  - agent_seed18_valrew21.48.zip
  - agent_seed3_valrew-9.32.zip
  - agent_seed8_valrew-24.88.zip
  - agent_seed4_valrew-13.12.zip
  - agent_seed24_valrew10.92.zip
  - agent_seed22_valrew8.12.zip
  - agent_seed14_valrew4.47.zip


In [104]:
agent_path = agent_model_files[-6]
agent_name = os.path.basename(agent_path)
print(f"\n--- Evaluating Agent: {agent_name} ---")

eval_env = PortfolioEnv(
    df_prices=eval_df_prices,
    df_returns=eval_df_ret,
    df_vola=eval_df_vol,
)

drl_agent = DRLAgent(env=eval_env)
drl_agent.load(agent_path)
eval_metrics, portfolio = drl_agent.evaluate(eval_env, n_eval_episodes=1, deterministic=True)

print(portfolio)
portfolio.plot_value_history()


--- Evaluating Agent: agent_seed3_valrew-9.32.zip ---
Portfolio with 11 assets, initial value: $100,000.00, current value: $186,671.46


## 6. Compare with MVO Results (Optional)

In [None]:
if MVO_RESULTS_CSV_PATH and os.path.exists(MVO_RESULTS_CSV_PATH):
    print(f"\n--- MVO Comparison --- C")
    print(f"Loading MVO results from: {MVO_RESULTS_CSV_PATH}")
    try:
        df_mvo_metrics = pd.read_csv(MVO_RESULTS_CSV_PATH)

        # The MVO metrics CSV might have an unnamed index column if saved with index=True
        if "Unnamed: 0" in df_mvo_metrics.columns:
            df_mvo_metrics = df_mvo_metrics.rename(
                columns={"Unnamed: 0": "MVO_Strategy_ID"}
            )
            # Or df_mvo_metrics.set_index('MVO_Strategy_ID', inplace=True) if that's preferred

        print("\nMVO Performance Metrics:")
        print(df_mvo_metrics.to_markdown(index=False))

        # Save MVO metrics to the results folder as well for completeness
        mvo_metrics_copy_path = os.path.join(
            results_save_dir, "mvo_comparison_metrics.csv"
        )
        df_mvo_metrics.to_csv(mvo_metrics_copy_path, index=False)
        print(f"\nCopied MVO metrics to: {mvo_metrics_copy_path}")

        if not df_drl_metrics.empty:
            print("\nDRL Performance Metrics (repeated for comparison):")
            print(df_drl_metrics.to_markdown(index=False))

            # Attempt a simple concatenation for side-by-side view if structures are somewhat similar
            # This is a basic comparison; more sophisticated merging might be needed depending on exact formats
            try:
                # Add a 'Type' column to distinguish DRL from MVO
                df_drl_metrics_typed = df_drl_metrics.copy()
                df_drl_metrics_typed["Type"] = "DRL"
                # Standardize 'Agent' column name for MVO if possible, or use a generic ID
                # For now, assuming MVO has a 'lookback' or 'MVO_Strategy_ID' that can act as an identifier
                df_mvo_metrics_typed = df_mvo_metrics.copy()
                df_mvo_metrics_typed["Type"] = "MVO"
                if "MVO_Strategy_ID" in df_mvo_metrics_typed.columns:
                    df_mvo_metrics_typed.rename(
                        columns={"MVO_Strategy_ID": "Agent"}, inplace=True
                    )
                elif "lookback" in df_mvo_metrics_typed.columns:
                    df_mvo_metrics_typed.rename(
                        columns={"lookback": "Agent"}, inplace=True
                    )
                else:  # Add a placeholder agent column if no clear identifier
                    df_mvo_metrics_typed["Agent"] = (
                        "MVO_lookback_" + df_mvo_metrics_typed.index.astype(str)
                    )

                # Ensure 'Agent' column is string type for both before concat if it exists
                if "Agent" in df_drl_metrics_typed.columns:
                    df_drl_metrics_typed["Agent"] = df_drl_metrics_typed[
                        "Agent"
                    ].astype(str)
                if "Agent" in df_mvo_metrics_typed.columns:
                    df_mvo_metrics_typed["Agent"] = df_mvo_metrics_typed[
                        "Agent"
                    ].astype(str)

                df_combined_metrics = pd.concat(
                    [df_drl_metrics_typed, df_mvo_metrics_typed], ignore_index=True
                )

                # Reorder columns for better readability
                cols_combined = ["Type", "Agent"]
                if "Error" in df_combined_metrics.columns:
                    cols_combined.append("Error")
                cols_combined.extend(
                    [
                        col
                        for col in df_combined_metrics.columns
                        if col not in cols_combined
                    ]
                )
                df_combined_metrics = df_combined_metrics[cols_combined]

                print("\nCombined DRL and MVO Metrics:")
                print(df_combined_metrics.to_markdown(index=False))

                combined_metrics_path = os.path.join(
                    results_save_dir, "drl_vs_mvo_metrics_comparison.csv"
                )
                df_combined_metrics.to_csv(combined_metrics_path, index=False)
                print(f"Saved combined metrics comparison to: {combined_metrics_path}")

            except Exception as e:
                print(f"Could not create a combined comparison table: {e}")
        else:
            print("DRL metrics are empty, cannot show side-by-side comparison.")

    except Exception as e:
        print(f"Error loading or processing MVO results: {e}")
        import traceback

        traceback.print_exc()
elif MVO_RESULTS_CSV_PATH:
    print(f"\nSpecified MVO results file not found: {MVO_RESULTS_CSV_PATH}")
else:
    print("\nNo MVO results CSV path specified. Skipping MVO comparison.")

print("\n--- Evaluation Notebook Complete ---")