# DRL Model Evaluation

In [1]:
import os
import re
import json
import glob
import numpy as np
import pandas as pd
from datetime import datetime

from utils.config import DRLConfig
from utils.portfolio import Portfolio
from utils.portfolio_env import PortfolioEnv
from utils.drl_agent import DRLAgent
from utils.drl_eval import evaluation_pipeline

from pprint import pprint

%load_ext autoreload
%autoreload 2

### Load Configuration and Data

In [2]:
## RENAME FILES
# import shutil
# # DRL model timestamp
# MODEL_FNAME = "full_random_run"
# model_dir = f"../models/{MODEL_FNAME}"
# agent_model_files = sorted(glob.glob(os.path.join(model_dir, "agent_seed*_valrew*.zip")))

# # rename agent_seed{number1}_valrew{number2}.zip to ...
# # agent_{number1//5 + 1}_{number1%5 + 1}_seed={number1}_test={number1 + 2006}_valrew={number2}.zip

# for fname in agent_model_files:
#     print("fname: ", fname)

#     n1 = fname.split("_seed")[1].split("_")[0]
#     n2 = fname.split("_valrew")[1].split(".zip")[0]

#     window_idx = int(n1)//5 + 1
#     agent_idx = int(n1)%5 + 1
#     renamed_file_name = f"agent_{window_idx}-{agent_idx}_seed={n1}_test={2006+window_idx+5}_valrew={n2}.zip" 
#     print("renamed: ", renamed_file_name)

#     # save as new fname
#     new_fname = os.path.join(model_dir, renamed_file_name)
#     shutil.copy(fname, new_fname)
#     print("new fname: ", new_fname)

In [3]:
# DRL model timestamp
MODEL_FNAME = "full_random_run"
model_dir = f"../models/{MODEL_FNAME}"

# get (first) config file in directory
files = os.listdir(model_dir)
config_files = [f for f in files if f.endswith(".json")]
config_path = os.path.join(model_dir, config_files[0])

agent_model_files = sorted(glob.glob(os.path.join(model_dir, "agent*_seed*_valrew*.zip")))
print(f"Found {len(agent_model_files)} agent models in {model_dir}:")
for f in agent_model_files:
    print(f"  - {os.path.basename(f)}")

Found 50 agent models in ../models/full_random_run:
  - agent_1-1_seed=0_test=2012_valrew=-10.18.zip
  - agent_1-2_seed=1_test=2012_valrew=-9.68.zip
  - agent_1-3_seed=2_test=2012_valrew=-10.94.zip
  - agent_1-4_seed=3_test=2012_valrew=-9.61.zip
  - agent_1-5_seed=4_test=2012_valrew=-13.27.zip
  - agent_10-1_seed=45_test=2021_valrew=-8.98.zip
  - agent_10-2_seed=46_test=2021_valrew=-9.75.zip
  - agent_10-3_seed=47_test=2021_valrew=-9.83.zip
  - agent_10-4_seed=48_test=2021_valrew=-5.74.zip
  - agent_10-5_seed=49_test=2021_valrew=-7.48.zip
  - agent_2-1_seed=5_test=2013_valrew=-23.56.zip
  - agent_2-2_seed=6_test=2013_valrew=-23.82.zip
  - agent_2-3_seed=7_test=2013_valrew=-22.83.zip
  - agent_2-4_seed=8_test=2013_valrew=-23.14.zip
  - agent_2-5_seed=9_test=2013_valrew=-21.72.zip
  - agent_3-1_seed=10_test=2014_valrew=2.72.zip
  - agent_3-2_seed=11_test=2014_valrew=1.24.zip
  - agent_3-3_seed=12_test=2014_valrew=2.58.zip
  - agent_3-4_seed=13_test=2014_valrew=0.80.zip
  - agent_3-5_seed

In [4]:
with open(config_path, "r") as f:
    config_dict = json.load(f)
drl_config = DRLConfig(**config_dict)
pprint(drl_config)

DRLConfig(n_windows=10,
          agents_per_window=5,
          base_start_year=2006,
          seed_policy=False,
          env_window_size=60,
          transaction_cost=0.0,
          initial_balance=100000,
          reward_scaling=1.0,
          eta_dsr=0.003968253968253968,
          n_envs=10,
          total_timesteps_per_round=7500000,
          n_steps_per_env=756,
          batch_size=1260,
          n_epochs=16,
          gamma=0.9,
          gae_lambda=0.9,
          clip_range=0.25,
          log_std_init=-1.0,
          initial_lr=0.0003,
          final_lr=1e-05,
          data_dir=None,
          model_save_dir='../models/full_random_run',
          tensorboard_log_dir='../logs/20250630_011349',
          prev_best_model_dir=None)


In [5]:
# Define data paths (consistent with drl_train.ipynb)
DATA_DIR = "../data/snp_new"
RETURNS_PATH = os.path.join(DATA_DIR, "returns_1d.parquet")
PRICES_PATH = os.path.join(DATA_DIR, "prices_1d.parquet")
VOLA_PATH = os.path.join(DATA_DIR, "vola_1d.parquet")

df_ret = pd.read_parquet(RETURNS_PATH)
df_prices = pd.read_parquet(PRICES_PATH)
df_vol = pd.read_parquet(VOLA_PATH)

# Ensure datetime index
df_ret.index = pd.to_datetime(df_ret.index)
df_prices.index = pd.to_datetime(df_prices.index)
df_vol.index = pd.to_datetime(df_vol.index)

print("Data loaded successfully:")
print(f"Returns: {df_ret.shape}, {df_ret.index.min()} to {df_ret.index.max()}")
print(f"Prices: {df_prices.shape}, {df_prices.index.min()} to {df_prices.index.max()}")
print(f"Volatility: {df_vol.shape}, {df_vol.index.min()} to {df_vol.index.max()}")

Data loaded successfully:
Returns: (4278, 11), 2005-01-04 00:00:00 to 2021-12-30 00:00:00
Prices: (4279, 11), 2005-01-03 00:00:00 to 2021-12-30 00:00:00
Volatility: (4278, 3), 2005-01-04 00:00:00 to 2021-12-30 00:00:00


### Eval Pipeline

In [6]:
# OPTIONAL : only if the backtesting files are not saved from modelrun
# results_df, all_portfolios = evaluation_pipeline(drl_config, df_prices, df_ret, df_vol)

### Evaluation Agent on Specific Timeframe

#### Data Load

In [17]:
# Evaluation period
EVAL_START_DATE = "2005-01-01"
EVAL_END_DATE = "2021-12-31"

eval_start_date = pd.to_datetime(EVAL_START_DATE)
eval_end_date = pd.to_datetime(EVAL_END_DATE)

# Slice data according to the evaluation period
eval_df_prices = df_prices.loc[eval_start_date:eval_end_date].copy()
eval_df_ret = df_ret.loc[eval_start_date:eval_end_date].copy()
eval_df_vol = df_vol.loc[eval_start_date:eval_end_date].copy()

print(f"Eval Period ({eval_start_date.date()} to {eval_end_date.date()}):")
print(f"Returns: {eval_df_ret.shape}, {eval_df_ret.index.min()} to {eval_df_ret.index.max()}")
print(f"Prices: {eval_df_prices.shape}, {eval_df_prices.index.min()} to {eval_df_prices.index.max()}")
print(f"Volatility: {eval_df_vol.shape}, {eval_df_vol.index.min()} to {eval_df_vol.index.max()}")

Eval Period (2005-01-01 to 2021-12-31):
Returns: (4278, 11), 2005-01-04 00:00:00 to 2021-12-30 00:00:00
Prices: (4279, 11), 2005-01-03 00:00:00 to 2021-12-30 00:00:00
Volatility: (4278, 3), 2005-01-04 00:00:00 to 2021-12-30 00:00:00


#### Load DRL Model and Evaluate

In [22]:
agent_path = agent_model_files[-1]
agent_name = os.path.basename(agent_path)
print(f"\n--- Evaluating Agent: {agent_name} ---")

eval_env = PortfolioEnv(
    df_prices=eval_df_prices,
    df_ret=eval_df_ret,
    df_vola=eval_df_vol,
)

drl_agent = DRLAgent(env=eval_env)
drl_agent.load(agent_path)
eval_metrics, portfolio = drl_agent.evaluate(eval_env, n_eval_episodes=1, deterministic=True)

print(portfolio)
portfolio.plot_value_history()


--- Evaluating Agent: agent_9-5_seed=44_test=2020_valrew=17.86.zip ---
Portfolio with 11 assets, initial value: $100,000.00, current value: $481,131.66


In [23]:
# print policy model architecture
drl_agent.model.policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=732, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=732, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=64, out_features=11, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)

: 

### Compare with MVO

In [10]:
# Path to MVO results CSV for comparison
MVO_RESULTS_CSV_PATH = "../results/20250626_0038_mvo_backtest_[2012-01-01,2022-12-31]_daily/mvo_metrics.csv"