In [1]:
import pandas as pd
from utils.portfolio_env import PortfolioEnv
from utils.drl_agent import DRLAgent

%load_ext autoreload
%autoreload 2

In [2]:
# --- Load data (replace with actual paths) ---
# The three tables are read in the order listed; each path lines up
# positionally with the name on the left-hand side of the assignment.
returns_df, prices_df, vol_df = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../data/returns.parquet",
        "../data/prices.parquet",
        "../data/vola.parquet",
    )
)

In [3]:
# Restrict all three frames to the same date window: 2020-01-01 through
# 2020-06-01 (roughly five months, not a full year).
# NOTE: the names are rebound to the sliced frames, so widening the window
# later requires re-running the load cell first.
start_date = "2020-01-01"
end_date = "2020-06-01"

# .loc makes the row-label slice explicit (label slices include both ends).
# Assumes each frame is indexed by date — TODO confirm against the parquet files.
returns_df = returns_df.loc[start_date:end_date]
prices_df = prices_df.loc[start_date:end_date]
vol_df = vol_df.loc[start_date:end_date]

# Fail here, with a readable message, if the window selected no rows.
assert not returns_df.empty, f"returns_df has no rows in {start_date}..{end_date}"
assert not prices_df.empty, f"prices_df has no rows in {start_date}..{end_date}"
assert not vol_df.empty, f"vol_df has no rows in {start_date}..{end_date}"

In [4]:
# --- Create environment ---
# Keyword arguments are collected first so the configuration can be inspected
# (or logged) before the environment is built.
env_kwargs = {
    "returns_df": returns_df,
    "prices_df": prices_df,
    "vol_df": vol_df,
    "window_size": 60,
    "transaction_cost": 0,
    "initial_balance": 100_000,
    "reward_scaling": 1.0,
    # NOTE(review): 1/252 looks like a per-trading-day rate — confirm the
    # meaning of `eta` against PortfolioEnv.
    "eta": 1 / 252,
}
env = PortfolioEnv(**env_kwargs)

In [5]:
# = = = = = = = = 
# CHECK ENVIRONMENT
# = = = = = = = = 

# from stable_baselines3.common.env_checker import check_env
# check_env(env)

# Warnings presumably emitted by the check_env(env) call above (quoted as printed):
# UserWarning: Your observation has an unconventional shape (neither an image, nor a 1D vector).
# We recommend you to flatten the observation to have only a 1D vector or use a custom policy to properly process the data.

# UserWarning: We recommend you to use a symmetric and normalized Box action space (range=[-1, 1])
# cf. https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html

# = = = = = = = = 
# RANDOM AGENT
# = = = = = = = = 

# obs, info = env.reset()
# n_steps = 10
# for _ in range(n_steps):
#     action = env.action_space.sample() # random action
#     obs, reward, terminated, truncated, info = env.step(action)
#     print(info)
#     if terminated:
#         obs, info = env.reset()

In [None]:
# PPO hyperparameters, gathered in one place so they are easy to scan and tune.
ppo_hyperparams = dict(
    n_envs=5,
    n_steps=756,
    batch_size=1260,
    n_epochs=16,
    # TODO: anneal to 1e-5 — a constant value is what gets passed from this cell.
    learning_rate=3e-4,
    gamma=0.9,
    gae_lambda=0.9,
    # clip_range=0.25
)

# Build the agent around the environment created earlier in the notebook.
agent = DRLAgent(env, model_name='ppo', **ppo_hyperparams)

# NOTE(review): if DRLAgent forwards these to a standard PPO implementation,
# one rollout is n_envs * n_steps = 3780 samples, far more than the 100
# timesteps requested here — confirm this is an intentional smoke-test budget.
agent.train(total_timesteps=100)

# Persist the trained model next to the notebook's data directory.
agent.save("../models/ppo_portfolio.zip")

Output()


Training Summary:
Final Portfolio Value (First Env): $151,327.53
Average Final Portfolio Value (All Envs): $140,786.18
Std Final Portfolio Value (All Envs): $6,402.34

Performance Metrics (First Env):
Annual return: 1.2401
Cumulative returns: 0.1475
Annual volatility: 0.3495
Sharpe ratio: 2.4825
Calmar ratio: 19.6916
Stability: 0.7410
Max drawdown: -0.0630
Omega ratio: 1.4825
Sortino ratio: 4.9642
Skew: 0.2636
Kurtosis: 0.3757
Tail ratio: 1.3941
Daily value at risk: -0.0280
Portfolio turnover: 0.0177


In [7]:
# from stable_baselines3 import PPO
# model = PPO("MlpPolicy", env).learn(total_timesteps=100)