In [1]:
import pickle
import numpy as np
import pandas as pd
import sys

sys.path.append("C:/Users/zih028/Documents/GitHub/rl_rebalance")
from src.rl_agents.ppo_agent import *
from src.eval import metrics_from_pnl

In [7]:
def build_minvar_targets_for_period(
    returns: pd.DataFrame,
    cov_dict: dict,
) -> pd.DataFrame:
    """
    Build a per-date table of target portfolio weights.

    Parameters
    ----------
    returns : pd.DataFrame
        Only its index (the dates) and its columns (the asset labels) are
        used; the return values themselves are not read here.
    cov_dict : dict
        Maps a date to the covariance input that is handed to
        ``compute_minimum_variance_weights``.

    Returns
    -------
    pd.DataFrame
        One row of weights per date in ``returns.index``, with the same
        columns as ``returns``. Dates that are not keys of ``cov_dict``
        receive equal weights (1 / number of assets).
    """
    asset_count = returns.shape[1]

    # One weight vector per date: min-var when a covariance entry exists,
    # otherwise the equal-weight fallback.
    rows = [
        compute_minimum_variance_weights(cov_dict[day])
        if day in cov_dict
        else np.ones(asset_count) / asset_count
        for day in returns.index
    ]

    return pd.DataFrame(rows, index=returns.index, columns=returns.columns)

def run_ppo_episode(
    env: PortfolioEnvironment,
    agent: PPOAgent,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, pd.DataFrame]:
    """
    Roll the PPO agent through one full episode of ``env`` and record results.

    No learning update is performed here: each action comes from
    ``agent.select_action(state)`` and is passed straight to ``env.step``
    until the environment reports ``done``.
    NOTE(review): whether ``select_action`` samples from the policy or acts
    deterministically is not visible from this function - confirm before
    treating the output as a deterministic evaluation.

    Returns
    -------
    pnl : np.ndarray
        Daily net returns (after transaction costs), i.e.
        ``info["portfolio_return"] - info["transaction_cost"]`` per step.
    tc : np.ndarray
        Daily transaction cost as fraction of NAV
    turnover : np.ndarray
        Daily turnover (sum |Δw|)
    weights : pd.DataFrame
        ``env.current_weights`` captured after every step, indexed by
        ``env.dates[1:]`` with ``env.returns.columns`` as columns.
    """
    state = env.reset()
    pnls, tcs, turnovers, weights = [], [], [], []

    done = False
    while not done:
        # The log-probability is only needed for training, not for a rollout.
        action, _log_prob = agent.select_action(state)
        state, _reward, done, info = env.step(action)

        # env reward = wR - TC - λ * TE^2
        # For performance metrics we care about net return: wR - TC
        pnls.append(info["portfolio_return"] - info["transaction_cost"])
        tcs.append(info["transaction_cost"])
        turnovers.append(info["turnover"])
        # Only post-step weights are recorded; the weights right after
        # reset() are not part of the returned frame.
        weights.append(env.current_weights.copy())

    # NOTE(review): assumes the episode takes exactly len(env.dates) - 1
    # steps; otherwise DataFrame construction raises on the length mismatch.
    weights_df = pd.DataFrame(
        weights,
        index=env.dates[1:],
        columns=env.returns.columns,
    )

    return np.array(pnls), np.array(tcs), np.array(turnovers), weights_df

In [8]:
# --- Load inputs -----------------------------------------------------------
returns = pd.read_parquet("../data/returns.parquet")
prices = pd.read_parquet("../data/prices.parquet")

# NOTE(review): pickle.load can execute arbitrary code; only load this file
# if it was produced by a trusted pipeline.
with open("../data/cov_oas_window252.pkl", "rb") as f:
    cov_dict = pickle.load(f)

# --- Train / test split (label-based date slicing on the returns index) -----
train_start, train_end = "2010-01-01", "2019-12-31"
test_start = "2020-01-01"
test_end = f"{returns.index.max():%Y-%m-%d}"  # last available date

returns_train = returns.loc[train_start:train_end]
returns_test = returns.loc[test_start:test_end]

# --- Target weights for each period ----------------------------------------
targets_train, targets_test = (
    build_minvar_targets_for_period(period_returns, cov_dict)
    for period_returns in (returns_train, returns_test)
)

# NOTE(review): model_path is assigned here but nothing in this cell reads it,
# so `agent` below is evaluated exactly as constructed - confirm whether saved
# weights were meant to be restored before running the episodes.
model_path = "../model/ppo_portfolio_model.pth"

# Train and test environments share the same cost / tracking settings and
# differ only in the return and target-weight frames they are given.
env_train, env_test = (
    PortfolioEnvironment(
        returns=period_returns,
        target_weights=period_targets,
        transaction_cost=0.001,
        lambda_tracking=1.0,
    )
    for period_returns, period_targets in (
        (returns_train, targets_train),
        (returns_test, targets_test),
    )
)

# The state size is taken from the length of an initial observation.
agent = PPOAgent(
    state_dim=len(env_train.reset()),
    action_dim=env_train.n_actions,
    hidden_dim=64,
)

# Roll the same agent through both periods.
pnl_train, tc_train, to_train, weights_train = run_ppo_episode(env_train, agent)
pnl_test, tc_test, to_test, weights_test = run_ppo_episode(env_test, agent)

# tc is passed as returned by the episode (convert to bps first if preferred).
metrics_train = metrics_from_pnl(pnl_train, tc_train)
metrics_test = metrics_from_pnl(pnl_test, tc_test)


In [9]:
# Attach per-step P&L and transaction cost to the weight history.
# DataFrame.assign returns a new frame, so weights_train / weights_test keep
# only their asset columns instead of being mutated through an alias (which
# would leave "pnl" and "tc" mixed in with the weights for any later use).
df_train = weights_train.assign(pnl=pnl_train, tc=tc_train)
df_test = weights_test.assign(pnl=pnl_test, tc=tc_test)

# Persist the combined tables next to the model artefacts.
df_train.to_csv("../model/ppo_train.csv")
df_test.to_csv("../model/ppo_test.csv")