In [None]:
import pickle
import numpy as np
import pandas as pd
import sys

sys.path.append("C:/Users/zih028/Documents/GitHub/rl_rebalance")
from src.rl_agents.TilesQ import *
from src.eval import metrics_from_pnl

In [42]:
def build_minvar_targets_for_period(
    returns: pd.DataFrame,
    cov_dict: dict,
) -> pd.DataFrame:
    """
    Build a table of target weights, one row per date in ``returns``.

    Parameters
    ----------
    returns : pd.DataFrame
        Frame whose index supplies the dates and whose columns supply the
        asset labels of the result.
    cov_dict : dict
        Mapping from date to covariance matrix. Dates are looked up with
        ``in`` / ``[]`` using the index values of ``returns`` as keys.

    Returns
    -------
    pd.DataFrame
        Same index and columns as ``returns``. A row holds the output of
        ``compute_minimum_variance_weights`` when the date has a covariance
        entry, and equal weights (1 / number of columns) otherwise.
    """
    asset_count = returns.shape[1]

    def _weights_for(day):
        # No covariance for this day -> equal-weight fallback.
        if day not in cov_dict:
            return np.ones(asset_count) / asset_count
        return compute_minimum_variance_weights(cov_dict[day])

    rows = [_weights_for(day) for day in returns.index]
    return pd.DataFrame(rows, index=returns.index, columns=returns.columns)

def run_tileq_episode(
    env: PortfolioEnvironment,
    agent: TileQAgent,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, pd.DataFrame]:
    """
    Play one full episode on ``env`` with a fixed TileQ policy.

    Actions are requested with ``training=False`` (exploration turned off).
    The episode runs from ``env.reset()`` until ``env.step`` reports ``done``.

    Parameters
    ----------
    env : PortfolioEnvironment
        Environment to replay. Must expose ``reset``, ``step``,
        ``current_weights``, ``dates`` and ``returns``.
    agent : TileQAgent
        Agent providing ``select_action(state, training=False)``.

    Returns
    -------
    pnl : np.ndarray
        Per-step net returns: ``info["portfolio_return"]`` minus
        ``info["transaction_cost"]``.
    tc : np.ndarray
        Per-step transaction cost as reported by the environment.
    turnover : np.ndarray
        Per-step turnover as reported by the environment.
    weights_df : pd.DataFrame
        ``env.current_weights`` recorded after every step, indexed by
        ``env.dates[1:]`` with the columns of ``env.returns``.
    """
    state = env.reset()
    pnls, tcs, turnovers, weights = [], [], [], []

    done = False
    while not done:
        action = agent.select_action(state, training=False)
        state, _reward, done, info = env.step(action)

        # env reward = wR - TC - λ*TE²
        # For performance metrics we care about net return: wR - TC,
        # so the tracking-error penalty in the reward is ignored here.
        pnls.append(info["portfolio_return"] - info["transaction_cost"])
        tcs.append(info["transaction_cost"])
        turnovers.append(info["turnover"])

        # Post-step weights only; the pre-episode weights are not reported.
        weights.append(env.current_weights.copy())

    weights_df = pd.DataFrame(
        weights,
        index=env.dates[1:],
        columns=env.returns.columns,
    )
    return np.array(pnls), np.array(tcs), np.array(turnovers), weights_df

In [43]:
# ---------------------------------------------------------------------------
# Evaluation set-up: load data, rebuild the agent, replay train and test.
# ---------------------------------------------------------------------------

# Environment settings shared by every PortfolioEnvironment built below.
# Keeping them in one place guarantees train and test are evaluated under
# identical cost / tracking-penalty assumptions.
TRANSACTION_COST = 0.001
LAMBDA_TRACKING = 1.0


def make_env(period_returns, period_targets):
    """Return a PortfolioEnvironment for one period using the shared settings."""
    return PortfolioEnvironment(
        returns=period_returns,
        target_weights=period_targets,
        transaction_cost=TRANSACTION_COST,
        lambda_tracking=LAMBDA_TRACKING,
    )


returns = pd.read_parquet("../data/returns.parquet")
prices = pd.read_parquet("../data/prices.parquet")

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open("../data/cov_oas_window252.pkl", "rb") as f:
    cov_dict = pickle.load(f)

# Date boundaries of the two evaluation periods; the test period runs to the
# last date available in `returns`.
train_start = "2010-01-01"
train_end = "2019-12-31"
test_start = "2020-01-01"
test_end = returns.index.max().strftime("%Y-%m-%d")

returns_train = returns.loc[train_start:train_end]
returns_test = returns.loc[test_start:test_end]

targets_train = build_minvar_targets_for_period(returns_train, cov_dict)
targets_test = build_minvar_targets_for_period(returns_test, cov_dict)

model_path = "../model/tileq_portfolio_model.pkl"

# Throw-away environment, used only to read the state / action dimensions.
tmp_env = make_env(returns_train, targets_train)

init_state = tmp_env.reset()
state_dim = init_state.shape[0]
action_dim = tmp_env.n_actions

# NOTE(review): assumes every state feature lies in [-1, 1] and that these
# bounds match the ones used when the model was trained — TODO confirm.
state_low = np.full(state_dim, -1.0, dtype=float)
state_high = np.full(state_dim, 1.0, dtype=float)

# NOTE(review): the tiling parameters presumably have to match the training
# configuration for the loaded model to be compatible — verify.
agent = TileQAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    alpha=1e-3,
    gamma=0.99,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995,
    num_tilings=8,
    tiles_per_dim=8,
    n_features=4096,
    state_low=state_low,
    state_high=state_high,
)

agent.load(model_path)
# Evaluate without exploration (run_tileq_episode also passes training=False).
agent.epsilon = 0.0

# Replay the training period.
env_train = make_env(returns_train, targets_train)
pnl_train, tc_train, to_train, weights_train = run_tileq_episode(env_train, agent)
metrics_train = metrics_from_pnl(pnl_train, tc_train)

# Replay the test period.
env_test = make_env(returns_test, targets_test)
pnl_test, tc_test, to_test, weights_test = run_tileq_episode(env_test, agent)
metrics_test = metrics_from_pnl(pnl_test, tc_test)



TileQAgent loaded from ../model/tileq_portfolio_model.pkl


In [44]:
# Combine the per-day weights with net return and transaction cost.
# `.assign` returns a new frame, so `weights_train` / `weights_test` keep
# holding weights only instead of silently gaining "pnl" / "tc" columns.
df_train = weights_train.assign(pnl=pnl_train, tc=tc_train)
df_test = weights_test.assign(pnl=pnl_test, tc=tc_test)

# Persist both result tables next to the model file.
df_train.to_csv("../model/tileq_train.csv")
df_test.to_csv("../model/tileq_test.csv")

In [38]:
# Per-step turnover returned by run_tileq_episode for the training period.
to_train

array([0.51861078, 0.0835276 , 0.04169527, ..., 0.05385309, 0.04856178,
       0.03665156], shape=(2515,))

In [31]:
# Number of net-return observations recorded over the training episode.
pnl_train.shape

(2515,)

In [45]:
# Target weights per training date, as built by build_minvar_targets_for_period.
targets_train

Ticker,EEM,EFA,EWJ,IAU,QQQ,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,0.0,0.000000,0.154259,0.318188,0.206138,0.321414
2010-01-05,0.0,0.000000,0.159416,0.320044,0.204338,0.316203
2010-01-06,0.0,0.000000,0.160507,0.318426,0.205955,0.315112
2010-01-07,0.0,0.000000,0.157108,0.320394,0.205311,0.317187
2010-01-08,0.0,0.000000,0.155730,0.322366,0.209548,0.312357
...,...,...,...,...,...,...
2019-12-24,0.0,0.120436,0.143208,0.345724,0.000000,0.390632
2019-12-26,0.0,0.094846,0.126828,0.342862,0.000000,0.435464
2019-12-27,0.0,0.077562,0.141966,0.338643,0.000000,0.441828
2019-12-30,0.0,0.078111,0.139975,0.339763,0.000000,0.442150


In [53]:
# Covariance entry stored for 2010-01-04 (the first date in targets_train).
cov_dict["2010-01-04"]

array([[6.02912900e-04, 4.49616920e-04, 3.03454706e-04, 4.07706659e-05,
        3.30170935e-04, 3.72589681e-04],
       [4.49616920e-04, 3.98807358e-04, 2.79664742e-04, 3.47548927e-05,
        2.68285961e-04, 3.04659064e-04],
       [3.03454706e-04, 2.79664742e-04, 2.66248655e-04, 2.20429485e-05,
        1.92434934e-04, 2.16382448e-04],
       [4.07706659e-05, 3.47548927e-05, 2.20429485e-05, 1.72480131e-04,
        3.93977114e-06, 9.26944175e-06],
       [3.30170935e-04, 2.68285961e-04, 1.92434934e-04, 3.93977114e-06,
        2.52150870e-04, 2.42691516e-04],
       [3.72589681e-04, 3.04659064e-04, 2.16382448e-04, 9.26944175e-06,
        2.42691516e-04, 2.78181132e-04]])

In [50]:
# Sanity check: un-normalized closed-form minimum-variance solution, Σ⁻¹·1,
# for a single date. It carries no sign constraint, so entries may be negative.
inv_cov = np.linalg.inv(cov_dict["2010-01-04"])
# Take the asset count from the matrix itself rather than hard-coding it.
n = inv_cov.shape[0]
ones = np.ones(n)
w = inv_cov @ ones

In [54]:
# Full return panel as loaded from ../data/returns.parquet (before the train/test split).
returns

Ticker,EEM,EFA,EWJ,IAU,QQQ,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-02-02,0.003156,0.001389,0.003749,0.002608,0.001598,0.003023
2005-02-03,-0.001035,-0.005444,-0.014131,-0.012150,-0.011508,-0.002603
2005-02-04,0.015500,0.005128,0.003788,-0.005287,0.016021,0.010619
2005-02-07,0.003149,-0.006335,-0.002840,-0.003621,-0.002121,-0.001331
2005-02-08,0.001933,0.000000,-0.005703,-0.001452,0.001856,0.001165
...,...,...,...,...,...,...
2025-11-17,-0.011897,-0.013103,-0.018144,-0.011366,-0.008577,-0.009360
2025-11-18,-0.004984,-0.013061,-0.020705,0.007592,-0.012250,-0.008433
2025-11-19,-0.003522,-0.003022,-0.001239,0.001173,0.005952,0.003856
2025-11-20,-0.013837,-0.013931,-0.014860,0.000000,-0.023956,-0.015360


In [52]:
# Rescale w so its entries sum to 1; needs the cell defining `w` to have run first.
w / np.sum(w)

array([-0.33004824, -0.26697048,  0.24635528,  0.50815275,  0.32920637,
        0.51330433])