In [5]:
%run 00_config_and_utils.ipynb

import cvxpy as cvx
from sklearn.covariance import LedoitWolf
import statsmodels.api as sm

In [6]:
ret = pd.read_parquet("../data/processed/ret.parquet")
spy_ret = pd.read_parquet("../data/processed/spy_ret.parquet")["SPY"]
beta = pd.read_parquet("../data/processed/beta.parquet")
signals_p = pd.read_parquet("../data/processed/signals_purified.parquet")

ret.shape, signals_p.shape

((2512, 50), (2512, 150))

In [14]:
def ledoit_wolf_cov(returns_window: pd.DataFrame):
    X = returns_window.fillna(0.0).values
    return LedoitWolf().fit(X).covariance_

def solve_portfolio(alpha: pd.Series,
                    sigma: np.ndarray,
                    w_prev: pd.Series,
                    lam_risk: float,
                    gamma_turnover: float,
                    gross_leverage: float,
                    pos_cap: float,
                    beta_vec: pd.Series | None = None):
    tickers = alpha.index
    n = len(tickers)

    w = cvx.Variable(n)
    Sigma = cvx.Parameter((n, n), PSD=True, value=sigma)

    obj = cvx.Minimize(
        -alpha.values @ w
        + lam_risk * cvx.quad_form(w, Sigma)
        + gamma_turnover * cvx.norm1(w - w_prev.values)
    )

    constraints = [
        cvx.sum(w) == 0,
        cvx.norm1(w) <= gross_leverage,
        w <= pos_cap,
        w >= -pos_cap,
    ]

    if beta_vec is not None:
        constraints.append(beta_vec.values @ w == 0)

    prob = cvx.Problem(obj, constraints)
    prob.solve(solver=cvx.OSQP)

    if w.value is None:
        return None

    return pd.Series(w.value, index=tickers)

def run_backtest(rebalance_rule: str,
                 lam_risk: float,
                 gamma_turnover: float,
                 tcost_bps: float,
                 lookback_cov: int = 252,
                 use_beta_neutral: bool = True,
                 min_names: int = 20):

    reb_dates = make_rebalance_dates(ret.index, rebalance_rule)

    portfolio_rets = []
    turnover_rows = []
    weights_hist = {}

    w_prev_full = pd.Series(0.0, index=ret.columns)
    tcost = tcost_bps / 1e4

    for i in range(len(reb_dates) - 1):
        t = reb_dates[i]
        t_next = reb_dates[i + 1]

        if t not in signals_p.index or t not in beta.index:
            continue

        mom_t = signals_p.loc[t, ("mom", slice(None))].droplevel(0)
        rev_t = signals_p.loc[t, ("rev", slice(None))].droplevel(0)
        lowv_t = signals_p.loc[t, ("lowvol", slice(None))].droplevel(0)

        alpha = (mom_t + rev_t + lowv_t) / 3.0
        alpha = alpha.dropna()

        if len(alpha) < min_names:
            continue

        beta_vec = beta.loc[t, alpha.index].dropna()
        alpha = alpha.loc[beta_vec.index]

        if len(alpha) < min_names:
            continue

        ret_win = ret.loc[:t].tail(lookback_cov)[alpha.index]

        if ret_win.shape[0] < lookback_cov * 0.8:
            continue

        sigma = ledoit_wolf_cov(ret_win)

        w_prev = w_prev_full.reindex(alpha.index).fillna(0.0)

        beta_constraint = beta_vec if use_beta_neutral else None

        w_new = solve_portfolio(
            alpha=alpha,
            sigma=sigma,
            w_prev=w_prev,
            lam_risk=lam_risk,
            gamma_turnover=gamma_turnover,
            gross_leverage=CONFIG["gross_leverage"],
            pos_cap=CONFIG["pos_cap"],
            beta_vec=beta_constraint
        )

        if w_new is None:
            continue

        turnover = (w_new - w_prev).abs().sum()
        cost = tcost * turnover

        period_rets = ret.loc[t:t_next, alpha.index].iloc[1:]
        daily_port = period_rets @ w_new

        if len(daily_port) > 0:
            daily_port.iloc[0] -= cost

        portfolio_rets.append(daily_port)
        turnover_rows.append({"date": t, "turnover": turnover})
        weights_hist[t] = w_new

        w_prev_full.loc[w_new.index] = w_new

    if len(portfolio_rets) == 0:
        return None

    port_ret = pd.concat(portfolio_rets).sort_index()
    turnover_df = pd.DataFrame(turnover_rows).set_index("date").sort_index()

    equity = (1 + port_ret.fillna(0.0)).cumprod()
    mdd, _ = max_drawdown(equity)
    stats = annualized_stats(port_ret)

    common = port_ret.index.intersection(spy_ret.index)
    y = port_ret.loc[common]
    x = spy_ret.loc[common]
    X = sm.add_constant(x)
    reg = sm.OLS(y, X, missing="drop").fit()

    out = {
        "n_days": int(port_ret.shape[0]),
        "ann_return": float(stats["ann_return"]),
        "ann_vol": float(stats["ann_vol"]),
        "sharpe": float(stats["sharpe"]),
        "max_drawdown": float(mdd),
        "avg_turnover": float(turnover_df["turnover"].mean()) if len(turnover_df) else np.nan,
        "reg_alpha_daily": float(reg.params["const"]),
        "reg_beta": float(reg.params.iloc[1]),
        "reg_r2": float(reg.rsquared),
    }
    return out

In [15]:
grid = []

lam_list = [0.5, 1, 2, 5, 10]
gamma_list = [0, 0.5, 1, 2, 5]
cost_list = [1, 5, 10, 20]
rebalance_list = ["W-FRI", "M"]

for reb in rebalance_list:
    for lam in lam_list:
        for gamma in gamma_list:
            for cost in cost_list:
                grid.append((reb, lam, gamma, cost))

len(grid)

200

In [None]:
rows = []

for reb, lam, gamma, cost in grid:
    res = run_backtest(
        rebalance_rule=reb,
        lam_risk=lam,
        gamma_turnover=gamma,
        tcost_bps=cost,
        lookback_cov=CONFIG["lookback_cov"],
        use_beta_neutral=True
    )
    if res is None:
        continue
    res.update({"rebalance": reb, "lam_risk": lam, "gamma_turnover": gamma, "tcost_bps": cost})
    rows.append(res)

results = pd.DataFrame(rows)
results.shape































































































































































































































































































































































































































































































































In [None]:
results.columns.tolist(), results.head()

In [None]:
results.sort_values(["sharpe", "max_drawdown"], ascending=[False, True]).head(15)

In [11]:
import matplotlib.pyplot as plt

weekly = results[results["rebalance"] == "W-FRI"].copy()
monthly = results[results["rebalance"] == "M"].copy()

plt.figure(figsize=(10,4))
weekly.groupby("lam_risk")["sharpe"].mean().plot(marker="o")
plt.grid(True)
plt.title("Weekly: Average Sharpe vs Risk Aversion (lam_risk)")
plt.show()

plt.figure(figsize=(10,4))
weekly.groupby("gamma_turnover")["avg_turnover"].mean().plot(marker="o")
plt.grid(True)
plt.title("Weekly: Average Turnover vs Turnover Penalty (gamma_turnover)")
plt.show()

plt.figure(figsize=(10,4))
weekly.groupby("tcost_bps")["sharpe"].mean().plot(marker="o")
plt.grid(True)
plt.title("Weekly: Average Sharpe vs Transaction Cost (tcost_bps)")
plt.show()

KeyError: 'rebalance'

In [None]:
pivot = weekly.pivot_table(
    index="lam_risk",
    columns="gamma_turnover",
    values="sharpe",
    aggfunc="mean"
)
pivot

In [12]:
os.makedirs("outputs/results", exist_ok=True)

results.to_parquet("outputs/results/robustness_results.parquet")
results.to_csv("outputs/results/robustness_results.csv", index=False)

results.describe()

ValueError: Cannot describe a DataFrame without columns