In [4]:
import os # give me filesystem utilities and let me create folders programmatically
import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt

# Create every input/output folder up front so later save calls never hit a missing directory.
for folder in ("data/raw", "data/processed", "outputs/results", "outputs/figures"):
    os.makedirs(folder, exist_ok=True)

In [5]:
# Rebalancing is done weekly, on Fridays.


CONFIG = {
    "start": "2016-01-01",
    "end":   "2025-12-31",
    "rebalance_rule": "W-FRI",  # pandas offset alias: weekly periods anchored on Friday
    "lookback_signal_mom": 252,  # momentum lookback; presumably in trading days (~1 year) -- confirm against the signal code
    "skip_mom": 21,  # skip the most recent ~1 month so momentum avoids short-term reversal effects
    "lookback_signal_rev": 5, # short-term reversal uses the last 5 days
    "lookback_signal_vol": 20, # ~1 month volatility estimate
    "lookback_beta": 252,  # presumably the beta estimation window -- confirm against the code that reads it
    "lookback_cov": 252, # estimate the covariance matrix using a 1-year window
    "gross_leverage": 1.0, # restricts total absolute leverage to 100%
    "pos_cap": 0.02, # cap each weight at 2%
    "lam_risk": 5.0, # scales the risk penalty term w' Sigma w (higher means less variance)
    "gamma_turnover": 1.0, # scales the turnover penalty on abs(w - w_prev); higher means fewer trades and a portfolio further from w_ideal
    "tcost_bps": 5, # 5 basis points per dollar traded
}
CONFIG

{'start': '2016-01-01',
 'end': '2025-12-31',
 'rebalance_rule': 'W-FRI',
 'lookback_signal_mom': 252,
 'skip_mom': 21,
 'lookback_signal_rev': 5,
 'lookback_signal_vol': 20,
 'lookback_beta': 252,
 'lookback_cov': 252,
 'gross_leverage': 1.0,
 'pos_cap': 0.02,
 'lam_risk': 5.0,
 'gamma_turnover': 1.0,
 'tcost_bps': 5}

In [7]:
# Reduce the impact of extreme outliers in a cross-section of signal values.
# Values below the p-th quantile or above the (1 - p)-th quantile are set to exactly those quantile values.
def winsorize_series(s: pd.Series, p=0.01):
    """Clip a series to its own [p, 1 - p] quantile range.

    Parameters
    ----------
    s : pd.Series
        Values to winsorize.
    p : float, default 0.01
        Tail probability; the lower bound is the p-quantile and the upper
        bound is the (1 - p)-quantile of ``s``.

    Returns
    -------
    pd.Series
        Copy of ``s`` with values outside the two bounds replaced by the
        nearest bound.
    """
    lower_bound = s.quantile(p)
    upper_bound = s.quantile(1 - p)
    return s.clip(lower=lower_bound, upper=upper_bound)

# Makes signals comparable: each cross-section (row) ends up with mean 0 and standard deviation 1.
def zscore_cross_section(df: pd.DataFrame):
    """Standardize each row of ``df`` to zero mean and unit standard deviation.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose rows are standardized independently (statistics are
        taken along ``axis=1``).

    Returns
    -------
    pd.DataFrame
        Row-wise z-scores. A row whose standard deviation is exactly 0
        becomes all-NaN instead of dividing by zero.
    """
    row_mean = df.mean(axis=1)
    row_std = df.std(axis=1)
    # Blank out zero dispersion so the division yields NaN rather than inf.
    safe_std = row_std.mask(row_std == 0)
    centered = df.sub(row_mean, axis=0)
    return centered.div(safe_std, axis=0)

# Return the worst (most negative) drawdown together with the full drawdown series.
def max_drawdown(equity: pd.Series):
    """Compute the drawdown path of an equity curve and its worst point.

    Parameters
    ----------
    equity : pd.Series
        Equity curve values.

    Returns
    -------
    tuple
        ``(worst, drawdown)`` where ``drawdown`` is ``equity / running_max - 1``
        at every point and ``worst`` is the minimum of that series.
    """
    running_peak = equity.cummax()
    drawdown = equity.div(running_peak).sub(1.0)
    return drawdown.min(), drawdown

# Return the annualized return, annualized volatility and Sharpe ratio as a dict.
def annualized_stats(r: pd.Series, periods=252):
    """Annualize the mean, volatility and Sharpe ratio of a return series.

    Parameters
    ----------
    r : pd.Series
        Per-period returns.
    periods : int, default 252
        Number of periods per year used for scaling.

    Returns
    -------
    dict
        ``{"ann_return": mean * periods,
           "ann_vol": std * sqrt(periods),
           "sharpe": ann_return / ann_vol}``.
        ``sharpe`` is NaN when the volatility is undefined or numerically zero.
    """
    mu = r.mean() * periods
    vol = r.std() * np.sqrt(periods)
    # A constant non-zero series can produce a std of ~1e-18 rather than an
    # exact 0 because of floating-point rounding; an exact `== 0` test would
    # then report an astronomically large Sharpe. Compare with a tolerance.
    if pd.isna(vol) or np.isclose(vol, 0.0):
        sharpe = np.nan
    else:
        sharpe = mu / vol
    return {"ann_return": mu, "ann_vol": vol, "sharpe": sharpe}

# Choose rebalance dates that fall on actual trading days, so we never rebalance on a non-trading day.
def make_rebalance_dates(index: pd.DatetimeIndex, rule: str):
    """Pick one rebalance date per resampling period from a trading calendar.

    For every period defined by ``rule`` the last date of ``index`` that falls
    inside the period is returned. When the period's anchor day is itself in
    ``index`` (e.g. a normal Friday for ``"W-FRI"``) that is the anchor day;
    when the anchor day is missing from ``index`` (e.g. a market holiday) the
    previous available date in the same period is used instead of silently
    dropping the whole period.

    Parameters
    ----------
    index : pd.DatetimeIndex
        Dates that are valid for trading.
    rule : str
        pandas offset alias passed to ``resample`` (e.g. ``"W-FRI"``).

    Returns
    -------
    pd.DatetimeIndex
        Rebalance dates, all of them members of ``index``. A period whose
        label lies after the last date of ``index`` is skipped, because it may
        not be complete yet.
    """
    if len(index) == 0:
        return index[:0]

    # Carry the dates as values so each resample bin can report the latest
    # date it actually contains; bins with no dates yield NaT and are dropped.
    trading_days = pd.Series(index, index=index)
    last_in_period = trading_days.resample(rule).max().dropna()

    # Skip a trailing period whose label is beyond the available data.
    completed = last_in_period[last_in_period.index <= index.max()]
    return pd.DatetimeIndex(completed)