In [1]:
import numpy as np
import pandas as pd
from sklearn.covariance import LedoitWolf
import cvxpy as cp

In [3]:
# CONFIG

LOOKBACK = 252            # days for μ, Σ
HORIZON = 21              # holding period (trading days)
STEP = 21                 # form a new, independent portfolio every 21 days
GAMMA = 5.0               # objective: 0.5 w'Σw - GAMMA μ'w
NAME_CAP = 0.10           # upper limit for each stock’s weight; set None to disable 
COST_BPS_ONE_WAY = 5.0    # linear cost per side, in bps of traded notional
MIN_NONMISSING_PCT = 0.50 # require >=50% non-missing in lookback
STARTING_NOTIONAL = 1000000.0

Configurations: 
1) We start every portfolio cycle with 1000000 USD
2) Gamma(RISK aversion parameter in portfolio) = 0.5
3) I limited each stock to take maximum of 10% of the portfolio
4) Every time when i buy or sell, i lose 0.05 % of notional value to trading costs

Benchmark expected return:
Find average daily return r, coumpound it by 21 days to give you return over 21 day horizon 
E.G. Benchmark expected return = (1+r)^21 - 1

Benchmark covariance
From the 252 past data window, find the daily covariance and multiply it by 21 days to find the 21-day covariance. 
This is assuming that daily returns i.i.d. --> no autocorrelation across days --> justifies linear scaling

Evaluation performed
1) Average 21-day Net Return
2) Standard Deviation of 21-day Returns
3) Annualized Return
4) Annualized Volatility
5) Sharpe Ratio

In [4]:
# Helper functions

# Takes in a stock's average daily simple return and returns 21-day expected compounded return 
# This gives you the expected return of a stock after 21 trading days 
def compound_from_daily_mean(mu_d, horizon=HORIZON):
    return (1.0 + mu_d)**horizon - 1.0


# This function gives N×N covariance matrix 
def ledoit_wolf_cov(returns_window):
    lw = LedoitWolf(store_precision=False, assume_centered=False)
    lw.fit(returns_window)
    return lw.covariance_

# Solving Markowitz
def solve_markowitz_long_only(mu_21, Sigma_21, gamma=GAMMA, name_cap=NAME_CAP, mode = "standard", l1 = 0.0, l2 = 0.0, 
                              rp_tau=1e-2): # strength for risk parity
    # mu_21: vector of expected 21-day returns (size N)
    # Sigma_21: 21-day covariance matrix assuming no serial correlation --> 21 * covariance matrix 
    # gamma: risk aversion
    n = len(mu_21)
    w = cp.Variable(n)
    Sigma = cp.atoms.affine.wraps.psd_wrap(Sigma_21)

    # --- constraints ---
    cons = [cp.sum(w) == 1, w >= 0]
    if name_cap is not None:
        cons.append(w <= name_cap)

    # Change objective based on mode
    if mode == "standard":
        objective = 0.5 * cp.quad_form(w, Sigma) - gamma * (mu_21 @ w)

    elif mode == "ridge":
        objective = 0.5 * cp.quad_form(w, Sigma) - gamma * (mu_21 @ w)
        objective += l2 * cp.sum_squares(w)

    elif mode == "lasso":
        objective = 0.5 * cp.quad_form(w, Sigma) - gamma * (mu_21 @ w)
        objective += l1 * cp.norm1(w)

    elif mode == "enet":
        objective = 0.5 * cp.quad_form(w, Sigma) - gamma * (mu_21 @ w)
        objective += l2 * cp.sum_squares(w) + l1 * cp.norm1(w)

    elif mode == "minvar":
        # Global minimum variance (risk-only)
        objective = 0.5 * cp.quad_form(w, Sigma)

    elif mode == "invvol":
        # Target inverse-vol weights and project onto simplex + caps
        diag = np.sqrt(np.clip(np.diag(Sigma_21), 1e-12, None))
        w_target = (1.0 / diag)
        w_target = w_target / w_target.sum()
        objective = cp.sum_squares(w - w_target)   # projection objective

    elif mode == "risk_parity":
        # Convex surrogate for ERC: minimize 0.5 w' Σ w - τ ∑ log(w_i)
        # enforce strictly positive weights for log
        eps = 1e-8
        cons.append(w >= eps)
        if name_cap is not None:
            # ensure feasibility with caps (cap must be > eps)
            pass
        objective = 0.5 * cp.quad_form(w, Sigma) - rp_tau * cp.sum(cp.log(w))

    else:
        raise ValueError(f"Unknown mode: {mode}")

    cons = [cp.sum(w) == 1, w >= 0]
    if name_cap is not None:
        cons.append(w <= name_cap)
    prob = cp.Problem(cp.Minimize(objective), cons)
    # choose solver: OSQP for QP; SCS handles log/|.| terms
    use_scs = (mode in {"risk_parity"} or l1 > 0)
    prob.solve(solver=cp.SCS if use_scs else cp.OSQP, verbose=False)

    if w.value is None:
        Sigma_r = Sigma_21 + 1e-6 * np.eye(n)
        Sigma_wrapped_r = cp.atoms.affine.wraps.psd_wrap(Sigma_r)
        if mode == "minvar":
            objective = 0.5 * cp.quad_form(w, Sigma_wrapped_r)
        elif mode == "risk_parity":
            objective = 0.5 * cp.quad_form(w, Sigma_wrapped_r) - rp_tau * cp.sum(cp.log(w))
        elif mode == "invvol":
            objective = cp.sum_squares(w - w_target)
        else:
            objective = 0.5 * cp.quad_form(w, Sigma_wrapped_r) - gamma * (mu_21 @ w)
            if l2 > 0 and l1 == 0: objective += l2 * cp.sum_squares(w)
            if l1 > 0 and l2 == 0: objective += l1 * cp.norm1(w)
            if l1 > 0 and l2 > 0:  objective += l1 * cp.norm1(w) + l2 * cp.sum_squares(w)
        prob = cp.Problem(cp.Minimize(objective), cons)
        prob.solve(solver=cp.SCS if use_scs else cp.OSQP, verbose=False)
        
    if w.value is None:
        raise RuntimeError("QP failed; check Σ conditioning or constraints.")
    return np.clip(w.value, 0, 1)

# Find the realised 21- days return per ticker
def ticker_period_compound(returns_df, start_idx, horizon=HORIZON):
    r = returns_df.iloc[start_idx+1 : start_idx+horizon+1]
    return (1.0 + r).prod(axis=0) - 1.0  # Series by ticker


In [7]:
# Main backtesting

def backtest_markowitz_independent_legs_from_returns(rets: pd.DataFrame, is_in_sp500: pd.DataFrame, mode: str = "stats", 
                                                     markowitz_version: str = "standard", l1: float = 0.0, l2: float = 0.0, rp_tau: float = 1e-2):
    """
    rets: DataFrame (Date x ticker) of DAILY simple returns (not log), aligned and sorted.
    is_in_sp500: DataFrame (Date x ticker) with {0,1}, aligned to rets index/columns.
    """
    rets = rets.sort_index().dropna(how="all")
    is_in_sp500 = is_in_sp500.reindex_like(rets).fillna(0).astype(int)
    dates = rets.index

    start = LOOKBACK - 1
    end = len(dates) - HORIZON - 1
    legs = []

    for t in range(start, end + 1, STEP):
        date_t = dates[t]

        # Universe on formation day
        tickers = is_in_sp500.columns[is_in_sp500.loc[date_t] == 1].tolist()
        if not tickers:
            continue

        # 252d window of daily returns
        win = rets.iloc[t-LOOKBACK+1 : t+1][tickers]

        # Data quality filter
        non_missing_ratio = 1.0 - win.isna().mean()
        keep = non_missing_ratio[non_missing_ratio >= MIN_NONMISSING_PCT].index.tolist()
        win = win[keep].dropna(how="all", axis=1)
        if win.shape[1] < 2:
            continue
        win = win.dropna(how="any")
        if len(win) < 30:
            continue

        tickers = win.columns.tolist()

        if mode == "stats":
            # μ: mean daily -> compounded 21d
            mu_d = win.mean(axis=0).values
            mu_21 = compound_from_daily_mean(mu_d, HORIZON)

            # Σ: Ledoit–Wolf daily -> 21d
            Sigma_d = ledoit_wolf_cov(win.values)
            Sigma_21 = HORIZON * Sigma_d

        # Optimize
        try:
            w = solve_markowitz_long_only(mu_21, Sigma_21, gamma=GAMMA, name_cap=NAME_CAP, mode = markowitz_version, l1= l1, l2 = l2, rp_tau = rp_tau)
        except RuntimeError:
            continue

        # Realized leg
        comp_rets = ticker_period_compound(rets[tickers], start_idx=t, horizon=HORIZON)
        gross_leg_ret = float(np.dot(w, comp_rets.values))

        # Round-trip linear costs: entry 1 + exit 1 = 2
        rt_turnover = 2.0
        cost_frac = (COST_BPS_ONE_WAY * 1e-4) * rt_turnover
        net_leg_ret = gross_leg_ret - cost_frac

        start_value = STARTING_NOTIONAL
        end_value = start_value * (1.0 + net_leg_ret)

        legs.append({
            "formation_date": date_t,
            "n_names": len(tickers),
            "gross_21d_ret": gross_leg_ret,
            "net_21d_ret": net_leg_ret,
            "roundtrip_cost_frac": cost_frac,
            "start_value": start_value,
            "end_value": end_value,
            "gamma": GAMMA,
            "name_cap": NAME_CAP
        })

    legs_df = pd.DataFrame(legs).set_index("formation_date")

    # Evaluation (your main score = average net 21d return)
    if not legs_df.empty:
        legs_per_year = 252.0 / HORIZON
        avg_net = legs_df["net_21d_ret"].mean()
        std_net = legs_df["net_21d_ret"].std(ddof=1)
        ann_return = (1.0 + avg_net)**legs_per_year - 1.0
        ann_vol = std_net * np.sqrt(legs_per_year)
        sharpe = ann_return / ann_vol if ann_vol > 0 else np.nan

        summary = {
            "legs": int(len(legs_df)),
            "avg_net_21d_return": float(avg_net),
            "std_net_21d_return": float(std_net),
            "annual_return_from_avg_leg": float(ann_return),
            "annual_vol_from_leg_std": float(ann_vol),
            "sharpe_from_legs": float(sharpe),
            "cost_bps_one_way": COST_BPS_ONE_WAY,
            "gamma": GAMMA,
            "name_cap": NAME_CAP
        }
    else:
        summary = {}

    return legs_df, summary

In [5]:
df = pd.read_csv("training_data.csv", parse_dates=["Date"])
df["daily_simple_return"] = np.expm1(df["daily_log_return"])

# Building my wide df
rets = df.pivot(index="Date", columns="ticker", values="daily_simple_return").sort_index()
is_in = df.pivot(index="Date", columns="ticker", values="is_in_sp500").sort_index()
is_in = is_in.fillna(0).astype(float).clip(0,1).astype(int)

split_idx = len(rets) - 252*2

# include the 252-day overlap so the first test formation has a full lookback
rets_test = rets.iloc[split_idx - LOOKBACK:]
is_in_test = is_in.iloc[split_idx - LOOKBACK:]

rets_train = rets.iloc[:split_idx - LOOKBACK]
is_in_train = is_in.iloc[:split_idx - LOOKBACK]

rets_test

  df = pd.read_csv("training_data.csv", parse_dates=["Date"])


ticker,A,AAL,AAP,AAPL,ABBV,ABT,ACN,ADBE,ADI,ADM,...,XEL,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-02-16,-0.015111,-0.011990,-0.017635,-0.001181,-0.002757,0.004546,0.006587,0.001684,0.004535,-0.001133,...,0.006729,-0.010342,0.005962,-0.009371,0.001678,-0.003059,-0.008439,-0.006759,-0.008830,-0.042974
2017-02-17,0.015539,-0.001278,0.003665,0.002734,0.004717,0.011086,0.004225,0.006222,0.006222,0.013379,...,0.002626,-0.006561,0.031939,0.005406,0.001047,-0.000585,0.004728,0.008917,-0.005568,-0.005116
2017-02-21,-0.005423,-0.002132,-0.002661,0.007221,0.000971,0.007160,0.007177,-0.000334,0.004122,0.002685,...,0.010238,0.001590,-0.002872,0.000000,0.011088,-0.010673,0.002567,0.008024,0.005375,0.018282
2017-02-22,-0.000195,-0.010468,-0.009122,0.002999,-0.007601,0.000000,0.000901,-0.001337,-0.003764,-0.002008,...,0.001415,-0.011723,-0.002560,0.002688,-0.001655,-0.023201,-0.002304,-0.010960,-0.003342,-0.001683
2017-02-23,0.002532,-0.004750,-0.012275,-0.004230,0.009452,0.008665,0.002209,-0.005357,-0.005484,0.008274,...,0.010826,0.010503,0.001765,0.009383,-0.015130,-0.005295,-0.004448,0.048641,0.001788,0.003747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-02-12,0.003996,0.021113,0.016728,0.023748,0.018646,-0.004262,-0.000707,0.013567,0.016513,-0.003278,...,0.001015,0.012225,0.000172,0.018129,0.021664,0.012398,-0.003704,0.016868,0.003701,-0.007290
2020-02-13,0.000468,-0.012471,0.002605,-0.007121,-0.024952,-0.001239,0.001934,0.001469,0.000589,-0.007455,...,0.011445,-0.005550,0.001724,0.000524,-0.023337,0.001526,0.000189,-0.074090,0.007157,0.013708
2020-02-14,0.004095,-0.029578,-0.008019,0.000246,-0.013634,0.011051,-0.000141,0.012886,-0.010346,-0.011045,...,0.006302,-0.004595,-0.004474,-0.010207,0.012522,0.002380,0.010521,0.023358,-0.003876,-0.005313
2020-02-18,-0.012002,-0.019521,0.061531,-0.018310,-0.004679,-0.008700,0.006545,-0.002160,0.007820,-0.007148,...,0.012810,-0.012696,0.001210,-0.015864,-0.001021,-0.011873,-0.007170,-0.008837,-0.025292,-0.003607


In [8]:
legs_df, summary = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "standard")

summary

{'legs': 24,
 'avg_net_21d_return': 0.00477220862514516,
 'std_net_21d_return': 0.054300089285366525,
 'annual_return_from_avg_leg': 0.0587937546427415,
 'annual_vol_from_leg_std': 0.18810102699556244,
 'sharpe_from_legs': 0.3125647721430491,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

# Create function to perform parameter tuning for regularised markowitz

In [26]:
def parameter_tuning(rets_train, is_in_train, mode = 'stats', markowitz_version = "ridge", l1_values = [0.0], l2_values = [0.0], rp_tau_values = [1e-2]):
    best_params = None
    best_returns = -1.0
    for i in range(max(len(l1_values), len(l2_values), len(rp_tau_values))):
        if i < len(l1_values):
            l1 = l1_values[i]
        else:
            l1 = l1_values[-1] # use last value if index exceeds length
        if i < len(l2_values):
            l2 = l2_values[i]
        else:  
            l2 = l2_values[-1]
        if i < len(rp_tau_values):
            rp_tau = rp_tau_values[i]
        else:
            rp_tau = rp_tau_values[-1]
        legs_df, summary = backtest_markowitz_independent_legs_from_returns(
            rets_train, is_in_train, mode=mode, markowitz_version=markowitz_version, l1=l1, l2=l2, rp_tau=rp_tau
        )
        if summary['avg_net_21d_return'] > best_returns:
            best_returns = summary['avg_net_21d_return']
            best_params = {
                'l1': l1,
                'l2': l2,
                'rp_tau': rp_tau,
                'summary': summary
            }

    return best_params


def get_l1_l2_lists_for_enet(lambda_vals, alpha_vals):
    l1_list = []
    l2_list = []
    
    for lam in lambda_vals:
        for alpha in alpha_vals:
            l1_list.append(lam * alpha)
            l2_list.append(lam * (1 - alpha))
    
    return l1_list, l2_list

lambda_vals =  np.logspace(-4, 4, num=20) #for ridge and lasso
alpha_vals  = np.linspace(0.0, 1.0, 6) # 0, .2, .4, .6, .8, 1, for enet only
rp_tau_grid = np.logspace(-3, 1, num=10) 


In [33]:
# best_param_ridge = parameter_tuning(rets_train, is_in_train, mode = 'stats', markowitz_version = "ridge", l1_values = [0.0], l2_values = lambda_vals)

# print(best_param_ridge['l2'])

best_param_ridge = {}
best_param_ridge['l2'] = 0.0001

legs_df, summary_ridge = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "ridge", l2 = best_param_ridge['l2'])
summary_ridge

{'legs': 24,
 'avg_net_21d_return': 0.004779860452949828,
 'std_net_21d_return': 0.05430112760642048,
 'annual_return_from_avg_leg': 0.05889051743261575,
 'annual_vol_from_leg_std': 0.1881046238452025,
 'sharpe_from_legs': 0.3130732048409331,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [36]:
# best_param_lasso = parameter_tuning(rets_train, is_in_train, mode = 'stats', markowitz_version = "ridge", l1_values = lambda_vals, l2_values = [0.0])

# print(best_param_lasso['l1'])
best_param_lasso = {}
best_param_lasso['l1'] = 0.0001

legs_df, summary_lasso = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "lasso", l1 = best_param_lasso['l1'])
summary_lasso

{'legs': 24,
 'avg_net_21d_return': 0.0047722449921544586,
 'std_net_21d_return': 0.05430191370205962,
 'annual_return_from_avg_leg': 0.05879421451019917,
 'annual_vol_from_leg_std': 0.18810734696037568,
 'sharpe_from_legs': 0.31255671540880337,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [31]:
# l1_vals, l2_vals = get_l1_l2_lists_for_enet(lambda_vals, alpha_vals)

# best_param_en = parameter_tuning(rets_train, is_in_train, mode = 'stats', markowitz_version = "enet", l1_values = l1_vals, l2_values = l2_vals)

# print(best_param_en['l1']) # 2e-05
# print(best_param_en['l2']) # 8e-05
best_param_en = {}
best_param_en['l1'] = 2e-05
best_param_en['l2'] = 8e-05

legs_df, summary_en = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "enet",l1 = best_param_en['l1'], l2 = best_param_en['l2'])
summary_en




{'legs': 24,
 'avg_net_21d_return': 0.0714555716821828,
 'std_net_21d_return': 0.36028408348029206,
 'annual_return_from_avg_leg': 1.2892330684672322,
 'annual_vol_from_leg_std': 1.2480606754925052,
 'sharpe_from_legs': 1.0329890956290884,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [29]:
legs_df, summary_minvar = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "minvar")
summary_minvar

{'legs': 24,
 'avg_net_21d_return': 0.008756131509745886,
 'std_net_21d_return': 0.021603673929277836,
 'annual_return_from_avg_leg': 0.11028443127980059,
 'annual_vol_from_leg_std': 0.07483732175132074,
 'sharpe_from_legs': 1.473655506356416,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [31]:
legs_df, summary_invvol = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "invvol")
summary_invvol

{'legs': 24,
 'avg_net_21d_return': 0.008612156763954276,
 'std_net_21d_return': 0.028008562866058562,
 'annual_return_from_avg_leg': 0.1083843387891783,
 'annual_vol_from_leg_std': 0.0970245078620008,
 'sharpe_from_legs': 1.1170820772761325,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [37]:
# best_param_rp = parameter_tuning(rets_train, is_in_train, mode = 'stats', markowitz_version = "risk_parity", rp_tau_values = rp_tau_grid)

# print(best_param_rp['rp_tau'])

best_param_rp = {}
best_param_rp['rp_tau'] = 0.0027825594022071257

legs_df, summary_riskparity = backtest_markowitz_independent_legs_from_returns(rets_test, is_in_test, mode = 'stats', markowitz_version = "risk_parity", rp_tau = best_param_rp['rp_tau'])
summary_riskparity

{'legs': 24,
 'avg_net_21d_return': 0.008205216878237144,
 'std_net_21d_return': 0.030419700802898617,
 'annual_return_from_avg_leg': 0.10302989715167388,
 'annual_vol_from_leg_std': 0.10537693468332834,
 'sharpe_from_legs': 0.9777272176429536,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

## Stress test evaluation

In [39]:
stress_test = pd.read_csv("covid_stress_test_data.csv",parse_dates=["Date"])
stress_test["daily_simple_return"] = np.expm1(stress_test["daily_log_return"])
stress_test

Unnamed: 0,Date,Dividends,ticker,is_in_sp500,daily_log_return,daily_volume_return,monthly_log_return_future,forward_split,reverse_split,high_low_spread,...,close_sma_ratio_20,close_ema_ratio_20,close_sma_ratio_50,close_ema_ratio_50,close_sma_ratio_100,close_ema_ratio_100,macd_line_pct,macd_signal_pct,macd_hist_pct,daily_simple_return
0,2020-02-20,0.0,A,1,-0.011905,-0.011905,-0.238256,0.0,0.0,0.019327,...,-0.010419,-0.010644,-0.013873,-0.001349,0.039971,0.029934,-0.002739,-0.002058,-0.000680,-0.011834
1,2020-02-21,0.0,A,1,0.008618,0.008618,-0.294484,0.0,0.0,0.019866,...,0.000494,-0.001883,-0.006179,0.007007,0.047810,0.038050,-0.002487,-0.002130,-0.000357,0.008656
2,2020-02-24,0.0,A,1,-0.055217,-0.055217,-0.183565,0.0,0.0,0.031925,...,-0.048913,-0.050483,-0.059072,-0.045326,-0.008971,-0.017369,-0.006937,-0.003188,-0.003749,-0.053720
3,2020-02-25,0.0,A,1,-0.032190,-0.032190,-0.121961,0.0,0.0,0.040924,...,-0.074333,-0.073452,-0.087749,-0.072820,-0.040750,-0.047582,-0.013179,-0.005270,-0.007909,-0.031677
4,2020-02-26,0.0,A,1,0.002051,0.002051,-0.057844,0.0,0.0,0.027909,...,-0.067410,-0.065180,-0.084479,-0.068326,-0.039362,-0.044764,-0.017542,-0.007715,-0.009826,0.002053
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107861,2020-12-22,0.0,ZTS,1,0.006136,0.006136,-0.001484,0.0,0.0,0.016931,...,0.010396,0.004065,-0.003656,0.003360,0.004212,0.024023,-0.001469,-0.004086,0.002617,0.006155
107862,2020-12-23,0.0,ZTS,1,-0.012311,-0.012311,0.000672,0.0,0.0,0.019643,...,-0.001590,-0.007443,-0.015208,-0.008570,-0.008415,0.011264,-0.002000,-0.003710,0.001709,-0.012235
107863,2020-12-24,0.0,ZTS,1,0.005428,0.005428,-0.046697,0.0,0.0,0.012568,...,0.003678,-0.001847,-0.009515,-0.003050,-0.003345,0.016430,-0.001935,-0.003339,0.001403,0.005442
107864,2020-12-28,0.0,ZTS,1,0.010337,0.010337,-0.035205,0.0,0.0,0.015518,...,0.013816,0.007706,0.000826,0.007020,0.006708,0.026443,-0.001031,-0.002850,0.001819,0.010391


In [40]:
# Building my wide df
stress_rets = stress_test.pivot(index="Date", columns="ticker", values="daily_simple_return").sort_index()

is_in_stress = stress_test.pivot(index="Date", columns="ticker", values="is_in_sp500").sort_index()
is_in_stress = is_in_stress.fillna(0).astype(float).clip(0,1).astype(int)

stress_rets

ticker,A,AAL,AAP,AAPL,ABBV,ABT,ACN,ADBE,ADI,ADM,...,XEL,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-20,-0.011834,0.006354,0.016843,-0.010259,0.000957,-0.010072,-0.009355,-0.011271,0.005327,0.005913,...,-0.001695,-0.007955,-0.019628,0.000272,0.007958,-0.012549,-0.010186,0.009231,0.009605,-0.003540
2020-02-21,0.008656,-0.024202,-0.013125,-0.022635,0.007746,-0.011418,-0.009397,-0.015859,-0.014372,-0.007461,...,0.002123,-0.012195,-0.010271,-0.011963,-0.001579,-0.003466,-0.006313,-0.017839,-0.017730,-0.009196
2020-02-24,-0.053720,-0.085190,-0.004978,-0.047500,-0.019166,-0.032247,-0.035537,-0.041668,-0.043988,-0.024601,...,-0.006354,-0.046846,-0.037467,-0.040726,-0.023610,-0.031494,-0.019442,-0.046101,-0.036760,-0.026999
2020-02-25,-0.031677,-0.091552,-0.016511,-0.033872,-0.042516,-0.041356,-0.035526,-0.026888,-0.023006,-0.055815,...,-0.021742,-0.038325,-0.058845,0.045898,-0.024297,-0.022943,-0.029806,-0.032542,-0.031535,-0.025796
2020-02-26,0.002053,-0.035035,-0.022238,0.015864,-0.008635,0.002835,-0.007255,0.010178,0.001570,-0.013851,...,-0.007118,-0.021956,-0.012038,-0.020022,-0.001665,-0.004594,-0.021172,-0.009955,-0.013214,0.009865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-22,-0.003481,-0.038509,-0.006792,0.028465,-0.008774,0.001758,-0.007459,0.011597,0.011852,-0.012963,...,-0.002001,-0.016925,-0.012013,0.000000,-0.013105,-0.011323,0.015007,0.010774,-0.009191,0.006155
2020-12-23,-0.000596,0.026486,-0.000690,-0.006976,0.004669,-0.007665,-0.007823,-0.014400,-0.007347,0.012723,...,-0.008021,0.012851,-0.001934,0.017873,0.003320,-0.005680,-0.012266,-0.000579,0.034967,-0.012235
2020-12-24,0.000085,-0.014475,0.008726,0.007713,-0.000193,0.008376,-0.000544,0.005937,0.008728,0.001824,...,0.004976,-0.004070,0.004069,-0.008780,0.005214,0.007023,0.003429,-0.007505,-0.003907,0.005442
2020-12-28,0.004433,0.025543,-0.011700,0.035765,0.001840,-0.005168,0.009250,-0.001820,0.001453,0.005259,...,0.010676,0.003366,0.009456,0.031001,0.001197,0.020830,-0.002412,0.015124,0.000462,0.010391


In [41]:
rets_past = rets.iloc[-252:]
isin_past = is_in.iloc[-252:]


stress_rets_final = pd.concat([rets_past,stress_rets])
is_in_stress_final = pd.concat([isin_past, is_in_stress], axis=0).sort_index()



In [98]:
is_in_stress_final

ticker,A,AAL,AAP,AAPL,ABBV,ABT,ACN,ADBE,ADI,ADM,...,XEL,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-02-20,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,1,1
2019-02-21,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,1,1
2019-02-22,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,1,1
2019-02-25,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,1,1
2019-02-26,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-22,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2020-12-23,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2020-12-24,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2020-12-28,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [42]:
common_cols = stress_rets_final.columns.intersection(is_in_stress_final.columns)
stress_rets_final = stress_rets_final[common_cols]
is_in_stress_final = is_in_stress_final[common_cols]
is_in_stress_final = is_in_stress_final.reindex_like(stress_rets_final).fillna(0).astype(int)


In [43]:
legs_df, summary = backtest_markowitz_independent_legs_from_returns(
    stress_rets_final, is_in_stress_final
)
summary

{'legs': 10,
 'avg_net_21d_return': 0.02032604116050905,
 'std_net_21d_return': 0.14447030630484925,
 'annual_return_from_avg_leg': 0.27311505056056684,
 'annual_vol_from_leg_std': 0.5004598214100744,
 'sharpe_from_legs': 0.5457282260762701,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [44]:
best_param_ridge = {}
best_param_ridge['l2'] = 0.0001

legs_df, summary_ridge = backtest_markowitz_independent_legs_from_returns(stress_rets_final, is_in_stress_final, mode = 'stats', markowitz_version = "ridge", l2 = best_param_ridge['l2'])
summary_ridge

{'legs': 10,
 'avg_net_21d_return': 0.020325129190305803,
 'std_net_21d_return': 0.14447208606311904,
 'annual_return_from_avg_leg': 0.2731013956631656,
 'annual_vol_from_leg_std': 0.5004659866735713,
 'sharpe_from_legs': 0.5456942188586651,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [45]:
best_param_lasso = {}
best_param_lasso['l1'] = 0.0001

legs_df, summary_lasso = backtest_markowitz_independent_legs_from_returns(stress_rets_final, is_in_stress_final, mode = 'stats', markowitz_version = "lasso", l1 = best_param_lasso['l1'])
summary_lasso

{'legs': 10,
 'avg_net_21d_return': 0.020337924534204847,
 'std_net_21d_return': 0.14447362079950085,
 'annual_return_from_avg_leg': 0.27329299216475045,
 'annual_vol_from_leg_std': 0.5004713031563504,
 'sharpe_from_legs': 0.5460712541181847,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [46]:
best_param_en = {}
best_param_en['l1'] = 2e-05
best_param_en['l2'] = 8e-05

legs_df, summary_en = backtest_markowitz_independent_legs_from_returns(stress_rets_final, is_in_stress_final, mode = 'stats', markowitz_version = "enet",l1 = best_param_en['l1'], l2 = best_param_en['l2'])
summary_en




{'legs': 10,
 'avg_net_21d_return': 0.020278845202830784,
 'std_net_21d_return': 0.14450114191916805,
 'annual_return_from_avg_leg': 0.27240856344493714,
 'annual_vol_from_leg_std': 0.50056663911144,
 'sharpe_from_legs': 0.5442003964317156,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [47]:
legs_df, summary_minvar = backtest_markowitz_independent_legs_from_returns(stress_rets_final, is_in_stress_final, mode = 'stats', markowitz_version = "minvar")
summary_minvar

{'legs': 10,
 'avg_net_21d_return': -0.0034102500721191185,
 'std_net_21d_return': 0.11109959677835023,
 'annual_return_from_avg_leg': -0.040164092432720566,
 'annual_vol_from_leg_std': 0.3848602926410363,
 'sharpe_from_legs': -0.1043601878414152,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [48]:
legs_df, summary_invvol = backtest_markowitz_independent_legs_from_returns(stress_rets_final, is_in_stress_final, mode = 'stats', markowitz_version = "invvol")
summary_invvol

{'legs': 10,
 'avg_net_21d_return': 0.015403937184052271,
 'std_net_21d_return': 0.1289287411135762,
 'annual_return_from_avg_leg': 0.20134049433170054,
 'annual_vol_from_leg_std': 0.4466222603292167,
 'sharpe_from_legs': 0.45080711871210205,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}

In [49]:
best_param_rp = {}
best_param_rp['rp_tau'] = 0.0027825594022071257

legs_df, summary_riskparity = backtest_markowitz_independent_legs_from_returns(stress_rets_final, is_in_stress_final, mode = 'stats', markowitz_version = "risk_parity", rp_tau = best_param_rp['rp_tau'])
summary_riskparity

{'legs': 10,
 'avg_net_21d_return': 0.018255866506952566,
 'std_net_21d_return': 0.135119402468043,
 'annual_return_from_avg_leg': 0.24246181572132475,
 'annual_vol_from_leg_std': 0.468067340325996,
 'sharpe_from_legs': 0.5180062671163016,
 'cost_bps_one_way': 5.0,
 'gamma': 5.0,
 'name_cap': 0.1}