In [39]:
# Execute the shared config/utilities notebook in this kernel's namespace.
# NOTE(review): presumably this is what defines CONFIG, make_rebalance_dates, pd and np,
# which later cells use without importing — confirm against 00_config_and_utils.ipynb.
%run 00_config_and_utils.ipynb

# Processed inputs, read relative to the notebook's working directory.
ret = pd.read_parquet("../data/processed/ret.parquet")
# Only the "SPY" column is kept, so spy_ret is a Series rather than a DataFrame.
spy_ret = pd.read_parquet("../data/processed/spy_ret.parquet")["SPY"]
beta = pd.read_parquet("../data/processed/beta.parquet")
# Later cells index these columns as (signal name, ticker) pairs.
signals_p = pd.read_parquet("../data/processed/signals_purified.parquet")

# Sanity check: display the shape of every input.
ret.shape, spy_ret.shape, beta.shape, signals_p.shape

((2512, 50), (2512,), (2512, 50), (2512, 150))

In [40]:
import cvxpy as cvx # modeling language for convex optimization; used below to optimize the alpha signal while controlling risk
from sklearn.covariance import LedoitWolf # Ledoit-Wolf shrinkage covariance estimator
# TODO(review): the original author flagged uncertainty about this cell — confirm these are the intended optimizer/covariance tools.

In [41]:
# Turn a window of historical returns into a shrunk covariance matrix.
def ledoit_wolf_cov(returns_window: pd.DataFrame):
    """Fit a Ledoit-Wolf covariance estimator on a window of returns.

    Parameters
    ----------
    returns_window : pd.DataFrame
        Returns to estimate from; each column is treated as one variable.

    Returns
    -------
    The ``covariance_`` attribute of the fitted ``LedoitWolf`` estimator.

    Notes
    -----
    Missing returns are replaced with 0.0 before fitting, so a column with
    many gaps is treated as having had zero return on those rows.
    """
    filled = returns_window.fillna(0.0)
    estimator = LedoitWolf()
    estimator.fit(filled.values)
    return estimator.covariance_

In [42]:
# optimizer solver function
def solve_portfolio(alpha: pd.Series,
                    sigma: np.ndarray,
                    w_prev: pd.Series,
                    beta_vec: pd.Series | None = None):
    """Solve one long/short portfolio optimisation for a single date.

    Minimises::

        -alpha' w
        + CONFIG["lam_risk"]       * w' Sigma w
        + CONFIG["gamma_turnover"] * ||w - w_prev||_1

    subject to ``sum(w) == 0``, ``||w||_1 <= CONFIG["gross_leverage"]``,
    ``-CONFIG["pos_cap"] <= w_i <= CONFIG["pos_cap"]`` and, when ``beta_vec``
    is given, ``beta' w == 0``.

    Parameters
    ----------
    alpha : pd.Series
        Expected-return scores. Its index defines the asset universe and the
        ordering of the result.
    sigma : np.ndarray
        Covariance matrix of shape ``(len(alpha), len(alpha))``, with rows and
        columns in the same order as ``alpha.index``. It is passed to cvxpy as
        a PSD parameter.
    w_prev : pd.Series
        Previous weights. Aligned to ``alpha.index`` by label; assets absent
        from ``w_prev`` are treated as having a previous weight of 0.0, and
        entries for assets not in ``alpha`` are ignored.
    beta_vec : pd.Series, optional
        Betas used for the neutrality constraint. Aligned to ``alpha.index``
        by label.

    Returns
    -------
    pd.Series or None
        Weights indexed by ``alpha.index``, or ``None`` when the solver leaves
        the variable without a value.

    Raises
    ------
    ValueError
        If ``beta_vec`` is given but has no (or a NaN) beta for some asset in
        ``alpha.index``.
    """
    tickers = alpha.index
    n = len(tickers)

    # Align every Series to the alpha universe by label. Using `.values`
    # directly would combine the vectors positionally and silently pair
    # weights/betas with the wrong assets whenever the indexes differ.
    w_prev_vals = w_prev.reindex(tickers).fillna(0.0).values

    beta_vals = None
    if beta_vec is not None:
        beta_aligned = beta_vec.reindex(tickers)
        if beta_aligned.isna().any():
            missing = list(beta_aligned.index[beta_aligned.isna()])
            raise ValueError(f"beta_vec has no value for assets: {missing}")
        beta_vals = beta_aligned.values

    w = cvx.Variable(n)

    Sigma = cvx.Parameter((n, n), PSD=True, value=sigma)

    obj = cvx.Minimize(
        -alpha.values @ w
        + CONFIG["lam_risk"] * cvx.quad_form(w, Sigma)
        + CONFIG["gamma_turnover"] * cvx.norm1(w - w_prev_vals)
    )

    constraints = [
        cvx.sum(w) == 0,  # dollar neutral
        cvx.norm1(w) <= CONFIG["gross_leverage"],  # gross exposure cap
        w <= CONFIG["pos_cap"],  # per-position bounds
        w >= -CONFIG["pos_cap"],
    ]

    if beta_vals is not None:
        constraints.append(beta_vals @ w == 0)  # beta neutral

    prob = cvx.Problem(obj, constraints)
    prob.solve(solver=cvx.OSQP)  # solve with the OSQP backend

    # No value on the variable means the solve produced no solution.
    if w.value is None:
        return None

    return pd.Series(w.value, index=tickers)

In [52]:
# Build the rebalance calendar from the return index using CONFIG["rebalance_rule"]
# (per the original note this yields a sequence of Fridays — confirm against the rule).
reb_dates = make_rebalance_dates(ret.index, CONFIG["rebalance_rule"])
# Take the entry at position 100 so the test date is not too early in the sample.
t = reb_dates[100]
t

Timestamp('2017-12-22 00:00:00')

In [44]:
# Combine the three purified signals into a single alpha vector for date t.
# Each lookup selects one signal's block of columns on row t and drops the
# outer (signal-name) level so the result is indexed by ticker only.
mom_t, rev_t, lowv_t = (
    signals_p.loc[t, (signal_name, slice(None))].droplevel(0)
    for signal_name in ("mom", "rev", "lowvol")
)

# Equal-weight average; a ticker missing any one signal becomes NaN and is dropped.
alpha0 = ((mom_t + rev_t + lowv_t) / 3.0).dropna()
alpha0.head(), alpha0.shape

(ticker
 AAPL    0.800702
 ABBV    0.658953
 ACN     0.326172
 ADBE    0.298149
 AMD    -1.249143
 Name: 2017-12-22 00:00:00, dtype: float64,
 (50,))

In [53]:
# Universe: tickers present in the alpha vector and in both the beta and return panels.
common = alpha0.index.intersection(beta.columns).intersection(ret.columns)

# Diagnostics for the chosen date.
print("t:", t)
print("len(alpha0):", len(alpha0))
print("len(common):", len(common))

# Betas on date t for that universe: report coverage, then keep only tickers with a beta.
beta_row = beta.loc[t, common]
print("non-NaN betas:", beta_row.notna().sum())
beta_vec = beta_row.dropna()

# Alpha restricted to tickers that have a beta (alpha0 itself is left untouched).
alpha_t = alpha0.loc[beta_vec.index]

# Fewer than two assets cannot form a portfolio here, so fail loudly.
if len(alpha_t) < 2:
    raise ValueError(f"Too few assets after beta filter on {t}: {len(alpha_t)}")

# Covariance from the last CONFIG["lookback_cov"] rows of returns up to and
# including t, over the same tickers and in the same order as alpha_t.
ret_win = ret.loc[:t, alpha_t.index].tail(CONFIG["lookback_cov"])
sigma = ledoit_wolf_cov(ret_win)

alpha_t.shape, ret_win.shape, sigma.shape

t: 2017-12-22 00:00:00
len(alpha0): 50
len(common): 50
non-NaN betas: 50


((50,), (252, 50), (50, 50))

In [54]:
# Start from an empty book: previous weight 0.0 for every asset in the universe.
w_prev = pd.Series(0.0, index=alpha_t.index)

w_test = solve_portfolio(alpha_t, sigma, w_prev, beta_vec=beta_vec)

# solve_portfolio returns None when the solver produces no solution; stop here
# with a clear message instead of failing on the summary line below.
if w_test is None:
    raise RuntimeError(f"solve_portfolio returned no solution for {t}")

# Summary: gross exposure, net exposure, most negative and most positive weight.
w_test.abs().sum(), w_test.sum(), w_test.min(), w_test.max()



(0.09426607113451639,
 4.875287887303254e-10,
 -0.020000340860938288,
 0.019999753002385854)

In [56]:
# Check beta neutrality: portfolio beta is the dot product of the betas
# (re-ordered to match w_test's index) with the weights; it should be ~0
# because solve_portfolio was given beta_vec.
float(beta_vec.loc[w_test.index] @ w_test)

-4.401692247958606e-10

In [50]:
# Save the single-date test weights (one column, "w") next to the other
# processed artifacts. The path uses the same "../data/processed/" prefix the
# inputs are read from, so the file does not land in a stray directory under
# the notebook folder.
w_test.to_frame("w").to_parquet("../data/processed/w_test_single_date.parquet")
t

Timestamp('2017-12-22 00:00:00')

In [51]:
# Count positions whose absolute weight exceeds a 1e-6 tolerance, so that
# solver noise around zero is not counted as a holding.
n_active = (w_test.abs() > 1e-6).sum()
print("nonzero positions:", n_active)

nonzero positions: 6
