The empirical exercise should focus on how the sector and factor tilting works when a crisis comes, better diversification provided, consistent risk factor contributions, and greater resilience to economic shocks

I should present two applications:
1. single name 
2. sector rotation strategy

In [1]:
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.ticker as mtick
import cvxpy as cp
from tqdm.notebook import tqdm
from regimeaware.routines import cfg
from itertools import product
from scipy.stats import entropy
from regimeaware.core import utils

rebalance_dts = pd.date_range(start=cfg.bt_start_dt, end=cfg.bt_end_dt, freq=cfg.rebalance_freq)

# CRSP data set loading
crsp = pd.read_pickle(f'{cfg.data_fldr}/crsp_daily.pkl')
crsp['mktcap'] = crsp['shrout'].mul(crsp['prc']).abs().replace(0, np.nan)
crsp['dollar_vol'] = crsp['prc'].mul(crsp['vol'])
crsp['industry'] = crsp['siccd'].apply(utils.assign_industry)

# Load cached factor estimates
factor_covars = pd.read_pickle(f'{cfg.data_fldr}/moments/factor_covars.pkl')
factor_means = pd.read_pickle(f'{cfg.data_fldr}/moments/factor_means.pkl')
factor_loadings = pd.read_pickle(f'{cfg.data_fldr}/exposures/forecasted_betas.pkl')
factor_variance = pd.read_pickle(f'{cfg.data_fldr}/exposures/var.pkl')

# Monthly performance time series
rf = pd.read_pickle(f'{cfg.data_fldr}/ff_daily.pkl')['rf']
rf = rf.add(1).groupby(pd.Grouper(freq=cfg.rebalance_freq)).prod().sub(1)
rt = pd.pivot_table(crsp, index='date', columns='permno', values='ret')
rt = rt.add(1).groupby(pd.Grouper(freq=cfg.rebalance_freq)).prod().sub(1)
rt = rt.replace(0, np.nan)

$$
\begin{equation}
\begin{aligned}
& \underset{w}{\text{argmin}} & & \gamma \left( w^{T} F^{T} \Sigma_{f} F w + w^{T} E w \right) - w^{T} \mu_{f} \\
& \text{s.t.} & & (w - b)^{T} \Sigma (w - b) \leq \bar{\sigma}^{2} \\
& & & \sum_{i=1}^{N} w_i = 1 \\
& & &  w_i \geq 0 \; ; \; \forall \; i =1, \ldots, N \\
\end{aligned}
\end{equation}
$$

In [3]:
indu_t = crsp['industry'].xs(as_of_dt).reindex(tradable_ids)
I = pd.get_dummies(indu_t).astype(int)
indu_labels = I.columns
I = I.values
b = crsp['mktcap'].xs(as_of_dt).reindex(tradable_ids)
b = np.divide(b, b.sum())
b = b.values.reshape(-1, 1)

## Risk aversion

In [14]:
b.shape

(3359, 1)

In [15]:
I.shape

(3359, 10)

In [13]:
b @ I

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3359 is different from 1)

In [6]:
F.shape

(6, 3359)

In [7]:
I.shape

(3359, 10)

In [18]:
(F @ b).shape

(6, 1)

In [26]:
pd.Series((b.T @ I).flatten(), index=indu_labels)

Agriculture       0.000536
Construction      0.003934
Finance           0.159508
Manufacturing     0.482171
Mining            0.057725
Retail            0.078748
Services          0.089944
Transportation    0.070627
Utilities         0.046219
Wholesale         0.010587
dtype: float64

In [32]:
(F @ I).shape

(6, 10)

In [16]:
(F @ b) @ I

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3359 is different from 1)

average betas per sector, this should reduce F to a 10-by-n, but this only works if 

In [4]:
collect_wt = {}
collect_bm = {}

crsp_dts = crsp.index.get_level_values('date').unique()
for g, dt in product(cfg.gamma_iter, rebalance_dts):
    print(f'Gamma: {g}, Date: {dt.strftime("%Y %b")}', end='       \r')
    as_of_dt = crsp_dts.asof(dt)
    loadings_t = utils.unpack_betas(factor_loadings.xs(dt))
    tradable_ids = loadings_t.join(crsp[['ret', 'mktcap']].xs(as_of_dt)).dropna().index
    mu_f = factor_means.xs(dt)[cfg.factor_set].values.reshape(-1, 1)
    mu_f_const = np.concatenate([np.array([[1]]), mu_f], axis=0)  # Adding back the constant
    Sigma_f = factor_covars.xs(dt).loc[cfg.factor_set, cfg.factor_set].values
    F = loadings_t.reindex(tradable_ids).values.T
    E = np.diag(factor_variance.xs(dt).reindex(tradable_ids))

    indu_t = crsp['industry'].xs(as_of_dt).reindex(tradable_ids)
    I = pd.get_dummies(indu_t).astype(int)
    indu_labels = I.columns
    I = I.values

    b = crsp['mktcap'].xs(as_of_dt).reindex(tradable_ids)
    b = np.divide(b, b.sum())
    collect_bm[dt] = b.copy()
    b = b.values.reshape(-1, 1)

    # Optimization problem
    tev_budget = cp.Parameter(nonneg=True)
    gamma = cp.Parameter(nonneg=True)

    m, n = F.shape
    w = cp.Variable((n, 1))
    f = cp.Variable((m, 1))

    Sigma_f_const = np.zeros((m, m))
    Sigma_f_const[1:, 1:] = Sigma_f

    port_risk = cp.quad_form(f, Sigma_f_const) + cp.sum_squares(np.sqrt(E) @ (w - b))
    port_return = mu_f_const.T @ f

    constraints = [
        cp.sum(w) == 1,
        f == F @ (w - b),
        w >= 0
    ]

    gamma.value = g

    prob = cp.Problem(cp.Maximize(port_return - gamma * port_risk), constraints)
    prob.solve(verbose=False, solver=cp.CLARABEL)
    collect_wt[(g, dt)] = pd.Series(w.value.flatten(), index=tradable_ids)
    bm_indu_wts = pd.Series((b.T @ I).flatten(), indu_labels)
    bt_indu_wts = pd.Series((w.value.T @ I).flatten(), indu_labels)

wts = pd.DataFrame.from_dict(collect_wt, orient='index').fillna(0)
wts.index.names = ['gamma', 'date']

collect_bt = {}
for g in cfg.gamma_iter:
    wt = wts.xs(g)
    collect_bt[g] = wt.shift(1).mul(rt).dropna(how='all').sum(axis=1)

# Backtests
bt = pd.DataFrame.from_dict(collect_bt)
bt.columns = [f'Gamma: {x}' for x in bt.columns]

# Benchmark
bm_wt = pd.DataFrame.from_dict(collect_bm, orient='index')
bm_rt = bm_wt.shift(1).mul(rt.reindex(bm_wt.columns, axis=1)).dropna(how='all').sum(axis=1)

# Stats/plots
df = bm_rt.to_frame(name='Benchmark').join(bt).add(1).cumprod()
df = df.div(df.iloc[0])
df.apply(np.log).plot()

tracking = df.pct_change().sub(df['Benchmark'].pct_change(), axis=0).drop('Benchmark', axis=1)
ir = tracking.mean().div(tracking.std()).mul(np.sqrt(12))
sr = df.pct_change().mean().div(df.pct_change().std()).mul(np.sqrt(12))
display(ir.sort_values())
display(sr.sort_values())
display(tracking.std().mul(np.sqrt(12)).sort_values())

Gamma: 10, Date: 2008 Nov       

KeyboardInterrupt: 

In [None]:
zero_flags = np.isclose(bm_wt, 0, atol=1e-8)
total = bm_wt.mask(zero_flags).count(axis=1)
wts.mask(zero_flags).count(axis=1).groupby('gamma').mean()

Anchor the risk aversion parameter to TEV, enough to generate around 6%

## TEV cap

In [None]:
collect_wt = {}
collect_bm = {}
tev_to_test = np.arange(start=0.01, stop=.08, step=.01)
crsp_dts = crsp.index.get_level_values('date').unique()
for dt, tev in product(rebalance_dts, tev_to_test):
    print(f'TEV: {tev}, Date: {dt.strftime("%Y %b")}', end='       \r')
    as_of_dt = crsp_dts.asof(dt)
    loadings_t = utils.unpack_betas(factor_loadings.xs(dt))
    tradable_ids = loadings_t.join(crsp[['ret', 'mktcap']].xs(as_of_dt)).dropna().index
    mu_f = factor_means.xs(dt)[cfg.factor_set].values.reshape(-1, 1)
    mu_f_const = np.concatenate([np.array([[1]]), mu_f], axis=0)  # Adding back the constant
    Sigma_f = factor_covars.xs(dt).loc[cfg.factor_set, cfg.factor_set].values
    F = loadings_t.reindex(tradable_ids).values.T
    E = np.diag(factor_variance.xs(dt).reindex(tradable_ids))

    b = crsp['mktcap'].xs(as_of_dt).reindex(tradable_ids)
    b = np.divide(b, b.sum())
    collect_bm[dt] = b.copy()
    b = b.values.reshape(-1, 1)

    # Optimization problem
    tev_budget = cp.Parameter(nonneg=True)
    gamma = cp.Parameter(nonneg=True)

    m, n = F.shape
    w = cp.Variable((n, 1))
    f = cp.Variable((m, 1))

    Sigma_f_const = np.zeros((m, m))
    Sigma_f_const[1:, 1:] = Sigma_f

    port_risk = cp.quad_form(f, Sigma_f_const) + cp.sum_squares(np.sqrt(E) @ (w - b))
    port_return = mu_f_const.T @ f

    constraints = [
        cp.sum(w) == 1,
        f == F @ (w - b),
        w >= 0,
        port_risk <= (tev ** 2) / 12
    ]

    gamma.value = g

    prob = cp.Problem(cp.Maximize(port_return), constraints)
    prob.solve(verbose=False, solver=cp.CLARABEL)
    collect_wt[(tev, dt)] = pd.Series(w.value.flatten(), index=tradable_ids)

wts = pd.DataFrame.from_dict(collect_wt, orient='index').fillna(0)
wts.index.names = ['tev', 'date']

collect_bt = {}
for tev in tev_to_test:
    wt = wts.xs(tev)
    collect_bt[tev] = wt.shift(1).mul(rt).dropna(how='all').sum(axis=1)


# Backtests
bt = pd.DataFrame.from_dict(collect_bt)
bt.columns = [f'TEV: {x}' for x in bt.columns]

# Benchmark
bm_wt = pd.DataFrame.from_dict(collect_bm, orient='index')
bm_rt = bm_wt.shift(1).mul(rt.reindex(bm_wt.columns, axis=1)).dropna(how='all').sum(axis=1)

df = bm_rt.to_frame(name='Benchmark').join(bt).add(1).cumprod()
df = df.div(df.iloc[0])
df.apply(np.log).plot()

tracking = df.pct_change().sub(df['Benchmark'].pct_change(), axis=0).drop('Benchmark', axis=1)
ir = tracking.mean().div(tracking.std()).mul(np.sqrt(12))
sr = df.pct_change().mean().div(df.pct_change().std()).mul(np.sqrt(12))
display(ir.sort_values())
display(sr.sort_values())
display(tracking.std().mul(np.sqrt(12)).sort_values())

## Sector Rotation

In [12]:
crsp_dts = crsp.index.get_level_values('date').unique()
dt = rebalance_dts[0]

as_of_dt = crsp_dts.asof(dt)
loadings_t = utils.unpack_betas(factor_loadings.xs(dt))
tradable_ids = loadings_t.join(crsp[['ret', 'mktcap']].xs(as_of_dt)).dropna().index
indu_t = crsp['industry'].xs(as_of_dt).reindex(tradable_ids)
mu_f = factor_means.xs(dt)[cfg.factor_set].values.reshape(-1, 1)
mu_f_const = np.concatenate([np.array([[1]]), mu_f], axis=0)  # Adding back the constant
Sigma_f = factor_covars.xs(dt).loc[cfg.factor_set, cfg.factor_set].values
F = loadings_t.reindex(tradable_ids).values.T
E = np.diag(factor_variance.xs(dt).reindex(tradable_ids))
I = pd.get_dummies(indu_t).astype(int)
indu_labels = I.columns
I = I.values
b = crsp['mktcap'].xs(as_of_dt).reindex(tradable_ids)
b = np.divide(b, b.sum())
b = b.values.reshape(-1, 1)
n = F.shape[1]

In [16]:
np.ones((n, 1)).T @ I  # Number of 

array([[  10.,   46.,  745., 1611.,  140.,  263.,  640.,  170.,  121.,
         176.]])

In [24]:
c = crsp['mktcap'].xs(as_of_dt).reindex(tradable_ids).values.reshape(-1, 1)

In [25]:
c.T @ I

array([[4.43325270e+06, 2.79393807e+07, 2.01078484e+09, 4.23774386e+09,
        2.28087657e+08, 7.26791120e+08, 9.37928980e+08, 6.26215711e+08,
        2.86926829e+08, 1.13410932e+08]])

In [26]:
I.shape

(3922, 10)

In [36]:
np.diag(c.flatten()) @ I  # mcap casted onto the mapping matrix

array([[      0.   ,       0.   ,       0.   , ...,       0.   ,
          19046.441,       0.   ],
       [      0.   ,       0.   ,   96891.9  , ...,       0.   ,
              0.   ,       0.   ],
       [      0.   ,       0.   ,       0.   , ...,       0.   ,
              0.   ,       0.   ],
       ...,
       [      0.   ,       0.   ,   39609.3  , ...,       0.   ,
              0.   ,       0.   ],
       [      0.   ,       0.   ,       0.   , ...,       0.   ,
              0.   ,       0.   ],
       [      0.   ,       0.   , 1121541.45 , ...,       0.   ,
              0.   ,       0.   ]])

In [37]:
np.ones((n, 1)).T  @ np.diag(c.flatten()) @ I  # Total mcap by industry

array([[4.43325270e+06, 2.79393807e+07, 2.01078484e+09, 4.23774386e+09,
        2.28087657e+08, 7.26791120e+08, 9.37928980e+08, 6.26215711e+08,
        2.86926829e+08, 1.13410932e+08]])

In [38]:
indu_size = np.ones((n, 1)).T  @ np.diag(c.flatten()) @ I
flat_size = np.diag(c.flatten()) @ I

In [40]:
flat_size.shape

(3922, 10)

In [46]:
flat_size @ np.linalg.inv(np.diag(indu_size.flatten()))

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 6.63808298e-05, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 4.81861102e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [0.00000000e+00, 0.00000000e+00, 1.96984278e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 5.57763032e-04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [48]:
pd.DataFrame(flat_size @ np.linalg.inv(np.diag(indu_size.flatten()))).sum()

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
5    1.0
6    1.0
7    1.0
8    1.0
9    1.0
dtype: float64

In [21]:
indu_t.groupby(indu_t).count()

industry
Agriculture         10
Construction        46
Finance            745
Manufacturing     1611
Mining             140
Retail             263
Services           640
Transportation     170
Utilities          121
Wholesale          176
Name: industry, dtype: int64

In [7]:
F.shape

(6, 3922)

In [None]:
collect_wt = {}
collect_bm = {}
tev_to_test = np.arange(start=0.01, stop=.08, step=.01)
crsp_dts = crsp.index.get_level_values('date').unique()
for dt, tev in product(rebalance_dts, tev_to_test):
    print(f'TEV: {tev}, Date: {dt.strftime("%Y %b")}', end='       \r')
    as_of_dt = crsp_dts.asof(dt)
    loadings_t = utils.unpack_betas(factor_loadings.xs(dt))
    tradable_ids = loadings_t.join(crsp[['ret', 'mktcap']].xs(as_of_dt)).dropna().index
    indu_t = crsp['industry'].xs(as_of_dt).reindex(tradable_ids)
    mu_f = factor_means.xs(dt)[cfg.factor_set].values.reshape(-1, 1)
    mu_f_const = np.concatenate([np.array([[1]]), mu_f], axis=0)  # Adding back the constant
    Sigma_f = factor_covars.xs(dt).loc[cfg.factor_set, cfg.factor_set].values
    F = loadings_t.reindex(tradable_ids).values.T
    E = np.diag(factor_variance.xs(dt).reindex(tradable_ids))
    I = pd.get_dummies(indu_t).astype(int)
    indu_labels = I.columns
    I = I.values
    b = crsp['mktcap'].xs(as_of_dt).reindex(tradable_ids)
    b = np.divide(b, b.sum())
    b = b.values.reshape(-1, 1)

    # Optimization problem
    tev_budget = cp.Parameter(nonneg=True)
    gamma = cp.Parameter(nonneg=True)

    m, n = F.shape
    w = cp.Variable((n, 1))
    f = cp.Variable((m, 1))

    Sigma_f_const = np.zeros((m, m))
    Sigma_f_const[1:, 1:] = Sigma_f

    port_risk = cp.quad_form(f, Sigma_f_const) + cp.sum_squares(np.sqrt(E) @ (w - b))
    port_return = mu_f_const.T @ f

    constraints = [
        cp.sum(w) == 1,
        f == F @ (w - b),
        w >= 0,
        port_risk <= (tev ** 2) / 12
    ]

    gamma.value = g

    prob = cp.Problem(cp.Maximize(port_return), constraints)
    prob.solve(verbose=False, solver=cp.CLARABEL)
    collect_wt[(tev, dt)] = pd.Series(w.value.flatten(), index=tradable_ids)

wts = pd.DataFrame.from_dict(collect_wt, orient='index').fillna(0)
wts.index.names = ['tev', 'date']

collect_bt = {}
for tev in tev_to_test:
    wt = wts.xs(tev)
    collect_bt[tev] = wt.shift(1).mul(rt).dropna(how='all').sum(axis=1)


# Backtests
bt = pd.DataFrame.from_dict(collect_bt)
bt.columns = [f'TEV: {x}' for x in bt.columns]

# Benchmark
bm_wt = pd.DataFrame.from_dict(collect_bm, orient='index')
bm_rt = bm_wt.shift(1).mul(rt.reindex(bm_wt.columns, axis=1)).dropna(how='all').sum(axis=1)

df = bm_rt.to_frame(name='Benchmark').join(bt).add(1).cumprod()
df = df.div(df.iloc[0])
df.apply(np.log).plot()

tracking = df.pct_change().sub(df['Benchmark'].pct_change(), axis=0).drop('Benchmark', axis=1)
ir = tracking.mean().div(tracking.std()).mul(np.sqrt(12))
sr = df.pct_change().mean().div(df.pct_change().std()).mul(np.sqrt(12))
display(ir.sort_values())
display(sr.sort_values())
display(tracking.std().mul(np.sqrt(12)).sort_values())

In [None]:
zero_flags = np.isclose(bm_wt, 0, atol=1e-8)
total = bm_wt.mask(zero_flags).count(axis=1)
zero_flags = np.isclose(wts, 0, atol=1e-8)
wts.mask(zero_flags).count(axis=1).groupby('gamma').mean()

In [None]:
1 / (.06 / np.sqrt(12))