In [1]:
import numpy as np
import pandas as pd
import strat_models
import matplotlib.pyplot as plt

# NOTE(review): the star imports below presumably supply the helpers that are
# used unqualified later in the notebook (e.g. get_data_dict, make_G, corr,
# huber_return_loss, covariance_max_likelihood_loss,
# correlation_from_covariance); which module defines which is not visible
# from this file.
from risk_return_models import *
from utils import *

# Seed NumPy's legacy global RNG so NumPy-based randomness is repeatable.
np.random.seed(0)

In [2]:
def _read_dated_csv(path):
    """Read a CSV indexed by its "DATE" column and convert that index to datetimes."""
    frame = pd.read_csv(path, index_col="DATE")
    frame.index = pd.to_datetime(frame.index)
    return frame


# Z_* frames: their rows are later turned into tuples that key the graph nodes.
Z_train = _read_dated_csv("data/Z_train.csv")
Z_val = _read_dated_csv("data/Z_val.csv")

# df_* frames: one column per asset (the column count defines num_assets).
df_train = _read_dated_csv("data/df_train.csv")
df_val = _read_dated_csv("data/df_val.csv")

num_quantiles = 10
num_assets = len(df_train.columns)

# First training date, formatted as YYYY-MM-DD.
start_train = df_train.index[0].strftime("%Y-%m-%d")

In [3]:
# Fit the stratified return model and score it on the train/validation splits.

# Solver options forwarded verbatim to StratifiedModel.fit.
kwargs = dict(verbose=True, abs_tol=1e-6, maxiter=150, rho=1, n_jobs=8)

train_return = get_data_dict(df_Y=df_train, df_Z=Z_train, num_assets=num_assets)
val_return = get_data_dict(df_Y=df_val, df_Z=Z_val, num_assets=num_assets)

# M -> huber_return_loss, local -> sum_squares_reg weight, w1..w3 -> make_G.
M, local, w1, w2, w3 = 0.01, 0.01, 20, 50, 5000

print(M, local, w1, w2, w3)

G = make_G(w1=w1, w2=w2, w3=w3)

loss = huber_return_loss(M=M)
reg = strat_models.sum_squares_reg(lambd=local)

bm = strat_models.BaseModel(loss=loss, reg=reg)
sm = strat_models.StratifiedModel(BaseModel=bm, graph=G)

sm.fit(data=train_return, **kwargs)


def _stack_node_thetas(model, Z):
    """Stack, one row per date in ``Z.index``, the fitted "theta" of that date's node.

    The node key is the tuple of the values in the corresponding row of ``Z``.
    Node attributes are read through the public ``G.nodes[...]`` view rather
    than the graph's private ``_node`` mapping.
    """
    return np.vstack([
        model.G.nodes[tuple(Z.loc[date].values)]["theta"] for date in Z.index])


preds_train = _stack_node_thetas(sm, Z_train)
preds_val = _stack_node_thetas(sm, Z_val)

corr_train = corr(preds=preds_train, df=df_train)
corr_val = corr(preds=preds_val, df=df_val)

print("STRATIFIED RETURN MODEL:")
print("\t train = {}".format(corr_train))
print("\t val = {}".format(corr_val))

# Snapshot every node's theta: `sm` is rebound when the risk model is fitted,
# so the copies keep the return estimates available afterwards.
returns = {node: sm.G.nodes[node]["theta"].copy() for node in sm.G.nodes()}

0.01 0.01 20 50 5000
 it |     s_norm     r_norm    eps_pri   eps_dual    rho  time1  time2  time3
  1 | 5.7873e-02 2.6575e-04 2.4761e-04 2.4761e-04 1.000 69.658 0.188 15.201
  2 | 9.5381e-02 2.1738e-03 2.8512e-04 4.3777e-04 2.000 49.222 0.122 16.440
  3 | 1.2042e-01 1.1733e-02 3.1015e-04 9.1812e-04 4.000 45.638 0.051 15.660
  4 | 1.3001e-01 4.8310e-02 3.1975e-04 1.9553e-03 8.000 44.758 0.031 15.272
  5 | 1.2746e-01 7.8612e-02 3.1720e-04 2.9714e-03 8.000 40.166 0.036 18.193
  6 | 1.1646e-01 9.6460e-02 3.0620e-04 3.8983e-03 8.000 42.564 0.036 14.826
  7 | 1.0027e-01 9.2094e-02 2.9001e-04 4.6945e-03 8.000 41.442 0.029 14.607
  8 | 8.1062e-02 5.5631e-02 2.7080e-04 5.3363e-03 8.000 43.919 0.207 16.319
  9 | 6.0655e-02 4.9046e-02 2.5039e-04 5.8151e-03 8.000 39.350 0.027 14.860
 10 | 4.1002e-02 2.8936e-02 2.3074e-04 6.1327e-03 8.000 37.554 0.027 14.816
 11 | 2.5303e-02 3.4028e-02 2.2376e-04 6.3038e-03 8.000 41.685 0.028 14.875
 12 | 1.9289e-02 2.1616e-02 2.1135e-04 6.3496e-03 8.000 40.237 0.

In [4]:
# Baseline "common" return model: the per-asset mean of the training frame,
# used as the prediction for every date.
common_return = df_train.mean(axis=0)

# Repeat the same mean vector once per date of each split.
preds_train = np.vstack([common_return] * len(Z_train.index))
preds_test = np.vstack([common_return] * len(Z_val.index))

# NOTE: despite the "anll" in their names these hold the output of corr(),
# and the "test" figures are computed on the validation split (Z_val/df_val).
common_anll_train = corr(preds=preds_train, df=df_train)
common_anll_test = corr(preds=preds_test, df=df_val)

print("COMMON RETURN MODEL:")
for line in ("\t train = {}".format(common_anll_train),
             "\t test = {}".format(common_anll_test)):
    print(line)

COMMON RETURN MODEL:
	 train = 0.018213169510868762
	 test = 0.0011977400112646037


In [5]:
# One row per graph node, one column per asset: the stratified return estimates.
node_returns = np.vstack(list(returns.values()))
rets = pd.DataFrame(data=node_returns, columns=df_train.columns)

# Median / min / max across nodes for each asset, with assets as rows.
node_summary = rets.describe().loc[["50%", "min", "max"]]
tab = node_summary.rename(index={"50%":"median"}).T

# Put the common-model estimate alongside, then leave VTI out of the table.
tab["common"] = common_return
tab = tab[["common", "median", "min", "max"]].drop("VTI")

# Displayed scaled by 100 and rounded to three decimals.
(tab*100).round(3)

Unnamed: 0,common,median,min,max
AGG,-0.015,-0.064,-0.109,0.045
DBC,-0.049,-0.05,-0.131,0.076
GLD,-0.007,-0.017,-0.111,0.13
IBB,0.04,0.045,-0.053,0.132
ITA,0.022,0.029,-0.062,0.059
PBJ,0.009,0.007,-0.038,0.096
TLT,0.011,-0.053,-0.162,0.092
VNQ,0.015,0.008,-0.229,0.064
XLB,0.003,0.014,-0.033,0.066
XLE,-0.001,0.02,-0.081,0.113


In [6]:
# Fit the stratified risk (covariance) model and report train/validation ANLL.

train_cov = get_data_dict(df_Y=df_train, df_Z=Z_train, num_assets=num_assets)
val_cov = get_data_dict(df_Y=df_val, df_Z=Z_val, num_assets=num_assets)

# Scale every "Y" entry that is not numerically all-zero by 100; entries that
# are all-zero are left untouched.
for split_data in (train_cov, val_cov):
    for i, Y_i in enumerate(split_data["Y"]):
        if not np.allclose(Y_i, 0):
            split_data["Y"][i] = Y_i*100

# Solver options forwarded verbatim to StratifiedModel.fit.
kwargs = dict(verbose=True, abs_tol=1e-3, maxiter=2500, rho=25, n_jobs=16)

w1, w2, w3 = 0.2, 20, 50

print("(w1,w2,w3)=({:.3f},{:.3f},{:.3f})".format(w1,w2,w3))

G = make_G(w1=w1, w2=w2, w3=w3)

loss = covariance_max_likelihood_loss()
reg = strat_models.trace_reg(lambd=0)

bm = strat_models.BaseModel(loss=loss, reg=reg)
sm = strat_models.StratifiedModel(BaseModel=bm, graph=G)

sm.fit(data=train_cov, **kwargs)

print("STRATIFIED RISK MODEL:")
print("\t(w1,w2,w3)=({:.3f},{:.3f},{:.3f})".format(w1, w2, w3))
print("\t{:.6f} {:.6f}".format(sm.anll(train_cov), sm.anll(val_cov)))

# Per-node covariance = inverse of the fitted per-node theta. np.linalg.inv
# already returns a new array, so no defensive copy of theta is needed; node
# attributes are read through the public G.nodes[...] view.
covs = {node: np.linalg.inv(sm.G.nodes[node]["theta"]) for node in sm.G.nodes()}



(w1,w2,w3)=(0.200,20.000,50.000)
 it |     s_norm     r_norm    eps_pri   eps_dual    rho  time1  time2  time3
  1 | 2.5745e+01 3.9384e+02 1.1988e+00 1.4486e+00 25.000 137.095 6.292 235.171
  2 | 1.4655e+01 3.2132e+02 1.1263e+00 1.5087e+00 10.000 320.296 6.989 240.870
  3 | 1.9118e+01 2.5940e+02 1.0644e+00 1.4594e+00 5.000 327.433 7.071 247.187
  4 | 4.1429e+01 2.0767e+02 1.0127e+00 1.3794e+00 2.500 339.846 6.244 235.090
  5 | 4.7942e+01 1.5508e+02 9.6007e-01 1.2863e+00 2.500 302.856 7.398 229.776
  6 | 4.3891e+01 1.1246e+02 9.1744e-01 1.2090e+00 2.500 305.141 5.918 229.512
  7 | 3.5803e+01 8.2738e+01 8.8772e-01 1.1600e+00 2.500 279.207 5.829 230.120
  8 | 2.7699e+01 6.3972e+01 8.6896e-01 1.1382e+00 2.500 287.037 5.888 229.629
  9 | 2.1021e+01 5.2784e+01 8.5777e-01 1.1349e+00 2.500 306.232 5.717 227.596
 10 | 1.5963e+01 4.6041e+01 8.5103e-01 1.1410e+00 2.500 264.391 5.736 229.936
 11 | 1.2321e+01 4.1553e+01 8.4654e-01 1.1507e+00 2.500 294.248 6.305 229.840
 12 | 9.7830e+00 3.8124e+01 8

105 | 7.4277e-01 3.4254e+00 8.0841e-01 1.2709e+00 0.625 295.528 5.869 231.179
106 | 7.1608e-01 3.3801e+00 8.0836e-01 1.2710e+00 0.625 276.312 6.224 229.947
107 | 7.3367e-01 3.3479e+00 8.0833e-01 1.2711e+00 0.625 274.016 5.836 232.553
108 | 7.1027e-01 3.3050e+00 8.0829e-01 1.2713e+00 0.625 349.736 6.110 240.334
109 | 7.2840e-01 3.2744e+00 8.0826e-01 1.2714e+00 0.625 286.678 5.824 232.126
110 | 7.0795e-01 3.2334e+00 8.0822e-01 1.2716e+00 0.625 289.628 6.289 230.954
111 | 7.2689e-01 3.2043e+00 8.0819e-01 1.2717e+00 0.625 295.207 6.052 228.978
112 | 7.0953e-01 3.1649e+00 8.0815e-01 1.2718e+00 0.625 266.237 5.838 231.805
113 | 7.2939e-01 3.1374e+00 8.0812e-01 1.2720e+00 0.625 281.435 5.666 230.624
114 | 7.1462e-01 3.0995e+00 8.0808e-01 1.2721e+00 0.625 284.578 6.025 231.048
115 | 7.3476e-01 3.0736e+00 8.0806e-01 1.2722e+00 0.625 275.274 5.879 231.565
116 | 7.2157e-01 3.0373e+00 8.0802e-01 1.2724e+00 0.625 271.194 5.849 229.265
117 | 7.4095e-01 3.0130e+00 8.0800e-01 1.2725e+00 0.625 285.096 

211 | 5.4978e-01 1.8074e+00 8.0679e-01 1.2794e+00 0.625 284.003 5.927 230.886
212 | 5.4737e-01 1.8008e+00 8.0679e-01 1.2795e+00 0.625 296.969 5.773 229.740
213 | 5.4869e-01 1.7950e+00 8.0678e-01 1.2795e+00 0.625 316.577 5.727 229.640
214 | 5.4668e-01 1.7888e+00 8.0677e-01 1.2796e+00 0.625 298.207 5.842 230.775
215 | 5.4804e-01 1.7829e+00 8.0677e-01 1.2796e+00 0.625 306.604 5.956 229.721
216 | 5.4576e-01 1.7772e+00 8.0676e-01 1.2796e+00 0.625 368.542 6.128 240.109
217 | 5.4654e-01 1.7713e+00 8.0676e-01 1.2797e+00 0.625 336.815 5.918 230.935
218 | 5.4342e-01 1.7661e+00 8.0675e-01 1.2797e+00 0.625 269.539 6.040 230.074
219 | 5.4316e-01 1.7604e+00 8.0674e-01 1.2798e+00 0.625 283.503 5.899 231.144
220 | 5.3890e-01 1.7558e+00 8.0674e-01 1.2798e+00 0.625 278.581 5.888 230.335
221 | 5.3749e-01 1.7502e+00 8.0673e-01 1.2798e+00 0.625 275.790 5.864 230.637
222 | 5.3214e-01 1.7461e+00 8.0673e-01 1.2799e+00 0.625 276.557 5.892 231.211
223 | 5.2982e-01 1.7407e+00 8.0673e-01 1.2799e+00 0.625 292.474 

317 | 3.8317e-01 1.4067e+00 8.0639e-01 1.2822e+00 0.625 281.793 5.877 226.635
318 | 3.8576e-01 1.4032e+00 8.0639e-01 1.2822e+00 0.625 282.373 5.853 226.048
319 | 3.8993e-01 1.4002e+00 8.0638e-01 1.2822e+00 0.625 263.522 5.722 225.982
320 | 3.9232e-01 1.3964e+00 8.0638e-01 1.2823e+00 0.625 281.932 5.878 226.728
321 | 3.9536e-01 1.3941e+00 8.0638e-01 1.2823e+00 0.625 261.576 5.890 228.121
322 | 3.9602e-01 1.3904e+00 8.0637e-01 1.2823e+00 0.625 273.016 5.798 226.567
323 | 3.9663e-01 1.3889e+00 8.0637e-01 1.2823e+00 0.625 287.264 5.813 225.325
324 | 3.9466e-01 1.3857e+00 8.0637e-01 1.2823e+00 0.625 274.645 5.685 228.532
325 | 3.9235e-01 1.3849e+00 8.0637e-01 1.2823e+00 0.625 326.746 5.942 239.936
326 | 3.8776e-01 1.3820e+00 8.0637e-01 1.2823e+00 0.625 372.050 5.942 226.412
327 | 3.8306e-01 1.3818e+00 8.0637e-01 1.2824e+00 0.625 315.815 5.930 226.936
328 | 3.7690e-01 1.3790e+00 8.0636e-01 1.2824e+00 0.625 316.632 5.950 227.036
329 | 3.7134e-01 1.3788e+00 8.0636e-01 1.2824e+00 0.625 357.401 

In [7]:
# Common risk model: one covariance for every node, estimated from all of the
# training data with VTI removed, scored with the same anll() metric.

train_cov = get_data_dict(df_Y=df_train, df_Z=Z_train, num_assets=num_assets)
val_cov = get_data_dict(df_Y=df_val, df_Z=Z_val, num_assets=num_assets)

# Position of VTI among the asset columns, looked up instead of hard-coded.
# NOTE(review): the Y entries are assumed to have their axis 0 ordered like
# df_train.columns (the original used the same literal index for both) — confirm.
VTI_idx = np.where(df_train.columns=="VTI")[0][0]

# Scale every non-all-zero "Y" entry by 100 and drop the VTI row from it.
for split_data in (train_cov, val_cov):
    for i, Y_i in enumerate(split_data["Y"]):
        if not np.allclose(Y_i, 0):
            split_data["Y"][i] = np.delete(Y_i*100, VTI_idx, axis=0)

theta_common = (df_train*100).cov().values

# No fit is run below; the graph only provides the node set whose parameters
# are filled in by hand.
G = make_G(w1=1e10, w2=1e10, w3=1e10)

loss = covariance_max_likelihood_loss()
reg = strat_models.trace_reg(lambd=0)

# Remove VTI's row and column from the common covariance.
new_theta_common = np.delete(np.delete(theta_common, VTI_idx, axis=1), VTI_idx, axis=0)

bm_common = strat_models.BaseModel(loss=loss, reg=reg)
sm_common = strat_models.StratifiedModel(BaseModel=bm_common, graph=G)

# Invert once (the result is the same for every node) and hand each slot its
# own copy so no two node attributes alias the same array.
common_precision = np.linalg.inv(new_theta_common)
for node in G.nodes():
    node_attrs = sm_common.G.nodes[node]
    node_attrs["theta"] = common_precision.copy()
    node_attrs["theta_tilde"] = common_precision.copy()
    node_attrs["theta_hat"] = common_precision.copy()

print("COMMON RISK MODEL:")
print("train:", sm_common.anll(train_cov))
print("validation:", sm_common.anll(val_cov))

COMMON RISK MODEL:
train: 3.4724301956040864
validation: 4.985193422130269


In [8]:
# Common per-asset volatility: square root of the diagonal of the covariance
# of the x100-scaled training frame, with the 100*100 factor divided back out.
common_cov_scaled = (100*df_train).cov().values
common_vols = np.sqrt(common_cov_scaled.diagonal()/(100*100))

# Same computation for each node's covariance in `covs` (one row per node).
node_vols = [np.sqrt(node_cov.diagonal()/(100*100)) for node_cov in covs.values()]
vols = pd.DataFrame(data=np.vstack(node_vols), columns=df_train.columns)

# Median / min / max across nodes per asset, next to the common value; VTI is
# left out of the table.
node_summary = vols.describe().loc[["50%", "min", "max"]]
tab = node_summary.rename(index={"50%":"Median"}).T
tab["Common"] = common_vols
tab = tab[["Common", "Median", "min", "max"]].drop("VTI")

# Displayed scaled by 100 and rounded to three decimals.
(tab*100).round(3)

Unnamed: 0,Common,Median,min,max
AGG,1.314,0.906,0.586,4.135
DBC,1.285,1.07,0.778,3.87
GLD,1.671,1.269,0.982,5.201
IBB,0.905,0.823,0.694,2.12
ITA,0.618,0.557,0.492,1.428
PBJ,0.65,0.513,0.437,1.915
TLT,1.816,1.334,0.809,5.828
VNQ,1.328,0.786,0.666,4.409
XLB,0.771,0.641,0.507,1.703
XLE,1.019,0.857,0.686,2.401


In [9]:
# Position of AGG among the asset columns.
AGG_idx = np.where(df_train.columns=="AGG")[0][0]

# Common model: AGG's row of the correlation matrix derived from the training
# covariance, stored as a single-column frame indexed by asset.
common_corr_mtx = correlation_from_covariance(df_train.cov().values)
common_corrs = pd.DataFrame(
    data=common_corr_mtx[AGG_idx].reshape(-1,1),
    index=df_train.columns,
    columns=["Common"],
)

# Stratified model: the same AGG row, taken from every node's covariance.
corrs_strat = []
for key, node_cov in covs.items():
    corr_mtx = correlation_from_covariance(node_cov)
    corrs_strat.append(corr_mtx[AGG_idx])

corrs = pd.DataFrame(data=np.vstack(corrs_strat), columns=df_train.columns)

# Median / min / max across nodes per asset, next to the common value; VTI is
# left out of the table.
node_summary = corrs.describe().loc[["50%", "min", "max"]]
tab = node_summary.rename(index={"50%":"Median"}).T
tab["Common"] = common_corrs

tab = tab[["Common", "Median", "min", "max"]].drop("VTI")

tab.round(3)

  correlation = covariance / outer_v


Unnamed: 0,Common,Median,min,max
AGG,1.0,1.0,1.0,1.0
DBC,0.492,0.416,-0.384,0.952
GLD,0.684,0.524,0.093,0.971
IBB,0.25,0.063,-0.585,0.917
ITA,0.024,-0.051,-0.807,0.875
PBJ,0.565,0.384,0.006,0.946
TLT,0.935,0.897,0.803,0.994
VNQ,-0.345,0.021,-0.932,0.652
XLB,-0.214,-0.232,-0.749,0.808
XLE,-0.205,-0.185,-0.935,0.619
