In [1]:
import numpy as np
import pandas as pd
import strat_models
import matplotlib.pyplot as plt

from risk_return_models import *
from utils import *

# Seed NumPy's global random number generator so repeated runs draw the same
# random numbers (whether the fitting code below relies on it is not visible here).
np.random.seed(0)

In [2]:
def _read_dated_csv(path):
    """Load a CSV indexed by its "Date" column, with that index converted to datetimes."""
    frame = pd.read_csv(path, index_col="Date")
    frame.index = pd.to_datetime(frame.index)
    return frame


# Z frames: each date's row is later turned into a tuple and used as a graph-node key.
Z_train = _read_dated_csv("data/Z_train.csv")
Z_val = _read_dated_csv("data/Z_val.csv")

# df frames: one column per asset (the column count is taken as num_assets below).
df_train = _read_dated_csv("data/df_train.csv")
df_val = _read_dated_csv("data/df_val.csv")

num_quantiles = 10
num_assets = df_train.shape[1]

# First date of the training index, formatted as YYYY-MM-DD.
start_train = df_train.index[0].strftime("%Y-%m-%d")

In [3]:
# Settings forwarded unchanged to StratifiedModel.fit.
kwargs = {"verbose": False, "abs_tol": 1e-8, "maxiter": 1000, "rho": 10}

train_return = get_data_dict(df_Y=df_train, df_Z=Z_train, num_assets=num_assets)
val_return = get_data_dict(df_Y=df_val, df_Z=Z_val, num_assets=num_assets)

# M is passed to the Huber return loss, `local` is the sum-of-squares
# regulariser's `lambd`, and w1..w3 are the weights handed to make_G.
M = 0.01
local = 0.0075
w1, w2, w3 = 10, 50, 5000

print(M, local, w1, w2, w3)

G = make_G(w1=w1, w2=w2, w3=w3)

loss = huber_return_loss(M=M)
reg = strat_models.sum_squares_reg(lambd=local)

bm = strat_models.BaseModel(loss=loss, reg=reg)
sm = strat_models.StratifiedModel(BaseModel=bm, graph=G)

sm.fit(data=train_return, **kwargs)


def _stack_node_thetas(model, Z):
    """Stack, one row per date in Z.index, the "theta" stored on the node keyed by that date's Z row."""
    rows = []
    for date in Z.index:
        node_key = tuple(Z.loc[date].values)
        rows.append(model.G._node[node_key]["theta"])
    return np.vstack(rows)


# Per-date predictions: the fitted theta of whichever node each date falls in.
preds_train = _stack_node_thetas(sm, Z_train)
preds_val = _stack_node_thetas(sm, Z_val)

corr_train = corr(preds=preds_train, df=df_train)
corr_val = corr(preds=preds_val, df=df_val)

print("STRATIFIED RETURN MODEL:")
print("\t train = {}".format(corr_train))
print("\t val = {}".format(corr_val))

# Snapshot every node's theta (copied, so later changes to the graph do not affect it).
returns = {node: sm.G._node[node]["theta"].copy() for node in sm.G.nodes()}

0.01 0.0075 10 50 5000
STRATIFIED RETURN MODEL:
	 train = 0.0969742072211529
	 val = 0.05195373397052082


In [4]:
# Baseline "common" return model: one prediction, the per-column mean of the
# training frame, repeated for every date.
common_return = df_train.mean(axis=0)

# NOTE: the names below are kept unchanged so other cells that read them keep
# working, but they are misleading: `preds_train` replaces the stratified-model
# predictions of the same name, `preds_test` / `common_anll_test` are computed
# on the validation split (Z_val / df_val), and the `common_anll_*` values are
# `corr` scores.
preds_train = np.vstack([common_return] * len(Z_train.index))
preds_test = np.vstack([common_return] * len(Z_val.index))

common_anll_train = corr(preds=preds_train, df=df_train)
common_anll_test = corr(preds=preds_test, df=df_val)

print("COMMON RETURN MODEL:")
print("\t train = {}".format(common_anll_train))
# Labelled "val" (not "test"): this score is computed on the validation split,
# matching the label used for the stratified return model.
print("\t val = {}".format(common_anll_test))

COMMON RETURN MODEL:
	 train = 0.018483569229611453
	 test = -0.0007427531529758996


In [5]:
# One row per graph node, one column per asset: the fitted return vectors.
rets = pd.DataFrame(data=np.vstack(list(returns.values())), columns=df_train.columns)

# Median / min / max across nodes, transposed so assets are rows, with the
# common-model mean added as the first column.
node_stats = rets.describe().loc[["50%", "min", "max"]]
tab = (
    node_stats
    .rename(index={"50%": "median"})
    .T
    .assign(common=common_return)
    .loc[:, ["common", "median", "min", "max"]]
)

# Displayed multiplied by 100 and rounded to three decimals.
tab.mul(100).round(3)

Unnamed: 0,common,median,min,max
AGG,-0.021,-0.071,-0.128,0.073
DBC,-0.056,-0.06,-0.158,0.106
GLD,-0.012,-0.012,-0.119,0.153
IBB,0.033,0.031,-0.098,0.139
ITA,0.022,0.031,-0.077,0.075
PBJ,0.006,0.005,-0.039,0.112
TLT,-0.0,-0.063,-0.173,0.11
VNQ,0.016,0.009,-0.301,0.071
VTI,0.0,-0.0,-0.0,0.0
XLB,0.001,0.01,-0.065,0.078


In [6]:
train_cov = get_data_dict(df_Y=df_train, df_Z=Z_train, num_assets=num_assets)
val_cov = get_data_dict(df_Y=df_val, df_Z=Z_val, num_assets=num_assets)

# Multiply every data block by 100; blocks that are (numerically) all zero are
# left untouched. A single loop handles both splits.
for _split in (train_cov, val_cov):
    for i in range(len(_split["Y"])):
        if not np.allclose(_split["Y"][i], 0):
            _split["Y"][i] = _split["Y"][i] * 100

# Settings forwarded to StratifiedModel.fit (this rebinds the `kwargs` used for
# the return model above).
kwargs = dict(verbose=False, abs_tol=1e-5, maxiter=2500, rho=25)

w1, w2, w3 = 0.2, 5, 20

print("(w1,w2,w3)=({:.3f},{:.3f},{:.3f})".format(w1,w2,w3))

G = make_G(w1=w1, w2=w2, w3=w3)

loss = covariance_max_likelihood_loss()
reg = strat_models.trace_reg(lambd=0)

bm = strat_models.BaseModel(loss=loss,reg=reg)
sm = strat_models.StratifiedModel(BaseModel=bm, graph=G)

sm.fit(data=train_cov, **kwargs)

# NOTE(review): the common risk model cell removes VTI before computing its
# ANLL, whereas this cell keeps every asset, so the two sets of ANLL figures
# may not be directly comparable -- confirm.
print("STRATIFIED RISK MODEL:")
print("\t(w1,w2,w3)=({:.3f},{:.3f},{:.3f})".format(w1, w2, w3))
print("\t{:.6f} {:.6f}".format(sm.anll(train_cov), sm.anll(val_cov)))

# Each node's theta is inverted to obtain that node's covariance matrix.
# np.linalg.inv already returns a new array, so no explicit copy is needed.
covs = {node: np.linalg.inv(sm.G._node[node]["theta"]) for node in sm.G.nodes()}



(w1,w2,w3)=(0.200,5.000,20.000)
STRATIFIED RISK MDOEL:
	(w1,w2,w3)=(0.200,5.000,20.000)
	-10.639661 -4.266534


In [7]:
train_cov = get_data_dict(df_Y=df_train, df_Z=Z_train, num_assets=num_assets)
val_cov = get_data_dict(df_Y=df_val, df_Z=Z_val, num_assets=num_assets)

# Look VTI up by name instead of hard-coding its position (8), the same way the
# AGG index is found for the correlation table. Raises IndexError if the column
# is missing rather than silently deleting some other asset.
# Presumably VTI is removed because its variance in df_train is zero (its
# common volatility prints as 0.0), which would make the covariance singular.
VTI_idx = np.where(df_train.columns == "VTI")[0][0]

# Scale every non-zero data block by 100 and delete the VTI entry along axis 0.
# A single loop handles both splits.
for _split in (train_cov, val_cov):
    for i in range(len(_split["Y"])):
        if not np.allclose(_split["Y"][i], 0):
            _split["Y"][i] = np.delete(_split["Y"][i] * 100, VTI_idx, axis=0)

# Sample covariance of the x100-scaled training frame (all assets).
theta_common = (df_train*100).cov().values

G = make_G(w1=1e10, w2=1e10, w3=1e10)

loss = covariance_max_likelihood_loss()
reg = strat_models.trace_reg(lambd=0)

# Drop VTI's row and column from the covariance.
new_theta_common = np.delete(np.delete(theta_common, VTI_idx, axis=1), VTI_idx, axis=0)

bm_common = strat_models.BaseModel(loss=loss,reg=reg)
sm_common = strat_models.StratifiedModel(BaseModel=bm_common, graph=G)

# The model is not fitted: every node is assigned the same inverse covariance.
# The inverse is computed once; each slot gets its own copy so no two entries
# share memory (as was the case when it was recomputed per assignment).
common_precision = np.linalg.inv(new_theta_common)
for node in G.nodes():
    for _slot in ("theta", "theta_tilde", "theta_hat"):
        sm_common.G._node[node][_slot] = common_precision.copy()

print("COMMON RISK MODEL:")
print("train:", sm_common.anll(train_cov))
print("validation:", sm_common.anll(val_cov))

COMMON RISK MODEL:
train: 2.4392610467753646
validation: 3.2564357279964944


In [8]:
# Common-model volatilities: square root of the diagonal of the x100-scaled
# training covariance, divided back by 100*100.
scaled_train_cov = (100*df_train).cov().values
common_vols = np.sqrt(scaled_train_cov.diagonal()/(100*100))

# One row of per-asset volatilities for each covariance matrix stored in `covs`.
vol_rows = [np.sqrt(cov.diagonal()/(100*100)) for cov in covs.values()]
vols = pd.DataFrame(data=np.vstack(vol_rows), columns=df_train.columns)

# Median / min / max across those rows, assets as rows, common value first.
tab = (
    vols.describe()
    .loc[["50%", "min", "max"]]
    .rename(index={"50%": "Median"})
    .T
    .assign(Common=common_vols)
    .loc[:, ["Common", "Median", "min", "max"]]
)

# Displayed multiplied by 100 and rounded to three decimals.
tab.mul(100).round(3)

Unnamed: 0,Common,Median,min,max
AGG,1.313,0.864,0.537,4.236
DBC,1.289,0.998,0.725,3.95
GLD,1.665,1.194,0.866,5.613
IBB,0.914,0.796,0.634,1.92
ITA,0.619,0.549,0.474,1.421
PBJ,0.648,0.502,0.414,1.502
TLT,1.816,1.263,0.734,6.05
VNQ,1.328,0.769,0.643,3.73
VTI,0.0,0.083,0.083,0.083
XLB,0.772,0.623,0.491,2.148


In [9]:
# Position of the AGG column among the asset columns.
AGG_idx = np.where(df_train.columns=="AGG")[0][0]

# AGG's row of the correlation matrix derived from the training covariance,
# stored as a one-column frame indexed by asset.
common_corr_row = correlation_from_covariance(df_train.cov().values)[AGG_idx]
common_corrs = pd.DataFrame(
    data=common_corr_row.reshape(-1,1),
    index=df_train.columns,
    columns=["Common"],
)

# The same AGG row, taken from the correlation matrix of every covariance in `covs`.
corrs_strat = [correlation_from_covariance(cov)[AGG_idx] for cov in covs.values()]

corrs = pd.DataFrame(data=np.vstack(corrs_strat), columns=df_train.columns)

# Median / min / max across those rows, assets as rows, common value first.
tab = corrs.describe().loc[["50%", "min", "max"]]
tab = tab.rename(index={"50%":"Median"}).T
tab["Common"] = common_corrs

tab = tab[["Common", "Median", "min", "max"]]

tab.round(3)

  correlation = covariance / outer_v


Unnamed: 0,Common,Median,min,max
AGG,1.0,1.0,1.0,1.0
DBC,0.49,0.414,-0.285,0.959
GLD,0.683,0.522,-0.131,0.979
IBB,0.238,0.066,-0.669,0.888
ITA,0.021,-0.059,-0.896,0.842
PBJ,0.569,0.356,-0.058,0.918
TLT,0.934,0.888,0.749,0.995
VNQ,-0.345,0.007,-0.908,0.796
VTI,0.0,0.0,-0.0,0.0
XLB,-0.213,-0.216,-0.802,0.826
