In [1]:
# Load the autoreload extension and use mode 2, so that imported modules
# (e.g. the project's `tools` and `hc_graph`) are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures directly in the notebook output.
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import norm

sns.set()

from IPython.display import display
from tools import make_track, ols_regression, make_ER, lasso_regression, lasso_regression_ic, ridge_regression,\
    kalman_filter, kalman_with_selection, replication_stats
from hc_graph import hc_stock, hc_piechart, hc_spiderweb

ImportError: cannot import name 'replication_stats'

In [None]:
# Column name of the index under study; used to select it from the HFRX data
# and as the `fund_name` argument of the statistics helper further down.
fund_name = 'HFRXGL'
# Display the lookup table stored next to the data (the result is not kept in a variable).
pd.read_csv(r"financial_data/hfrx_index_names.csv", index_col=0)

### Financial data loading and cleaning

In [None]:
# Every CSV below uses its first column as a day-first date index.
dated_csv_opts = dict(index_col=0, parse_dates=True, dayfirst=True)

# US rate: only the '3M' column is used.
usd_rates = pd.read_csv("financial_data/USD_rates.csv", **dated_csv_opts)
US_rate = usd_rates['3M']

# HFRX: forward-fill gaps, then pass the selected index through make_ER with the US rate
# (presumably an excess-return conversion -- confirm in tools.make_ER).
hfrx_all = pd.read_csv("financial_data/hfrx_daily_index_data.csv", **dated_csv_opts).ffill()
hfrx_selected = hfrx_all[[fund_name]].dropna()
hfrx = make_ER(hfrx_selected, US_rate)

# BNP
# Optional column subset, currently disabled:
#   "BNPIFEU","BNPIFUS","BNPIFJP","BNPIFCN","BNPIFFR","BNPIFGE","BNPIFGB","BNPIFCH","BNPIFKR","BNPIFUSC","BNPIFBR",
#   "BNPIFAU","BNPIFHK","BNPIFTW","BNPIFSG","BNPIFIT","BNPIFSP","BNPIFSW","BNPIFNE","BNPIFRU","BNPIFEM"
bnp = pd.read_csv("financial_data/bnp_data.csv", **dated_csv_opts)

# Risk premia (pickle: only load files from a trusted source).
# Optional column subset, currently disabled:
#   "BNPICCE3","BNPIVMRU","BNPIAIRT","BNPIAIRU","BNPIEC03","BNPIPLEE","BNPIPMEE","BNPIPQEE","BNPIPVEE","BNPILUEE",
#   "BNPIPMUH","BNPIPQUH","BNPIPVUH","BNPIAIRG","BNPIAIRJ","BNPIAIRE"
risk_premia = pd.read_pickle("financial_data/risk_premia_ER_FX_USD.pkl")

In [None]:
# Descriptive table for the BNP instruments; it is joined onto the volatility
# contributions later and grouped by its "Asset Class" and "Style" columns.
df_info = pd.read_csv(r"financial_data/bnp_info.csv", index_col=0)
df_info.head()

In [None]:
# Put BNP indices and risk premia on the union of their dates and carry the last
# known value forward, then keep only the dates that also exist in the fund track.
factor_prices = bnp.join(risk_premia, how="outer").ffill()
prices_all = factor_prices.join(hfrx, how="inner")

In [None]:
# Daily returns of every column (fund included).
# resample('1D') inserts NaN rows for calendar days without a quote. The prices are
# forward-filled explicitly before pct_change() so the result no longer relies on
# pct_change's implicit forward-fill default (deprecated since pandas 2.1). Without it,
# the NaN rows would make dropna() discard the first return after every gap.
prices_daily = prices_all.resample('1D').first().ffill()
returns_all = prices_daily.pct_change().dropna()

# Split the fund from the replication universe.
hrfx_returns = returns_all[[fund_name]]
returns_all = returns_all.drop(fund_name, axis=1)
# NOTE: this rebinds prices_all without the fund column, so re-running this cell
# requires re-running the cell that builds prices_all first.
prices_all = prices_all.drop(fund_name, axis=1)

In [None]:
# Lag-1 autocorrelation of the raw fund returns.
alpha = hrfx_returns[fund_name].autocorr(lag=1)

# 'Liquidity adjustment' for removing returns autocorrelation:
#     r_adj[t] = (r[t] - alpha * r[t-1]) / (1 - alpha)
# Rows where the result is NaN (such as the first one, which has no previous return) are set to 0.
lagged_returns = hrfx_returns.shift(1)
hrfx_returns_adj = (hrfx_returns - alpha * lagged_returns).div(1. - alpha).fillna(0)

# Lag-1 autocorrelation that remains after the adjustment.
beta = hrfx_returns_adj[fund_name].autocorr(lag=1)

In [None]:
print("Auto-correlation of", fund_name, "returns :", alpha)
print("Auto-correlation of", fund_name, "adjusted returns :", beta)

# Compound both return series into tracks and draw them on the same axes.
raw_track = (1 + hrfx_returns).cumprod()
adjusted_track = (1 + hrfx_returns_adj).cumprod()[fund_name]

ax = raw_track.plot()
_ = adjusted_track.plot(label=fund_name + ' adjusted', legend=True, ax=ax)

In [None]:
# Passed as `sample_length` to the lasso and kalman_with_selection calls
# (presumably a number of daily observations -- confirm in tools).
size = 126
# Passed as `frequency` to the lasso / kalman calls (presumably the rebalancing step -- confirm in tools).
freq = 5

In [None]:
%%time
df_weight_aic, _ = lasso_regression_ic(hrfx_returns, returns_all,sample_length=size, frequency=freq, criterion='aic')
prices_for_track_aic = prices_all.loc[df_weight_aic.index[0]:]

In [None]:
%%time
df_weight_bic, _ = lasso_regression_ic(hrfx_returns, returns_all,sample_length=size, frequency=freq, criterion='bic')
prices_for_track_bic = prices_all.loc[df_weight_bic.index[0]:]

In [None]:
%%time
nu = 1.
df_weight_kalman = kalman_filter(hrfx_returns, returns_all,freq, sigma_weight=1. , sigma_return=nu)

prices_for_track_kalman = prices_all.loc[df_weight_kalman.index[0]:]

In [None]:
%%time
nb_period = 120
df_weight_kalman_bic = kalman_with_selection(hrfx_returns, returns_all,sample_length=size, frequency=freq,\
                                          nu=nu, nb_period=nb_period, criterion='bic')

prices_for_track_kalman_bic = prices_all.loc[df_weight_kalman_bic.index[0]:]

In [None]:
%%time
df_weight_kalman_aic = kalman_with_selection(hrfx_returns, returns_all,sample_length=size, frequency=freq,\
                                          nu=nu, nb_period=nb_period, criterion='aic')

prices_for_track_kalman_aic = prices_all.loc[df_weight_kalman_aic.index[0]:]

In [None]:
# Arguments shared by every make_track call (presumably transaction cost and
# execution lag -- confirm in tools.make_track).
tc = 0.0003
lag = 1
track_options = dict(tc=tc, lag=lag)

# Lasso tracks; the AIC one fixes the dates of the comparison table.
replication_aic = make_track(prices_for_track_aic, df_weight_aic, **track_options)
replication_bic = make_track(prices_for_track_bic, df_weight_bic, **track_options)

df_res = hfrx.loc[replication_aic.index]
df_res["Lasso AIC"] = replication_aic
df_res["Lasso BIC"] = replication_bic

# Kalman tracks are cut so that they start on the first date of the comparison table.
comparison_start = df_res.index[0]
replication_kalman = make_track(
    prices_for_track_kalman, df_weight_kalman, **track_options
).loc[comparison_start:]
replication_kalman_aic = make_track(
    prices_for_track_kalman_aic, df_weight_kalman_aic, **track_options
).loc[comparison_start:]
replication_kalman_bic = make_track(
    prices_for_track_kalman_bic, df_weight_kalman_bic, **track_options
).loc[comparison_start:]

df_res["Kalman"] = replication_kalman
df_res["Kalman AIC"] = replication_kalman_aic
df_res["Kalman BIC"] = replication_kalman_bic

In [None]:
# Rebase every column on its value in the first row, then compare the fund with the Kalman tracks.
first_row = df_res.iloc[0]
df_res = df_res.div(first_row)

kalman_columns = [fund_name, 'Kalman', 'Kalman BIC', 'Kalman AIC']
df_res[kalman_columns].plot(figsize=(20, 10))

In [None]:
# Summary statistics of the tracks in df_res, computed by tools.replication_stats
# with the fund column passed as `fund_name`.
df_stat = replication_stats(df_res, fund_name=fund_name)
df_stat

In [None]:
# Rolling standard deviation of the returns over a 60-row window; incomplete rows are dropped.
vol_window = 60
track_returns = df_res.pct_change()
vol = track_returns.rolling(window=vol_window).std().dropna()

vol[[fund_name, 'Kalman', 'Kalman BIC', 'Kalman AIC']].plot(figsize=(20, 10))

In [None]:
# For each replication column: 1 / (replication vol / fund vol), date by date.
vol_fund = vol[fund_name]
replication_vol = vol.drop(columns=[fund_name])
vol_ratio = 1. / replication_vol.div(vol_fund, axis='index')

vol_ratio[['Kalman', 'Kalman BIC', 'Kalman AIC']].plot(figsize=(20, 10))

### Volatility adjustment

In [None]:
def _vol_adjust(weights, ratio):
    """Scale each row of `weights` by the value of `ratio` on the same date.

    `multiply(..., axis='index')` aligns on the row index, so dates missing on
    either side produce NaN rows, which `dropna()` then removes.
    """
    return weights.multiply(ratio, axis='index').dropna()


def _voladj_track(prices, weights_voladj):
    """Run make_track on `prices` cut to start at the first adjusted-weight date."""
    first_date = weights_voladj.index[0]
    return make_track(prices.loc[first_date:], weights_voladj, tc=tc, lag=lag)


# Volatility-scaled weights, computed once per strategy.
df_weight_aic_voladj = _vol_adjust(df_weight_aic, vol_ratio['Lasso AIC'])
df_weight_bic_voladj = _vol_adjust(df_weight_bic, vol_ratio['Lasso BIC'])
df_weight_kalman_voladj = _vol_adjust(df_weight_kalman, vol_ratio['Kalman'])
df_weight_kalman_aic_voladj = _vol_adjust(df_weight_kalman_aic, vol_ratio['Kalman AIC'])
df_weight_kalman_bic_voladj = _vol_adjust(df_weight_kalman_bic, vol_ratio['Kalman BIC'])

# Each strategy is tracked on its OWN price frame, cut at its own first adjusted
# weight date. Previously the two BIC tracks reused the price window derived from
# the corresponding AIC weights.
replication_aic_voladj = _voladj_track(prices_for_track_aic, df_weight_aic_voladj)

# The Lasso AIC track fixes the dates of the comparison table; .copy() makes the
# table an independent frame before columns are added to it.
df_res_voladj = hfrx.loc[replication_aic_voladj.index].copy()
df_res_voladj["Lasso AIC"] = replication_aic_voladj

replication_bic_voladj = _voladj_track(prices_for_track_bic, df_weight_bic_voladj)
df_res_voladj["Lasso BIC"] = replication_bic_voladj

# Kalman tracks are cut so that they start on the first date of the comparison table.
voladj_start = df_res_voladj.index[0]

replication_kalman_voladj = _voladj_track(
    prices_for_track_kalman, df_weight_kalman_voladj
).loc[voladj_start:]
df_res_voladj["Kalman"] = replication_kalman_voladj

replication_kalman_aic_voladj = _voladj_track(
    prices_for_track_kalman_aic, df_weight_kalman_aic_voladj
).loc[voladj_start:]
df_res_voladj["Kalman AIC"] = replication_kalman_aic_voladj

replication_kalman_bic_voladj = _voladj_track(
    prices_for_track_kalman_bic, df_weight_kalman_bic_voladj
).loc[voladj_start:]
df_res_voladj["Kalman BIC"] = replication_kalman_bic_voladj

# Kept only for backward compatibility: this scratch name used to end the cell
# bound to the Kalman AIC price window.
prices_for_track_all = prices_for_track_kalman_aic.loc[df_weight_kalman_aic_voladj.index[0]:]

In [None]:
# Back-fill missing values, rebase every column on its first row, then plot
# the fund against the Kalman tracks.
voladj_filled = df_res_voladj.bfill()
df_res_voladj = voladj_filled.div(voladj_filled.iloc[0])

kalman_columns = [fund_name, 'Kalman', 'Kalman BIC', 'Kalman AIC']
df_res_voladj[kalman_columns].plot(figsize=(20, 10))

In [None]:
# Replication columns only (everything except the fund).
voladj_replications = df_res_voladj.drop(columns=[fund_name])

# Constant 1.75 on the index of US_rate; `0 * US_rate` keeps NaN where US_rate is NaN.
flat_rate = 1.75 + 0 * US_rate

# Fund column is kept unchanged; the replications are passed through make_ER with the flat rate.
df_res_volperfadj = df_res_voladj[[fund_name]].copy()
df_res_volperfadj[voladj_replications.columns] = make_ER(voladj_replications, flat_rate)

df_res_volperfadj[[fund_name, 'Kalman', 'Kalman BIC', 'Kalman AIC']].plot(figsize=(20, 10))

## Statistics on replication

In [None]:
# Same statistics helper as for df_res, applied to the volatility-adjusted tracks.
df_stat_vol = replication_stats(df_res_voladj, fund_name=fund_name)
df_stat_vol

In [None]:
def _vol_contribution_pct(weights):
    """Per-column std of (return * weight) on the weight dates, as a percentage of the sum over columns."""
    asset_returns = prices_all.reindex(weights.index).pct_change()
    contribution = (asset_returns * weights).std()
    return 100 * contribution / contribution.sum()


df_contrib_kalman_voladj = _vol_contribution_pct(df_weight_kalman_voladj)
df_contrib_kalman_bic_voladj = _vol_contribution_pct(df_weight_kalman_bic_voladj)
df_contrib_kalman_aic_voladj = _vol_contribution_pct(df_weight_kalman_aic_voladj)

In [None]:
# One column per Kalman variant, indexed like the first contribution series.
contrib_by_model = df_contrib_kalman_voladj.to_frame("Kalman")
contrib_by_model["Kalman BIC"] = df_contrib_kalman_bic_voladj
contrib_by_model["Kalman AIC"] = df_contrib_kalman_aic_voladj

# Attach the descriptive columns of df_info and turn the index into a regular column.
df_contrib = contrib_by_model.join(df_info).reset_index()

In [None]:
# Contributions summed by (Asset Class, index); the Style column is dropped first.
df_contrib_asset_class = (
    df_contrib
    .drop(columns=["Style"])
    .groupby(["Asset Class", "index"])
    .sum()
)

# Contributions summed by (Style, index); the Asset Class column is dropped first.
df_contrib_style = (
    df_contrib
    .drop(columns=["Asset Class"])
    .groupby(["Style", "index"])
    .sum()
)

In [None]:
# Spider-web chart (hc_graph helper) of the contributions grouped by asset class.
g = hc_spiderweb(df_contrib_asset_class, title="Vol contribution by Asset Class")
g.plot()

In [None]:
# Spider-web chart (hc_graph helper) of the contributions grouped by style.
g = hc_spiderweb(df_contrib_style, title="Vol contribution by Style")
g.plot()

In [None]:
# Pie chart (hc_graph helper) of the "Kalman" column of the asset-class grouping.
g = hc_piechart(df_contrib_asset_class[["Kalman"]], title="Vol contribution by Asset Class in Kalman")
g.plot()

In [None]:
# Pie chart (hc_graph helper) of the "Kalman" column of the style grouping.
g = hc_piechart(df_contrib_style[["Kalman"]], title="Vol contribution by Style in Kalman")
g.plot()

In [None]:
# Horizontal bar chart of the percentage contributions of the Kalman replication.
df_contrib_kalman_voladj.plot.barh(figsize=(15, 20), cmap="tab10")

In [None]:
# Column-wise mean of the Lasso AIC weights over all rows.
lasso_aic_mean_weights = df_weight_aic.mean(axis=0)
lasso_aic_mean_weights.plot.barh(figsize=(15, 20))

In [None]:
# Column-wise mean of the Lasso BIC weights over all rows.
lasso_bic_mean_weights = df_weight_bic.mean(axis=0)
lasso_bic_mean_weights.plot.barh(figsize=(15, 20))

In [None]:
# Column-wise mean of the Kalman weights from the first date of df_res onwards.
kalman_mean_weights = df_weight_kalman.loc[df_res.index[0]:].mean(axis=0)
kalman_mean_weights.plot.barh(figsize=(15, 20))

In [None]:
# Column-wise mean of the Kalman BIC weights from the first date of df_res onwards.
kalman_bic_mean_weights = df_weight_kalman_bic.loc[df_res.index[0]:].mean(axis=0)
kalman_bic_mean_weights.plot.barh(figsize=(15, 20))

In [None]:
# Column-wise mean of the Kalman AIC weights from the first date of df_res onwards.
kalman_aic_mean_weights = df_weight_kalman_aic.loc[df_res.index[0]:].mean(axis=0)
kalman_aic_mean_weights.plot.barh(figsize=(15, 20))

In [None]:
# Autocorrelogram of the daily returns of the fund and of the Kalman tracks
# (the two Lasso columns are excluded), for lags 1 .. limit-1.
limit = 60
lags = range(1, limit)

# Returns are computed once instead of once per lag.
track_returns = df_res.drop(["Lasso AIC", "Lasso BIC"], axis=1).pct_change()

# One row per lag, one column per track.
df_autocorr = pd.DataFrame(data=[track_returns.apply(lambda col: col.autocorr(lag=i)) for i in lags])
# Label the rows with the actual lag; the default 0-based row labels shifted
# the x-axis of the plot by one (lag 1 was drawn at x=0).
df_autocorr.index = pd.Index(lags, name="lag")

# +/- 2/sqrt(n) bands, with n = len(df_res) - lag.
n_rows = len(df_res)
df_autocorr[r"$2/\sqrt{n}$"] = [2/np.sqrt(n_rows-i) for i in lags]
df_autocorr[r"$-2/\sqrt{n}$"] = [-2/np.sqrt(n_rows-i) for i in lags]

In [None]:
# Line chart of the autocorrelations together with the two band columns.
df_autocorr.plot.line(figsize=(14, 7))

In [None]:
# Lag-1 autocorrelation of the fund's returns in the comparison table.
fund_track_returns = df_res[fund_name].pct_change()
fund_track_returns.autocorr(lag=1)

In [None]:
# Lag-1 autocorrelation of the Kalman track's returns.
kalman_track_returns = df_res['Kalman'].pct_change()
kalman_track_returns.autocorr(lag=1)

In [None]:
# Lag-1 autocorrelation of the Kalman BIC track's returns.
kalman_bic_track_returns = df_res['Kalman BIC'].pct_change()
kalman_bic_track_returns.autocorr(lag=1)

In [None]:
# Lag-1 autocorrelation of the Kalman AIC track's returns.
kalman_aic_track_returns = df_res['Kalman AIC'].pct_change()
kalman_aic_track_returns.autocorr(lag=1)

In [None]:
# Lag-1 autocorrelation of the Lasso AIC track's returns.
lasso_aic_track_returns = df_res['Lasso AIC'].pct_change()
lasso_aic_track_returns.autocorr(lag=1)