In [None]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 20)
import sys, os
sys.path.append('../../src/')
# import toolbox
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from toolbox import *
import pickle
# from toolbox import *

In [None]:
# Column names of the spot and future return series in the data files.
spot_name = 'rs'
future_name = 'rf'

# Spot asset handled by the first part of the notebook.
spotCoin = 'btc' # eth

data_name = '%sUSD_BTCUSD_25SEP20'%spotCoin.upper()
OHR_path = 'best_h/%s/'%data_name

# Folder holding the prepared data set. data_path used to exist only inside a
# comment, so the cells that read data_path failed with NameError on a fresh
# kernel. Point COPULA_DATA_ROOT at your own copy of the data; the fallback is
# the location that was quoted in the original comment.
data_root = os.environ.get('COPULA_DATA_ROOT',
                           '/Volumes/external_SSD/copulaData/Deribit_ready2')
data_path = '%s/%s/' % (data_root.rstrip('/'), data_name)

# Table of hedge results for this asset (first CSV column is the index).
HEs = pd.read_csv('%s_HEs.csv'%spotCoin, index_col=0)

In [None]:
# Distinct copulas, risk measures and file names present in HEs.
# Series.unique() keeps first-appearance order, so the lists (and every loop
# driven by them) are identical from run to run. list(set(...)) on strings
# yields an order that changes between kernel sessions.
copula_names = HEs.copula.unique().tolist()
risk_measures = HEs.risk_measure.unique().tolist()
files = HEs.file.unique().tolist()

In [None]:
rh_results = []

for f in files:
    print(f)
    # Test-window returns stored under this file name.
    test = pd.read_csv(data_path+'test/'+f, index_col=0)
    rs, rf = test.rs, test.rf
    time = pd.to_datetime(test.datetime)
    for c in copula_names:
        for rm in risk_measures:
            # Row of HEs matching this (copula, risk measure, file) triple.
            _id = (HEs.copula == c) & (HEs.risk_measure == rm) & (HEs.file == f)
            h = HEs.loc[_id, 'h'].iloc[0]
            AIC_selected = HEs.loc[_id, 'AIC_selected'].iloc[0]
            # Hedged return: spot return minus h times the future return.
            rh = rs - h*rf
            rh_results.append(dict([
                ('c', c), ('rm', rm), ('f', f), ('h', h),
                ('rs', rs), ('rf', rf), ('rh', rh),
                ('datetime', time), ('AIC_selected', AIC_selected),
            ]))

In [None]:
# One long-format frame per (copula, risk measure, file) record, stacked at
# the end. Scalars in a record are broadcast down the whole frame.
rh_frames = []

for row in rh_results:
    _df = row['rs'].to_frame('rs').assign(
        rf=row['rf'],
        rh=row['rh'],
        h=row['h'],
        risk_measure=row['rm'],
        datetime=row['datetime'],
        copula=row['c'],
        file=row['f'],
        AIC_selected=row['AIC_selected'],
    )
    rh_frames.append(_df)

rh_results_df = pd.concat(rh_frames)

In [None]:
# Rows flagged AIC_selected == 1 for the 'Variance' risk measure, ordered by
# datetime on a fresh 0..n-1 index.
_id = (rh_results_df.AIC_selected == 1) & (rh_results_df.risk_measure == 'Variance')
_df = (
    rh_results_df.loc[_id]
    .sort_values('datetime')
    .reset_index(drop=True)
)

In [None]:
# Write the stacked results to '<spotCoin>_rh_results.csv' in the working
# directory. to_csv is called with its defaults, so the frame's index is
# written as the first column.
rh_results_df.to_csv('%s_rh_results.csv'%spotCoin)

In [None]:
# Switch the working asset to ETH and rebuild the names/paths derived from it.
spotCoin = 'eth'
data_name = '%sUSD_BTCUSD_25SEP20'%spotCoin.upper()
OHR_path = 'best_h/%s/'%data_name
# NOTE(review): absolute, machine-specific location; adjust to wherever the
# data set lives on your machine.
data_path = '/Volumes/external_SSD/copulaData/Deribit_ready2/%s/'%data_name
HEs = pd.read_csv('%s_HEs.csv'%spotCoin, index_col=0)
# Read the results file for this asset back from disk.
# NOTE(review): presumably written by an earlier run of the export cell with
# spotCoin = 'eth' -- confirm the file exists before running.
# No index_col is passed here, so the index column saved by to_csv comes back
# as an ordinary data column.
rh_results_df = pd.read_csv('%s_rh_results.csv'%spotCoin)

In [None]:
# Keep only the rows flagged AIC_selected == 1. The explicit .copy() makes this
# an independent frame: assigning a column to (and sorting in place) a .loc
# slice of rh_results_df is chained assignment and raises
# SettingWithCopyWarning.
rh_copula_selection = rh_results_df.loc[rh_results_df.AIC_selected == 1, :].copy()
rh_copula_selection['datetime'] = pd.to_datetime(rh_copula_selection['datetime'])

# Explicit, fixed order of the risk-measure labels. The order matters because
# the labels are later zipped with a list of functions; deriving them with
# list(set(rh_copula_selection.risk_measure)) would not guarantee it.
risk_measures_name = ['Variance', 'ES q=0.05', 'ES q=0.01',
                      'VaR q=0.05', 'VaR q=0.01', 'ERM k=10']

# Chronological order; reassigned instead of sorting in place.
rh_copula_selection = rh_copula_selection.sort_values('datetime')

In [None]:
# Stationary Block Bootstrap 2.7.2 of Lahiri
def SB(X, p, T, seed, return_id=True):
    """Draw one stationary-block-bootstrap resample of ``X``.

    Blocks are generated one after another: the start position is drawn
    uniformly from ``0..len(X)-1`` and the block length from a geometric
    distribution with parameter ``p``. A block that runs past the end of the
    series wraps around to the beginning. Blocks are collected until at least
    ``T`` observations are available and the result is cut to exactly ``T``.

    Parameters
    ----------
    X : pandas.Series or pandas.DataFrame
        Time series to be sampled. Its index is replaced by ``0..len(X)-1``
        before sampling.
    p : float
        Parameter of the geometric distribution; ``1/p`` is the mean block
        length.
    T : int
        Length of the pseudo time series.
    seed : int or None
        Seed of the random stream. The draws are the ones obtained from
        ``np.random.seed(seed)`` followed by the module-level ``choice`` /
        ``geometric`` calls, but a private ``RandomState`` is used so the
        global NumPy generator is no longer reseeded as a side effect.
    return_id : bool, default True
        If True return the sampled integer positions, otherwise return the
        sampled rows of ``X`` (carrying the reset 0-based labels).

    Returns
    -------
    numpy.ndarray of int, or pandas object of the same kind as ``X``
        ``T`` positions or ``T`` rows. Empty when ``T <= 0``.

    Raises
    ------
    ValueError
        If ``X`` is empty while ``T > 0``.
    """
    rng = np.random.RandomState(seed)
    X = X.reset_index(drop=True)
    n_obs = len(X)

    # Nothing to draw: np.concatenate / pd.concat reject an empty block list.
    if T <= 0:
        return np.empty(0, dtype='int') if return_id else X.iloc[:0]
    if n_obs == 0:
        raise ValueError('X must contain at least one observation')

    blocks = []
    n_drawn = 0
    while n_drawn < T:
        # size-1 draws are indexed explicitly: int() on a 1-element array is
        # deprecated in recent NumPy releases.
        start = int(rng.choice(n_obs, 1, replace=True)[0])
        length = int(rng.geometric(p=p, size=1)[0])
        # Consecutive positions start, start+1, ..., wrapped circularly.
        blocks.append((np.arange(start, start + length) % n_obs).astype(int))
        n_drawn += length

    positions = np.concatenate(blocks).astype('int')[:T]
    if return_id:
        return positions
    # The index was reset above, so positional selection picks the same rows
    # as label-based selection and also works for a Series.
    return X.iloc[positions]

In [None]:
# Pseudo-series length: number of rows of the first training file.
T = pd.read_csv(data_path+'train/1.csv', index_col=0).shape[0] # = 336
# Geometric parameter of the block length (mean block length 1/p = 4).
p = 0.25

In [None]:
# Bootstrap positions are drawn once, from the 'Variance' rows, with one
# resample per seed 0..N_samples-1.
# NOTE(review): the cells that evaluate the other risk measures index their own
# subsets with these positions -- presumably every subset has the same number
# of rows; confirm.
rm = 'Variance'
_id = rh_copula_selection.risk_measure == rm
X = rh_copula_selection.loc[_id, :].copy()

N_samples = 500
sample_id = {seed: SB(X, p, T, seed, True) for seed in range(N_samples)}

In [None]:
# Display the risk-measure labels (their order must match the function list
# they are zipped with in the next cell).
risk_measures_name

In [None]:
# One-argument wrappers around the risk-measure estimators, with the level /
# parameter of each measure fixed.
# NOTE(review): ERM_estimate_trapezoidal, ES and VaR are not defined in this
# notebook -- presumably they come from `from toolbox import *`.
def ERM(x):
    return ERM_estimate_trapezoidal(10, x)


def ES1(x):
    return ES(0.01, x)


def ES5(x):
    return ES(0.05, x)


def VaR1(x):
    return VaR(0.01, x)


def VaR5(x):
    return VaR(0.05, x)


def Variance(x):
    return np.var(x)


# Same order as risk_measures_name so that zip pairs label and function.
risk_measures_fn = [Variance, ES5, ES1, VaR5, VaR1, ERM]
risk_measures_dict = dict(zip(risk_measures_name, risk_measures_fn))

In [None]:
# For every risk measure: one value 1 - fn(rh)/fn(rs) per bootstrap resample.
HE_results = {}

for rm in risk_measures_name:
    # Rows of this risk measure on a 0..n-1 index, so the stored bootstrap
    # positions can be used as labels.
    X = (
        rh_copula_selection
        .loc[rh_copula_selection.risk_measure == rm, :]
        .copy()
        .reset_index(drop=True)
    )
    fn = risk_measures_dict[rm]

    HE_arr = []
    for seed, _id in sample_id.items():
        _df = X.loc[_id, ['rh', 'rs']]
        risk_hedged = fn(_df.rh)
        risk_spot = fn(_df.rs)
        HE_arr.append(1 - (risk_hedged / risk_spot))

    HE_results[rm] = HE_arr

In [None]:
# For every risk measure: the pair (fn(rh), fn(rs)) of each bootstrap resample.
rm_results = {}

for rm in risk_measures_name:
    X = (
        rh_copula_selection
        .loc[rh_copula_selection.risk_measure == rm, :]
        .copy()
        .reset_index(drop=True)
    )
    fn = risk_measures_dict[rm]

    # Lazily materialise one resample at a time, in seed order.
    resamples = (X.loc[sample_id[seed], ['rh', 'rs']] for seed in sample_id)
    rm_results[rm] = [(fn(draw.rh), fn(draw.rs)) for draw in resamples]

In [None]:
# For every risk measure: the resampled (rh, rs) series of each bootstrap draw.
returns_results = {}

for rm in risk_measures_name:
    X = (
        rh_copula_selection
        .loc[rh_copula_selection.risk_measure == rm, :]
        .copy()
        .reset_index(drop=True)
    )
    # Looked up as in the sibling cells; not applied to the data here.
    fn = risk_measures_dict[rm]

    returns_arr = []
    for seed in sample_id:
        draw = X.loc[sample_id[seed], ['rh', 'rs']]
        returns_arr.append((draw.rh, draw.rs))

    returns_results[rm] = returns_arr

In [None]:
# Persist the bootstrap HE values of the asset currently held in spotCoin.
with open('%s_HE_SB_results.pickle'%spotCoin, mode='wb') as out_file:
    pickle.dump(HE_results, out_file)

In [None]:
# Load the stored bootstrap HE values of both assets; spotCoin ends as 'eth'.
# NOTE(review): the BTC file is not written in this pass of the notebook --
# presumably it comes from an earlier run with spotCoin = 'btc'; confirm.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles you produced yourself.
loaded_HE = {}
for spotCoin in ('btc', 'eth'):
    with open('%s_HE_SB_results.pickle'%spotCoin, 'rb') as f:
        loaded_HE[spotCoin] = pickle.load(f)

btc_HE = loaded_HE['btc']
eth_HE = loaded_HE['eth']

In [None]:
# Display the current risk-measure labels (the same list is assigned again in
# the next cell).
risk_measures_name

In [None]:
# Internal risk-measure label -> title shown on the plots. The two lists are
# derived from the mapping so they cannot drift out of step.
rm2name = {
    'Variance': 'Variance',
    'ES q=0.05': 'ES 95%',
    'ES q=0.01': 'ES 99%',
    'VaR q=0.05': 'VaR 95%',
    'VaR q=0.01': 'VaR 99%',
    'ERM k=10': 'ERM',
}

risk_measures_name = list(rm2name.keys())
risk_measure_name_plot = list(rm2name.values())

In [None]:
# 2 x 3 grid of box plots: one panel per risk measure, BTC next to ETH.
factor = 4.5
xsize = 3*factor
ysize = 2*factor
fig, axs = plt.subplots(nrows=2,ncols=3, figsize=(xsize, ysize), sharex=True, sharey=True)

panels = axs.flatten()
for i, rm in enumerate(risk_measures_name):
    panels[i].boxplot([btc_HE[rm], eth_HE[rm]])
    # Reference line at zero.
    panels[i].axhline(y=0, xmin=0, xmax=1, color='black')
    panels[i].set_title(rm2name[rm])

# Label the two boxes on every row of panels. The loop variable used to be
# called `p`, which overwrote the bootstrap block-length parameter `p`; after
# this cell ran, re-running the sampling cell silently used p == 1.
for row_axes in axs:
    xtickNames = plt.setp(row_axes, xticklabels=['BTC', 'ETH'])
    plt.setp(xtickNames, rotation=90, fontsize=12)

# The y axis is shared, so limiting the current axes limits every panel.
plt.ylim((-1,1))

plt.savefig('intraday_HE.pdf', transparent=True, bbox_inches = 'tight')

In [None]:
# Overlay the distribution of rh for the two ES risk measures on one axes.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; check
# the installed version before moving to histplot / displot.
for es_label in ('ES q=0.01', 'ES q=0.05'):
    _id = rh_copula_selection.risk_measure == es_label
    sns.distplot(rh_copula_selection.loc[_id, 'rh'])