In [1]:
import matplotlib.pyplot as plt
import scipy
from scipy import stats
from scipy.stats import norm
import pandas as pd
import numpy as np
import seaborn as sns
from statsmodels.distributions.empirical_distribution import ECDF
from copulae1 import *
from KDEs import *
from toolbox import *
import warnings
import itertools
import os
warnings.filterwarnings("ignore")

data = pd.read_csv('../processed_data/btc_future_crix.csv')
data.head()

ecdf_brr = ECDF(data.return_brr)
ecdf_btc = ECDF(data.return_btc)

u = ecdf_brr(data.return_brr)
v = ecdf_btc(data.return_btc)

brr = np.array(data.return_brr)
btc = np.array(data.return_btc)

kde_brr = KDE(data.return_brr, "Gaussian")
kde_btc = KDE(data.return_btc, "Gaussian")

kde_brr = KDE(data.return_brr, "Gaussian", kde_brr.h_brot*3)
kde_btc = KDE(data.return_brr, "Gaussian", kde_btc.h_brot*3)

In [10]:
# Gaussian, t_Copula, Clayton, Frank, Gumbel, Plackett, Gaussian mix Indep
C1  = Gaussian(dict(rho=0.9),       Law_RS=kde_brr, Law_RF=kde_btc) # fix the maringals!
C2  = t_Copula(dict(rho=0.1, nu=4), Law_RS=kde_brr, Law_RF=kde_btc, nu_lowerbound=2) 
C2c = t_Copula(dict(rho=0.1, nu=4), Law_RS=kde_brr, Law_RF=kde_btc, nu_lowerbound=4) 
C3  = Clayton(dict(theta=0.1),      Law_RS=kde_brr, Law_RF=kde_btc)
C4  = Frank(dict(theta=0.1),        Law_RS=kde_brr, Law_RF=kde_btc)
C5  = Gumbel(dict(theta=3),         Law_RS=kde_brr, Law_RF=kde_btc)
C6  = Plackett(dict(theta=10),      Law_RS=kde_brr, Law_RF=kde_btc)
C7  = Gaussian_Mix_Independent(dict(rho=.5,p=0.7),Law_RS=kde_brr, Law_RF=kde_btc)
Copulae_names = ['Gaussian', 't_Copula', 't_Copula_Capped', 'Clayton', 'Frank', 'Gumbel', 'Plackett', 'Gauss Mix Indep']
Copulae_arr   = [C1,C2,C2c,C3,C4,C5,C6,C7]
Copulae = dict(zip(Copulae_names, Copulae_arr))


Gaussian
t_Copula
t_Copula_Capped
Clayton
Frank
Gumbel
Plackett
Gauss Mix Indep


In [4]:
k_arr = [10,20] # Absolute risk aversion for exponential risk measure
q_arr = [0.01,0.05,0.1] # Quantile level for expected shortfall

best_h = []
for C_name in Copulae:
    best_h.append(optimize_h(Copulae[C_name], k_arr, q_arr))

Optimization terminated successfully.
         Current function value: 0.000448
         Iterations: 12
         Function evaluations: 24
Optimization terminated successfully.
         Current function value: 0.032501
         Iterations: 12
         Function evaluations: 24
Optimization terminated successfully.
         Current function value: 0.041642
         Iterations: 12
         Function evaluations: 24
Optimization terminated successfully.
         Current function value: 0.010071
         Iterations: 13
         Function evaluations: 27
Optimization terminated successfully.
         Current function value: 0.000840
         Iterations: 13
         Function evaluations: 27
Optimization terminated successfully.
         Current function value: 0.000094
         Iterations: 14
         Function evaluations: 30
Optimization terminated successfully.
         Current function value: 0.002236
         Iterations: 17
         Function evaluations: 34
Optimization terminated successful

In [14]:
best_h = pd.DataFrame(best_h)
best_h.columns = ['Var'] + ['ERM k=%i'%k for k in k_arr] +  ['ES q=%.2f'%q for q in q_arr]
best_h.index = Copulae_names 
best_h

Unnamed: 0,Var,ERM k=10,ERM k=20,ES q=0.01,ES q=0.05,ES q=0.10
Gaussian,0.884082,0.896289,0.914648,0.842383,0.89248,0.8582031
t_Copula,0.098047,0.104395,0.105566,0.099902,0.096387,0.1
t_Copula_Capped,0.097754,0.104395,0.104395,0.098633,0.08125,0.08720703
Clayton,0.078223,0.10625,0.134668,0.064062,0.05293,-8.881784e-16
Frank,0.015332,0.015234,0.018066,0.016309,0.002539,-9.765625e-05
Gumbel,0.834375,0.960449,1.014648,0.871094,0.743652,0.624707
Plackett,0.580371,0.589062,0.592773,0.601367,0.55,0.5404297
Gauss Mix Indep,0.338672,0.356348,0.386426,0.301465,0.30166,0.3898437


In [None]:
data_name = "future_brr_v4"
data_path = "../processed_data/"+data_name+"/"

# Risk Measures
# Variance is automatically included
k_arr = [10,20] # Absolute risk aversion for exponential risk measure
q_arr_ES = [0.01,0.05,0.1] # Quantile level for expected shortfall

spot_name = "log return brr"
future_name = "log return future"

paras_results = []
likelihood_results = []
best_h_results = []

calibration_method = "MLE" # MM or MLE
q_arr = [0.05,0.1,0.9,0.95]

ls = os.listdir(data_path+'train/')
ls = [l for l in ls if l.endswith('.csv')]
for file in ls:
    # Calibration 
    train = pd.read_csv(data_path+'train/'+file)
    spot   = train.loc[:,spot_name]
    future = train.loc[:,future_name]
    u = ECDF(spot)(spot)
    v = ECDF(future)(future)
    
    kde_brr = KDE(spot, "Gaussian")
    kde_btc = KDE(future, "Gaussian")
    
    for C_name in Copulae:
        Copulae[C_name].Law_RS = kde_brr
        Copulae[C_name].Law_RF = kde_btc
    
    paras = []
    likelihood = []
    best_h = []
    for C_name in Copulae:
        if calibration_method == "MLE":
            Copulae[C_name].canonical_calibrate(u,v)
            
        elif calibration_method == "MM":
            Copulae[C_name].mm_calibrate(u,v,q_arr)

        print(C_name,'is done.\n')
    
    for C_name in Copulae:
        paras.append((C_name,Copulae[C_name].paras))
        
    for C_name in Copulae:
        ln = Copulae[C_name].dependency_likelihood(u,v)
        likelihood.append((C_name,ln))
        
    paras_results.append(paras)
    likelihood_results.append(likelihood)
    
    # Get Best h
    best_h = []
    for C_name in Copulae:
        best_h.append(optimize_h(Copulae[C_name], k_arr, q_arr_ES))
    best_h = pd.DataFrame(best_h)
    best_h.columns = ['Var'] + ['ERM k=%i'%k for k in k_arr] +  ['ES q=%.2f'%q for q in q_arr_ES]
    best_h.index = Copulae_names 
    best_h_results.append(best_h)
    
#     # Testing
#     test = pd.read_csv(data_path+'test/'+file)
#     spot_test   = train.loc[:,spot_name]
#     future_test = train.loc[:,future_name]

Optimization terminated successfully.
         Current function value: -0.416899
         Iterations: 32
         Function evaluations: 66
Gaussian is done.

Optimization terminated successfully.
         Current function value: -0.522612
         Iterations: 120
         Function evaluations: 231
t_Copula is done.

Optimization terminated successfully.
         Current function value: -0.502592
         Iterations: 176
         Function evaluations: 339
t_Copula_Capped is done.

Optimization terminated successfully.
         Current function value: -0.388826
         Iterations: 41
         Function evaluations: 91
Clayton is done.

Optimization terminated successfully.
         Current function value: -0.422036
         Iterations: 42
         Function evaluations: 91
Frank is done.

Optimization terminated successfully.
         Current function value: -0.449668
         Iterations: 36
         Function evaluations: 79
Gumbel is done.

Optimization terminated successfully.
         

Optimization terminated successfully.
         Current function value: -0.428291
         Iterations: 123
         Function evaluations: 244
t_Copula_Capped is done.

Optimization terminated successfully.
         Current function value: -0.307838
         Iterations: 34
         Function evaluations: 74
Clayton is done.

Optimization terminated successfully.
         Current function value: -0.371233
         Iterations: 35
         Function evaluations: 77
Frank is done.

Optimization terminated successfully.
         Current function value: -0.404987
         Iterations: 33
         Function evaluations: 71
Gumbel is done.

Optimization terminated successfully.
         Current function value: -0.431727
         Iterations: 40
         Function evaluations: 91
Plackett is done.

Optimization terminated successfully.
         Current function value: -0.442052
         Iterations: 65
         Function evaluations: 140
Gauss Mix Indep is done.

Optimization terminated successfully.
   

Optimization terminated successfully.
         Current function value: -0.517188
         Iterations: 59
         Function evaluations: 130
Gauss Mix Indep is done.

Optimization terminated successfully.
         Current function value: 0.000993
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.047656
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.064136
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.014951
         Iterations: 15
         Function evaluations: 30
Optimization terminated successfully.
         Current function value: 0.002259
         Iterations: 15
         Function evaluations: 31
Optimization terminated successfully.
         Current function value: -0.000906
         Iterations: 15
         Function evaluations: 33
Optim

Optimization terminated successfully.
         Current function value: 0.049078
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.065320
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.015862
         Iterations: 15
         Function evaluations: 33
Optimization terminated successfully.
         Current function value: 0.003263
         Iterations: 15
         Function evaluations: 31
Optimization terminated successfully.
         Current function value: 0.000187
         Iterations: 16
         Function evaluations: 32
Optimization terminated successfully.
         Current function value: 0.000866
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.043935
         Iterations: 14
         Function evaluations: 28
Optimization terminated successful

Optimization terminated successfully.
         Current function value: 0.000194
         Iterations: 15
         Function evaluations: 32
Optimization terminated successfully.
         Current function value: -0.001574
         Iterations: 16
         Function evaluations: 35
Optimization terminated successfully.
         Current function value: 0.000450
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.031161
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.043454
         Iterations: 13
         Function evaluations: 26
Optimization terminated successfully.
         Current function value: 0.007072
         Iterations: 15
         Function evaluations: 31
Optimization terminated successfully.
         Current function value: -0.000768
         Iterations: 15
         Function evaluations: 32
Optimization terminated successf

Optimization terminated successfully.
         Current function value: 0.029947
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.040799
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.007190
         Iterations: 14
         Function evaluations: 30
Optimization terminated successfully.
         Current function value: -0.000356
         Iterations: 15
         Function evaluations: 33
Optimization terminated successfully.
         Current function value: -0.001661
         Iterations: 18
         Function evaluations: 38
Optimization terminated successfully.
         Current function value: 0.000596
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.037466
         Iterations: 14
         Function evaluations: 28
Optimization terminated successf

Optimization terminated successfully.
         Current function value: 0.001015
         Iterations: 16
         Function evaluations: 32
Optimization terminated successfully.
         Current function value: -0.000912
         Iterations: 18
         Function evaluations: 36
Optimization terminated successfully.
         Current function value: 0.000802
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.043893
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.059574
         Iterations: 13
         Function evaluations: 26
Optimization terminated successfully.
         Current function value: 0.012655
         Iterations: 16
         Function evaluations: 33
Optimization terminated successfully.
         Current function value: 0.002097
         Iterations: 14
         Function evaluations: 30
Optimization terminated successfu

Optimization terminated successfully.
         Current function value: 0.040125
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.054834
         Iterations: 13
         Function evaluations: 26
Optimization terminated successfully.
         Current function value: 0.011013
         Iterations: 14
         Function evaluations: 29
Optimization terminated successfully.
         Current function value: 0.001629
         Iterations: 14
         Function evaluations: 29
Optimization terminated successfully.
         Current function value: 0.000253
         Iterations: 14
         Function evaluations: 31
Optimization terminated successfully.
         Current function value: 0.000893
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.042606
         Iterations: 15
         Function evaluations: 30
Optimization terminated successful

Optimization terminated successfully.
         Current function value: 0.002467
         Iterations: 13
         Function evaluations: 27
Optimization terminated successfully.
         Current function value: 0.000889
         Iterations: 10
         Function evaluations: 23
Optimization terminated successfully.
         Current function value: 0.000885
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 0.042007
         Iterations: 15
         Function evaluations: 30
Optimization terminated successfully.
         Current function value: 0.052196
         Iterations: 15
         Function evaluations: 30
Optimization terminated successfully.
         Current function value: 0.014349
         Iterations: 15
         Function evaluations: 32
Optimization terminated successfully.
         Current function value: 0.004601
         Iterations: 13
         Function evaluations: 28
Optimization terminated successful

In [None]:
c_arr = []
date_range_arr = []
for i, file in enumerate(ls):
    train = pd.read_csv(data_path+'train/'+file)
    date_range = train.Date.iloc[-1] +' to ' + train.Date.iloc[0]
    date_range_arr.append(date_range)
    
    c = pd.DataFrame(paras_results[i])
    c.index = c.iloc[:,0]
    c = pd.DataFrame(c.iloc[:,1])
#     c.columns = ['paras']
    c_arr.append(c)
    
paras_results_pd = pd.concat(dict(zip(ls, c_arr)), axis=1)
paras_results_pd.columns = paras_results_pd.columns.droplevel(1)
paras_results_pd.index.name= None

l_arr = []
date_range_arr = []
for i, file in enumerate(ls):
    train = pd.read_csv(data_path+'train/'+file)
    date_range = train.Date.iloc[-1] +' to ' + train.Date.iloc[0]
    date_range_arr.append(date_range)
    
    c = pd.DataFrame(likelihood_results[i])
    c.index = c.iloc[:,0]
    c = pd.DataFrame(c.iloc[:,1])
#     c.columns = ['likelihood']
    l_arr.append(c)
    
likelihood_results_pd = pd.concat(dict(zip(ls, l_arr)), axis=1)
likelihood_results_pd.columns = likelihood_results_pd.columns.droplevel(1)
likelihood_results_pd.index.name= None

In [None]:
paras_results_pd

In [None]:
likelihood_results_pd

In [None]:
best_h_results_pd = pd.concat(dict(zip(ls, best_h_results)), axis=1)

In [None]:
def cap(h):
    if h < 0:
        return 0
    elif h >1:
        return 1
    else:
        return h

In [None]:
columns = best_h_results_pd.columns
for c in columns:
     best_h_results_pd.loc[:,c] = best_h_results_pd.loc[:,c].apply(cap)

In [None]:
def hedging_effectiveness(h_arr, spot, future, k_arr, q_arr):
    results = np.ones((len(h_arr),1+len(k_arr)+len(q_arr)))
    for i, h in enumerate(h_arr):
        rh = spot-h*future
        results[i,:]=1-risk_measures(k_arr, q_arr, rh)/risk_measures(k_arr, q_arr, spot)
    return np.array([results[i,i] for i in range(len(h_arr))])

In [None]:
date_range_arr = []
for i, file in enumerate(ls):
    train = pd.read_csv(data_path+'train/'+file)
    date_range = train.Date.iloc[-1] +' to ' + train.Date.iloc[0]
    date_range_arr.append(date_range)

In [None]:
display_likelihood = likelihood_results_pd.copy()
display_likelihood.columns = date_range_arr
display_likelihood = display_likelihood.reindex(sorted(display_likelihood.columns), axis=1)

In [None]:
display_paras = paras_results_pd.copy()
display_paras.columns = date_range_arr
display_paras = display_paras.reindex(sorted(display_paras.columns), axis=1)

In [None]:
display_best_h = best_h_results_pd.copy()
display_best_h.columns.set_levels(date_range_arr, level=0, inplace=True)
display_best_h = display_best_h.reindex(sorted(display_best_h.columns), axis=1)

In [None]:
if os.path.exists("../results/" + data_name)==False:
    print("Create new folder for results")
    os.mkdir("../results/" + data_name)
    os.mkdir("../results/" + data_name +"/MLE")
    os.mkdir("../results/" + data_name +"/MM")
    
if calibration_method == "MLE":
    path = "../results/"+ data_name +"/MLE/"
    paras_results_pd.to_json(path+"parameters.json")
    likelihood_results_pd.to_json(path+"likelihood.json")
    best_h_results_pd.to_json(path+"best_h.json")
    
    display_paras.to_html(path+"paras.html")
    display_likelihood.to_html(path+"likelihood.html")
    display_best_h.to_html(path+"best_h.html")

    
elif calibration_method == "MM":
    path = "../results/"+ data_name +"/MM/"
    paras_results_pd.to_json(path+"parameters.json")
    likelihood_results_pd.to_json(path+"likelihood.json")
    best_h_results_pd.to_json(path+"best_h.json")
    
    display_paras.to_html(path+"paras.html")
    display_likelihood.to_html(path+"likelihood.html")
    display_best_h.to_html(path+"best_h.html")

In [None]:
HE_results = []

for i, file in enumerate(ls):
    test = pd.read_csv(data_path+'train/'+file)
    spot   = test.loc[:,spot_name]
    future = test.loc[:,future_name]
    fn = lambda h_arr: hedging_effectiveness(h_arr, spot, future, k_arr, q_arr)
    HE = pd.DataFrame().reindex_like(best_h_results[0])
    HEs = best_h_results[i].apply(fn, axis=1)
    for i in range(len(HEs)):
        HE.iloc[i,:] = HEs[i]
    HE_results.append(HE)
HE_results_pd = pd.concat(dict(zip(ls, HE_results)), axis=1)
risk_measure_names = HE_results_pd.droplevel(0,axis=1).columns.unique()

fig, ax = plt.subplots(len(risk_measure_names), 1, figsize=(10,5*len(risk_measure_names)))
for i, name in enumerate(risk_measure_names): 
    ax[i].boxplot(HE_results_pd.droplevel(0,axis=1).loc[:,name])
    ax[i].set_xticklabels(Copulae_names)
    ax[i].set_title("In Sample Hedging Effectiveness of %s"%name)
    
if calibration_method == "MLE":
    fig.savefig("../results/"+ data_name +"/MLE/"+"In Sample Hedging Effectiveness.png", transparent=True)    
elif calibration_method == "MM":
    fig.savefig("../results/"+ data_name +"/MM/"+"In Sample Hedging Effectiveness.png", transparent=True)

In [None]:
HE_results = []

for i, file in enumerate(ls):
    test = pd.read_csv(data_path+'test/'+file)
    spot   = test.loc[:,spot_name]
    future = test.loc[:,future_name]
    fn = lambda h_arr: hedging_effectiveness(h_arr, spot, future, k_arr, q_arr)
    HE = pd.DataFrame().reindex_like(best_h_results[0])
    HEs = best_h_results[i].apply(fn, axis=1)
    for i in range(len(HEs)):
        HE.iloc[i,:] = HEs[i]
    HE_results.append(HE)
HE_results_pd = pd.concat(dict(zip(ls, HE_results)), axis=1)
risk_measure_names = HE_results_pd.droplevel(0,axis=1).columns.unique()

fig, ax = plt.subplots(len(risk_measure_names), 1, figsize=(10,5*len(risk_measure_names)))
for i, name in enumerate(risk_measure_names): 
    ax[i].boxplot(HE_results_pd.droplevel(0,axis=1).loc[:,name])
#     ax[i].set_xticks(np.linspace(1,len(Copulae_names)))
    ax[i].set_xticklabels(Copulae_names)
    ax[i].set_title("Out of Sample Hedging Effectiveness of %s"%name)
if calibration_method == "MLE":
    fig.savefig("../results/"+ data_name +"/MLE/"+"Out of Sample Hedging Effectiveness.png", transparent=True)    
elif calibration_method == "MM":
    fig.savefig("../results/"+ data_name +"/MM/"+"Out of Sample Hedging Effectiveness.png", transparent=True)