# Model confidence set (MCS) GARCH
In this notebook we compute the Model confidence set of each cryptocurrencies' models


In [1]:
# import libraries
import numpy as np
import pandas as pd
import os
import statsmodels.api as sm
from arch.bootstrap import MCS

In [2]:
import numpy as np
from numpy.random import rand
from numpy import ix_
import pandas as pd
np.random.seed(1337)

In [3]:
# Global variables
btc_path = './mcs/data/models/TS/BTC/MCS/'
eos_path = './mcs/data/models/TS/EOS/MCS/'
eth_path = './mcs/data/models/TS/ETH/MCS/'
iota_path = './mcs/data/models/TS/IOTA/MCS/'

In [4]:
crypto_list = [btc_path,eos_path,eth_path,iota_path]

# We perform the MCS algorithm

In [1]:
def bootstrap_sample(data, B, w):
    '''
    Bootstrap the input data
    data: input numpy data array
    B: boostrap size
    w: block length of the boostrap
    Retrieved from : https://michael-gong.com/blogs/model-confidence-set/
    '''
    t = len(data)
    p = 1 / w
    indices = np.zeros((t, B), dtype=int)
    indices[0, :] = np.ceil(t * rand(1, B))
    select = np.asfortranarray(rand(B, t).T < p)
    vals = np.ceil(rand(1, np.sum(np.sum(select))) * t).astype(int)
    indices_flat = indices.ravel(order="F")
    indices_flat[select.ravel(order="F")] = vals.ravel()
    indices = indices_flat.reshape([B, t]).T
    for i in range(1, t):
        indices[i, ~select[i, :]] = indices[i - 1, ~select[i, :]] + 1
    indices[indices > t] = indices[indices > t] - t
    indices -= 1
    return data[indices]


def compute_dij(losses, bsdata):
    '''Compute the loss difference'''
    t, M0 = losses.shape
    B = bsdata.shape[1]
    dijbar = np.zeros((M0, M0))
    for j in range(M0):
        dijbar[j, :] = np.mean(losses - losses[:, [j]], axis=0)

    dijbarstar = np.zeros((B, M0, M0))
    for b in range(B):
        meanworkdata = np.mean(losses[bsdata[:, b], :], axis=0)
        for j in range(M0):
            dijbarstar[b, j, :] = meanworkdata - meanworkdata[j]

    vardijbar = np.mean((dijbarstar - np.expand_dims(dijbar, 0)) ** 2, axis=0)
    vardijbar += np.eye(M0)

    return dijbar, dijbarstar, vardijbar


def calculate_PvalR(z, included, zdata0):
    '''Calculate the p-value of relative algorithm'''
    empdistTR = np.max(np.max(np.abs(z), 2), 1)
    zdata = zdata0[ix_(included - 1, included - 1)]
    TR = np.max(zdata)
    pval = np.mean(empdistTR > TR)
    
    return pval


def calculate_PvalSQ(z, included, zdata0):
    '''Calculate the p-value of sequential algorithm'''
    empdistTSQ = np.sum(z ** 2, axis=1).sum(axis=1) / 2
    zdata = zdata0[ix_(included - 1, included - 1)]
    TSQ = np.sum(zdata ** 2) / 2
    pval = np.mean(empdistTSQ > TSQ)
    return pval


def iterate(dijbar, dijbarstar, vardijbar, alpha, algorithm="R"):
    '''Iteratively excluding inferior model'''
    B, M0, _ = dijbarstar.shape
    z0 = (dijbarstar - np.expand_dims(dijbar, 0)) / np.sqrt(
        np.expand_dims(vardijbar, 0)
    )
    zdata0 = dijbar / np.sqrt(vardijbar)

    excludedR = np.zeros([M0, 1], dtype=int)
    pvalsR = np.ones([M0, 1])
    pval_record = np.ones([M0, 1])

    for i in range(M0 - 1):
        included = np.setdiff1d(np.arange(1, M0 + 1), excludedR)
        m = len(included)
        z = z0[ix_(range(B), included - 1, included - 1)]

        if algorithm == "R":
            pvalsR[i] = calculate_PvalR(z, included, zdata0)
        elif algorithm == "SQ":
            pvalsR[i] = calculate_PvalSQ(z, included, zdata0)

        scale = m / (m - 1)
        dibar = np.mean(dijbar[ix_(included - 1, included - 1)], 0) * scale
        dibstar = np.mean(dijbarstar[ix_(range(B), included - 1, included - 1)], 1) * (
            m / (m - 1)
        )
        vardi = np.mean((dibstar - dibar) ** 2, axis=0)
        t = dibar / np.sqrt(vardi)
        modeltoremove = np.argmax(t)
        model_to_remove.append(included[modeltoremove])
        excludedR[i] = included[modeltoremove]

    maxpval = pvalsR[0]
    for i in range(1, M0):
        if pvalsR[i] < maxpval:
            pvalsR[i] = maxpval
        else:
            maxpval = pvalsR[i]

    excludedR[-1] = np.setdiff1d(np.arange(1, M0 + 1), excludedR)
    pl = np.argmax(pvalsR > alpha)
    includedR = excludedR[pl:]
    excludedR = excludedR[:pl]
    return includedR - 1, excludedR - 1, pvalsR


def MCS(losses, alpha, B, w, algorithm):
    '''Main function of the MCS'''
    t, M0 = losses.shape
    bsdata = bootstrap_sample(np.arange(t), B, w)
    dijbar, dijbarstar, vardijbar = compute_dij(losses, bsdata)
    includedR, excludedR, pvalsR = iterate(dijbar, dijbarstar, vardijbar, alpha, algorithm=algorithm)
    return includedR, excludedR, pvalsR

In [6]:
class ModelConfidenceSet(object):
    def __init__(self, data, alpha, B, w, algorithm="SQ", names=None):
        """
        Implementation of Econometrica Paper:
        Hansen, Peter R., Asger Lunde, and James M. Nason. "The model confidence set." Econometrica 79.2 (2011): 453-497.
        Retrieved from https://michael-gong.com/blogs/model-confidence-set/
        Input:
            data->pandas.DataFrame or numpy.ndarray: input data, columns are the losses of each model 
            alpha->float: confidence level
            B->int: bootstrap size for computation covariance
            w->int: block size for bootstrap sampling
            algorithm->str: SQ or R, SQ is the first t-statistics in Hansen (2011) p.465, and R is the second t-statistics
            names->list: the name of each model (corresponding to each columns). 

        Method:
            run(self): compute the MCS procedure

        Attributes:
            included: models that are in the model confidence sets at confidence level of alpha
            excluded: models that are NOT in the model confidence sets at confidence level of alpha
            pvalues: the bootstrap p-values of each models
        """

        if isinstance(data, pd.DataFrame):
            self.data = data.values
            self.names = data.columns.values if names is None else names
        elif isinstance(data, np.ndarray):
            self.data = data
            self.names = np.arange(data.shape[1]) if names is None else names

        if alpha < 0 or alpha > 1:
            raise ValueError(
                f"alpha must be larger than zero and less than 1, found {alpha}"
            )
        if not isinstance(B, int):
            try:
                B = int(B)
            except Exception as identifier:
                raise RuntimeError(
                    f"Bootstrap size B must be a integer, fail to convert", identifier
                )
        if B < 1:
            raise ValueError(f"Bootstrap size B must be larger than 1, found {B}")
        if not isinstance(w, int):
            try:
                w = int(w)
            except Exception as identifier:
                raise RuntimeError(
                    f"Bootstrap block size w must be a integer, fail to convert",
                    identifier,
                )
        if w < 1:
            raise ValueError(f"Bootstrap block size w must be larger than 1, found {w}")

        if algorithm not in ["R", "SQ"]:
            raise TypeError(f"Only R and SQ algorithm supported, found {algorithm}")

        self.alpha = alpha
        self.B = B
        self.w = w
        self.algorithm = algorithm

    def run(self):
        included, excluded, pvals= MCS(
            self.data, self.alpha, self.B, self.w, self.algorithm
        )

        self.included = self.names[included].ravel().tolist()
        self.excluded = self.names[excluded].ravel().tolist()
        self.pvalues = pd.Series(pvals.ravel(), index=self.excluded + self.included)
        
        return self

In [46]:
for each in crypto_list: 
    model_to_remove = []
    crypto = each.split("/")[4]
    filename = f"{crypto}-ts-models.csv"
    print(f"Processing {crypto}")
    df_main = pd.read_csv(os.path.join(each,filename), index_col=0).iloc[3:,:]
    df_main.to_csv(f'./mcs/losses/ts/{crypto}.csv')
    print("shape",df_main.shape)
    mcs = ModelConfidenceSet(df_main, 0.1, 30, len(df_main.index)//20 , algorithm="SQ").run()
    print(f"MCS included {mcs.included}")
    print(mcs.pvalues)
    df_results = pd.DataFrame({"rank": range(1,len(mcs.pvalues)+1),"mcs.pvalues":mcs.pvalues.values[::-1]}, index=mcs.pvalues.index[::-1])
    df_results.to_csv(f"./mcs/results/ts-{crypto}.csv")
    print(df_results.head())
    print(model_to_remove)

Processing BTC
shape (377, 81)
MCS included ['BTC-model-56']
BTC-model-52    0.000000
BTC-model-43    0.000000
BTC-model-34    0.000000
BTC-model-44    0.000000
BTC-model-35    0.000000
                  ...   
BTC-model-71    0.000000
BTC-model-55    0.000000
BTC-model-62    0.033333
BTC-model-59    0.033333
BTC-model-56    1.000000
Length: 81, dtype: float64
              rank  mcs.pvalues
BTC-model-56     1     1.000000
BTC-model-59     2     0.033333
BTC-model-62     3     0.033333
BTC-model-55     4     0.000000
BTC-model-71     5     0.000000
[53, 44, 35, 45, 36, 54, 50, 41, 32, 51, 70, 33, 67, 42, 79, 76, 58, 61, 64, 73, 55, 34, 52, 43, 38, 29, 47, 31, 48, 49, 30, 21, 40, 39, 24, 27, 20, 23, 26, 19, 22, 46, 28, 25, 37, 12, 15, 18, 3, 11, 6, 9, 14, 17, 2, 5, 8, 10, 13, 16, 1, 4, 7, 65, 68, 71, 62, 74, 59, 77, 80, 75, 81, 78, 69, 66, 72, 56, 63, 60]
Processing EOS
shape (123, 81)




MCS included ['EOS-model-73']
EOS-model-75    0.000000
EOS-model-69    0.000000
EOS-model-30    0.000000
EOS-model-33    0.000000
EOS-model-27    0.000000
                  ...   
EOS-model-65    0.033333
EOS-model-80    0.033333
EOS-model-64    0.033333
EOS-model-74    0.033333
EOS-model-73    1.000000
Length: 81, dtype: float64
              rank  mcs.pvalues
EOS-model-73     1     1.000000
EOS-model-74     2     0.033333
EOS-model-64     3     0.033333
EOS-model-80     4     0.033333
EOS-model-65     5     0.033333
[76, 70, 31, 34, 28, 43, 40, 49, 46, 52, 37, 61, 41, 50, 54, 53, 67, 45, 51, 42, 55, 58, 79, 64, 47, 48, 44, 38, 39, 32, 29, 35, 36, 30, 33, 73, 3, 6, 9, 2, 5, 1, 8, 4, 7, 10, 13, 16, 11, 14, 17, 21, 24, 27, 12, 20, 23, 15, 26, 18, 19, 22, 25, 78, 77, 72, 69, 60, 63, 57, 71, 68, 59, 56, 62, 80, 66, 81, 65, 75]
Processing ETH
shape (239, 81)
MCS included ['ETH-model-58', 'ETH-model-56', 'ETH-model-55']
ETH-model-78    0.000000
ETH-model-63    0.000000
ETH-model-69    0.000



# we compute all models results

In [47]:
import pandas as pd
import numpy as np
import requests as rq
import matplotlib.pyplot as plt
from libraries.Cryptov2 import Cryptocurrency
from arch.univariate import arch_model

In [48]:
df_BTC = pd.read_csv("./mcs/models/ts/fits/BTC-results.csv", index_col=0)
df_EOS = pd.read_csv("./mcs/models/ts/fits/EOS-results.csv", index_col=0)
df_ETH = pd.read_csv("./mcs/models/ts/fits/ETH-results.csv", index_col=0)
df_IOTA = pd.read_csv("./msc/models/ts/fits/IOTA-results.csv", index_col=0)

In [49]:
all_cryptos = {
    "BTC":df_BTC,
    "ETH":df_ETH,
    "EOS":df_EOS,
    "IOTA":df_IOTA,
}

In [50]:
def get_ann(x, arg):
    my_arg_list = x.split('-')
    if len(my_arg_list) > 4:
        return(my_arg_list[arg])
    else:
        return("-") 

In [51]:
all_results = [each for each in os.listdir('./mcs/results/') if (each.startswith('ts')) & (len(each.split('-'))<=2)]

In [52]:
all_results

['ts-EOS.csv', 'ts-ETH.csv', 'ts-BTC.csv', 'ts-IOTA.csv']

In [53]:
data_frame = pd.read_csv(f'./mcs/results/{all_results[3]}')
data_frame

Unnamed: 0.1,Unnamed: 0,rank,mcs.pvalues
0,IOTA-model-8,1,1.0
1,IOTA-model-5,2,0.0
2,IOTA-model-2,3,0.0
3,IOTA-model-7,4,0.0
4,IOTA-model-4,5,0.0
...,...,...,...
76,IOTA-model-44,77,0.0
77,IOTA-model-53,78,0.0
78,IOTA-model-34,79,0.0
79,IOTA-model-43,80,0.0


In [54]:
def get_model_data(all_cryptos, crypto, model_fullname):
    model_num = model_fullname.split('-')[len(model_fullname.split('-'))-1]
    return(all_cryptos[crypto].iloc[int(model_num),:])

In [56]:
for each in all_results:
    crypto = each.split('-')[1].split('.')[0]
    data_frame = pd.read_csv(f'./mcs/results/{each}')
    
    model_number = data_frame.iloc[:,0].apply(lambda x :x)
    data_frame['TS Model'] = data_frame.iloc[:,0].apply(lambda x: get_model_data(all_cryptos, crypto,x)[0])
    data_frame['Distribution'] = data_frame.iloc[:,0].apply(lambda x: get_model_data(all_cryptos, crypto,x)[2])
    data_frame['p'] = data_frame.iloc[:,0].apply(lambda x: get_model_data(all_cryptos, crypto,x)[3])
    data_frame['q'] = data_frame.iloc[:,0].apply(lambda x: get_model_data(all_cryptos, crypto,x)[4])
    data_frame['mcs.pvalues'] = data_frame['mcs.pvalues'].apply(lambda x: round(x,2))
    data_frame = data_frame[['rank','TS Model','p','q','Distribution','mcs.pvalues']]
    data_frame.columns = ['Rank','Model','p','q','Dist.','MCS p val']
    data_frame.head(5).to_csv(f'./mcs/results/ts-{crypto}-top-5-formatted.csv', index=False)