In [1]:
# set up imports
import pandas as pd, numpy as np, seaborn as sns
from tabulate import tabulate
import statsmodels
import arch
import matplotlib
matplotlib.use('qt5agg')

# configure plot style
import matplotlib.pyplot as plt
# Global matplotlib defaults used by every figure in this notebook.
plt.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
    'figure.figsize': (9.5,4.15),
    'figure.constrained_layout.use': False,
    'figure.dpi': 100,
    'savefig.dpi': 100,
    'lines.linewidth': 0.8,
})
# Directory prefix that the helper functions below concatenate file names onto
# (raw string: the '\\' after the drive letter is two literal backslashes).
save_plot_to =  r'C:\\Users\joche\OneDrive\03 TUM - TUM-BWL\Semester 8\01 Bachelorarbeit\04 Results\Plots/'

In [2]:
# load data and crop to start_date:end_date
# `pd.datetime` was only an alias of the stdlib class and no longer exists in
# pandas >= 2.0, so the parsers use the stdlib class directly.
from datetime import datetime as _datetime

start_date = '2015-08-07'
end_date = '2020-06-26'
# daily calendar index (every day from start_date to end_date) used to
# reindex the lower-frequency series before interpolation
idx = pd.date_range(start_date, end_date)
index_name = 'date'


def mydateparser(x):
    """Parse ``str(x)`` formatted as 'YYYY-MM-DD HH:MM:SS' into a datetime."""
    return _datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')


def mydateparser1(x):
    """Parse ``str(x)`` formatted as 'YYYY-MM-DD' into a datetime."""
    return _datetime.strptime(str(x), '%Y-%m-%d')

# Every series below ends up as a single-column frame whose index is named
# `index_name`, so that the frames can be joined on their index later on.

# btc
btc = pd.read_excel('Data/BTC_closing.xlsx', parse_dates=[0], index_col=0, date_parser=mydateparser)[start_date:end_date]
btc.index.name = index_name
btc.columns = ['btc']

# usd_eur: keep only rows that contain at least one non-zero value
# NOTE(review): the zero rows are presumably placeholders for non-trading days - confirm
usd_eur = pd.read_excel('Data/DEXUSEU.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
usd_eur.index.name = index_name
usd_eur.columns = ['usd_eur']
usd_eur = usd_eur.loc[(usd_eur!=0).any(axis=1)]

# tot_btc: only weekly data - missing values interpolated
# (index truncated to midnight, reindexed onto the daily calendar `idx`,
# gaps filled by time-weighted interpolation in both directions)
tot_btc = pd.read_csv('Data/total-bitcoins.txt', index_col=0)[start_date:end_date]
tot_btc.index = pd.DatetimeIndex(tot_btc.index).normalize()
tot_btc.index.name = index_name
tot_btc = tot_btc.reindex(idx, fill_value=None)
tot_btc = tot_btc.interpolate(method='time', limit_direction='both')
tot_btc.columns = ['tot_btc']

# hs_rate: only weekly data - missing values interpolated
hs_rate = pd.read_csv('Data/hash-rate.txt', index_col=0)[start_date:end_date]
hs_rate.index = pd.DatetimeIndex(hs_rate.index).normalize()
hs_rate.index.name = index_name
hs_rate = hs_rate.reindex(idx, fill_value=None)
hs_rate = hs_rate.interpolate(method='time', limit_direction='both')
hs_rate.columns = ['hs_rate']

# eth: only spreadsheet columns A (date) and E are read
eth = pd.read_excel('Data/ETH.xlsx', parse_dates=[0], index_col=0, date_parser=mydateparser, usecols='A,E')[start_date:end_date]
eth.index.name = index_name
eth.columns = ['eth']

# ggl_trends: only weekly data - missing values interpolated
# ggl_trends: row sum over all columns divided by 15 (one column per country), rounded
ggl_trends = pd.read_csv('Data/googletrends.txt', index_col=0)[start_date:end_date]
ggl_trends.index = pd.DatetimeIndex(ggl_trends.index).normalize()
ggl_trends.index.name = index_name
ggl_trends = ggl_trends.reindex(idx, fill_value=None)
ggl_trends = ggl_trends.interpolate(method='time', limit_direction='both')
ggl_trends['btc-average'] = np.round(ggl_trends.sum(axis=1)/15)
ggl_trends = ggl_trends[['btc-average']].copy()
ggl_trends.columns = ['ggl_trends']

# wiki_views: sum of 99 countries (row sum over all columns, rounded)
wiki_views = pd.read_excel('Data/wikipedia.xlsx', parse_dates=[0], index_col=0, date_parser=mydateparser1)[start_date:end_date]
wiki_views.index = pd.DatetimeIndex(wiki_views.index).normalize()
wiki_views.index.name = index_name
wiki_views['wiki-total'] = np.round(wiki_views.sum(axis=1))
wiki_views = wiki_views[['wiki-total']].copy()
wiki_views.columns = ['wiki_views']

# wti_oil: rows without any non-zero value are dropped
oil_wti = pd.read_excel('Data/DCOILWTICO.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
oil_wti.index = pd.DatetimeIndex(oil_wti.index).normalize()
oil_wti.index.name = index_name
oil_wti.columns = ['oil_wti']
oil_wti = oil_wti.loc[(oil_wti!=0).any(axis=1)]

# gold: rows without any non-zero value are dropped
gold = pd.read_excel('Data/GOLDAMGBD228NLBM10AM.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
gold.index = pd.DatetimeIndex(gold.index).normalize()
gold.index.name = index_name
gold.columns = ['gold']
gold = gold.loc[(gold!=0).any(axis=1)]

# sp500: rows without any non-zero value are dropped
sp500 = pd.read_excel('Data/SP500.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
sp500.index = pd.DatetimeIndex(sp500.index).normalize()
sp500.index.name = index_name
sp500.columns = ['sp500']
sp500 = sp500.loc[(sp500!=0).any(axis=1)]

# sse: only the column 'Zuletzt' is kept
# NOTE(review): 'Zuletzt' is presumably the last/closing price of the export - confirm
sse = pd.read_excel('Data/SSEcomposite.xlsx', parse_dates=[0], index_col=0, header=0, date_parser=mydateparser)[start_date:end_date]
sse.index = pd.DatetimeIndex(sse.index).normalize()
sse.index.name = index_name
sse = sse[['Zuletzt']].copy()
sse.columns = ['sse']

# ffd_rate
ffd_rate = pd.read_excel('Data/DFF.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
ffd_rate.index = pd.DatetimeIndex(ffd_rate.index).normalize()
ffd_rate.index.name = index_name
ffd_rate.columns = ['ffd_rate']

In [3]:
# merge data to one df (inner join)
from functools import reduce


def _inner_join_on_index(left, right):
    """Join two frames on their index, keeping only index values present in both."""
    return pd.merge(left, right, how='inner', left_index=True, right_index=True)


# the order of this list determines the column order of df
temp = [
    btc, tot_btc, hs_rate, eth, ggl_trends, wiki_views,
    usd_eur, oil_wti, gold, sp500, sse, ffd_rate,
]
df = reduce(_inner_join_on_index, temp)
df

Unnamed: 0_level_0,btc,tot_btc,hs_rate,eth,ggl_trends,wiki_views,usd_eur,oil_wti,gold,sp500,sse,ffd_rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-08-07,279.58,1.447762e+07,3.775640e+05,2.770000,2.0,15560,1.0958,43.87,1091.35,2077.57,3744.20,0.14
2015-08-10,264.47,1.448130e+07,3.687855e+05,0.708448,2.0,15113,1.0994,44.94,1094.80,2104.18,3928.42,0.14
2015-08-11,270.39,1.448498e+07,3.754390e+05,1.070000,2.0,15320,1.1042,43.11,1113.25,2084.07,3927.91,0.15
2015-08-12,266.38,1.448865e+07,3.820926e+05,1.220000,2.0,15255,1.1198,43.22,1116.80,2086.05,3886.32,0.15
2015-08-13,264.08,1.449229e+07,3.887461e+05,1.830000,2.0,23711,1.1144,42.27,1117.35,2083.39,3954.56,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-18,9411.84,1.840797e+07,1.090229e+08,232.100000,10.0,16234,1.1216,38.79,1732.65,3115.34,2939.32,0.09
2020-06-19,9288.02,1.840862e+07,1.074749e+08,227.140000,10.0,15721,1.1189,39.72,1728.55,3097.74,2967.63,0.09
2020-06-22,9648.72,1.841192e+07,1.074657e+08,242.530000,10.0,16482,1.1260,40.60,1745.45,3117.86,2965.27,0.08
2020-06-23,9629.66,1.841279e+07,1.082351e+08,244.140000,10.0,16216,1.1322,40.40,1756.60,3131.29,2970.62,0.08


In [4]:
# split data in pre and post bubble 2018
# (rows dated after pre_bubble_end_date and before post_bubble_start_date
# belong to neither frame)
pre_bubble_end_date = '2017-12-01'
post_bubble_start_date = '2018-02-01'

# pre bubble df: everything up to and including the end date
pre_df = df.loc[:pre_bubble_end_date]

# post bubble df: everything from the start date onwards
post_df = df.loc[post_bubble_start_date:]

In [None]:
# save btc_new.txt dataset with 1112 observations for use in M1-M3
def save_btcnew():
    """Write the ``btc`` column of the merged frame ``df`` to ``btc_new.txt`` under ``save_plot_to``."""
    target_path = save_plot_to + 'btc_new.txt'
    df['btc'].to_csv(path_or_buf=target_path)

# save_btcnew()

In [5]:
### RUN TO DEFINE ALL FUNCTIONS ###

# define overview-printing function of dataframe
def dates_overview(dataframe):
    """Print the first index label, the last index label and the row count of ``dataframe``."""
    overview = (
        ('start_date:\t', dataframe.index[0]),
        ('end_date:\t', dataframe.index[-1]),
        ('len df:\t\t', len(dataframe)),
    )
    for label, value in overview:
        print(label, value)
    
# define plotting function of dataframe
import matplotlib.dates as mdates
def plot_df(dataframe):
    """Plot each column of ``dataframe`` as a black line in its own panel of a 4x3 grid.

    Only the first 12 columns are drawn (one per panel). If the frame has
    fewer than 12 columns the unused panels are hidden instead of raising an
    IndexError.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are plotted against its index; the x-axis is
        formatted as "%Y-%m" dates.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the grid of panels.
    """
    fig_i, axs = plt.subplots(4,3, figsize=(9.5,4.15))
    panels = axs.flatten()
    column_names = list(dataframe.columns)
    # zip stops at the shorter sequence: at most one column per panel
    for ax, column in zip(panels, column_names):
        ax.plot(dataframe[column], color='black')
        ax.set_title(column)
        ax.xaxis.set_ticks_position('none')
        ax.yaxis.set_ticks_position('none')
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%Y-%m"))
        ax.tick_params(labelsize=8)
    # panels without a matching column stay empty; hide them
    for ax in panels[len(column_names):]:
        ax.set_visible(False)
    plt.tight_layout()
    return fig_i

# define log-taking and relabeling function
def log_of_df(dataframe):
    """Return the natural log of ``dataframe`` with '_log' appended to every column name.

    NaN values in the result are replaced by 0. The input frame itself is
    left unchanged.
    """
    logged = np.log(dataframe)
    logged.columns = [name + '_log' for name in logged.columns]
    # fill na value of negative oil price on 2020-04-20 with 0
    return logged.fillna(value=0)

# define correlation-table generating function of dataframe
def corr_table_aslatex_of_df(dataframe):
    """Print the correlation matrix of ``dataframe``, rounded to 3 decimals, as a LaTeX table."""
    rounded_corr = dataframe.corr().round(3)
    latex_table = tabulate(rounded_corr, headers=rounded_corr.columns, showindex=True, tablefmt="latex")
    print(latex_table)

# define heatmap-generating function of dataframe
def heatmap_corr_of_df(dataframe):
    """Draw an annotated heatmap of the pairwise column correlations of ``dataframe``.

    The heatmap goes onto the current pyplot figure, which is resized to
    9.5 x 4.15 inches. Column labels are shown at the top. Nothing is returned.
    """
    corr = dataframe.corr()
    labels = corr.columns.values
    # more html colors here: https://www.w3schools.com/colors/colors_names.asp
    heatmap_style = dict(annot=True, annot_kws={'size':7}, vmin=-1, center=0, vmax=1, cmap="YlGnBu")
    ax = sns.heatmap(corr, xticklabels=labels, yticklabels=labels, **heatmap_style)
    # move both y-limits by 0.5
    # NOTE(review): presumably a workaround for clipped first/last heatmap rows
    # in some matplotlib versions - confirm it is still needed
    y_first, y_second = ax.get_ylim()
    ax.set_ylim(y_first + 0.5, y_second - 0.5)
    plt.gcf().set_size_inches(9.5,4.15)
    # tick labels on top only, no tick marks
    plt.tick_params(axis='both', which='major', labelsize=7.5, labelbottom = False, bottom=False, top = False, labeltop=True)
    plt.xticks(fontsize=7.5, rotation=1)
    plt.yticks(fontsize=7.5)
    
# define train/test-splitting function of dataframe
def split_traintest_df(dataframe, train_frac=0.8):
    """Split ``dataframe`` chronologically into a leading train part and a trailing test part.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to split; rows are taken in their existing order.
    train_frac : float, default 0.8
        Share of rows assigned to the train part. The train part holds the
        first ``int(len(dataframe) * train_frac)`` rows, the test part the rest.

    Returns
    -------
    (df_train, df_test) : tuple of pandas.DataFrame

    Side effects
    ------------
    Prints a small table with first index label, last index label and number
    of rows of the full frame and of both parts. Building that table raises
    an IndexError if either part is empty.
    """
    train_size = int(len(dataframe) * train_frac)
    # explicit positional slicing, independent of the index type
    df_train, df_test = dataframe.iloc[:train_size], dataframe.iloc[train_size:]
    # create dataframe for printout
    parts = [('dataframe', dataframe), ('df_train', df_train), ('df_test', df_test)]
    data = {'Dataframe': [label for label, _ in parts],
            'date_start': [frame.index[0] for _, frame in parts],
            'date_end': [frame.index[-1] for _, frame in parts],
            'nobs': [len(frame) for _, frame in parts]}
    df_print = pd.DataFrame(data, columns=['Dataframe','date_start','date_end','nobs'])
    print(df_print)
    return df_train, df_test

# define adf- and pp-testing of dataframe with latex-printout on/off
from arch.unitroot import ADF, PhillipsPerron
def stationarity_tests(dataframe, latex):
    """Run an ADF and a Phillips-Perron test on every column of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One test pair is run per column.
    latex : bool
        If exactly ``False`` the summaries are printed as plain text. For any
        other value they are appended in LaTeX form to
        'Stationarity_Tests_LaTeX.txt' under ``save_plot_to``.
    """
    print_plain_text = latex is False
    latex_path = save_plot_to + 'Stationarity_Tests_LaTeX.txt'
    for col in dataframe:
        series = dataframe[col]
        adf = ADF(series)
        pp = PhillipsPerron(series)
        if not print_plain_text:
            # write summary as latex to file (append mode: earlier content is kept)
            with open(latex_path, 'a') as myfile:
                myfile.write('Timeseries:\t'+col+'\n'
                             +adf.summary().as_latex()+'\n\n'
                             +pp.summary().as_latex()+'\n\n\n')
            continue
        # write summary as plain text to std.out
        print('Timeseries:\t',col,'\n',
              adf.summary(),'\n\n',pp.summary(),'\n\n\n')

# define first difference-taking function of dataframe
def diff_of_df(dataframe):
    """Return the first difference of ``dataframe`` with '_diff' appended to every column name.

    The first row of the result is NaN because it has no predecessor. The
    input frame is left unchanged.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to difference row-wise.

    Returns
    -------
    pandas.DataFrame
    """
    # difference the argument itself (previously the global df_train_log was
    # used here, so the argument was silently ignored)
    differenced = dataframe.diff()
    # relabel columns
    differenced.columns = [name + '_diff' for name in differenced.columns]
    return differenced

# define stationaritiy table-generating function of a dataframe filepath
def stationarity_table_aslatex_from_df(filepath):
    """Read a whitespace-delimited table from ``save_plot_to + filepath`` and print it as LaTeX.

    Parameters
    ----------
    filepath : str
        File name relative to ``save_plot_to``; its first line is used as header.
    """
    # read in dataframe from .txt file
    # (raw string: '\s' is not a valid escape sequence in a normal string literal)
    stationarity_dataframe = pd.read_csv(save_plot_to+filepath, delimiter=r'\s+', header=0)
    # print dataframe as latex output, without the row index
    print(tabulate(stationarity_dataframe, headers=stationarity_dataframe.columns, showindex=False, tablefmt="latex"))

# define granger causality test performing function of a dataframe
# code taken from: https://stackoverflow.com/questions/58005681/is-it-possible-to-run-a-vector-autoregression-analysis-on-a-large-gdp-data-with
from statsmodels.tsa.stattools import grangercausalitytests
# maximum lag examined by grangers_causality_matrix (read there as a global)
maxlag = 15
# name of the test statistic; spelled with an underscore like the default of
# grangers_causality_matrix (the former 'ssr-chi2test' matched no result key)
test = 'ssr_chi2test'
def grangers_causality_matrix(X_train, variables, test = 'ssr_chi2test', verbose=False):
    """Build a matrix holding the smallest Granger-causality p-value for every variable pair.

    For each pair the test is run on ``X_train[[row, column]]`` for lags
    1..``maxlag`` (module-level global). The p-values of statistic ``test``
    are rounded to 4 decimals and their minimum is stored.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Data containing all columns named in ``variables``.
    variables : sequence of str
        Column names to test against each other.
    test : str, default 'ssr_chi2test'
        Key of the test statistic in the statsmodels result.
    verbose : bool, default False
        If true, print the p-values of every pair.

    Returns
    -------
    pandas.DataFrame
        Square frame; rows are labelled '<var>_y', columns '<var>_x'.
    """
    n_vars = len(variables)
    dataset = pd.DataFrame(np.zeros((n_vars, n_vars)), columns=variables, index=variables)
    lags = range(1, maxlag + 1)
    for x_name in dataset.columns:
        for y_name in dataset.index:
            test_result = grangercausalitytests(X_train[[y_name, x_name]], maxlag=maxlag, verbose=False)
            p_values = []
            for lag in lags:
                # result per lag: ({statistic name: (value, p-value, ...)}, ...)
                p_values.append(round(test_result[lag][0][test][1], 4))
            if verbose:
                print(f'Y = {y_name}, X = {x_name}, P Values = {p_values}')
            dataset.loc[y_name, x_name] = np.min(p_values)
    dataset.columns = [name + '_x' for name in variables]
    dataset.index = [name + '_y' for name in variables]
    return dataset

# define granger causality table generating function from dataframe
def grangercausality_table_aslatex_of_df(dataframe):
    """Print ``dataframe`` (including its row index) as a LaTeX table."""
    latex_table = tabulate(dataframe, headers=dataframe.columns, showindex=True, tablefmt="latex")
    print(latex_table)

# define grangercausality heatmap generating function of a dataframe
def grangercausality_heatmap_of_df(dataframe):
    """Show an annotated heatmap of the values in ``dataframe`` on a 0..1 colour scale.

    The heatmap goes onto the current pyplot figure, which is resized to
    9.5 x 4.15 inches and then shown. Nothing is returned.
    """
    heatmap_style = dict(annot=True, annot_kws={'size':7}, vmin=0, vmax=1, cmap="YlGnBu_r")
    ax = sns.heatmap(dataframe, xticklabels=dataframe.columns.values,
                     yticklabels=dataframe.index.values, **heatmap_style)
    # move both y-limits by 0.5
    # NOTE(review): presumably a workaround for clipped first/last heatmap rows
    # in some matplotlib versions - confirm it is still needed
    y_first, y_second = ax.get_ylim()
    ax.set_ylim(y_first + 0.5, y_second - 0.5)
    plt.gcf().set_size_inches(9.5,4.15)
    # tick labels on top only, no tick marks
    plt.tick_params(axis='both', which='major', labelsize=7, labelbottom = False, bottom=False, top = False, labeltop=True)
    plt.xticks(fontsize=7, rotation=45)
    plt.yticks(fontsize=7)
    plt.show()

# define VAR-order selecting function of dataframe
from statsmodels.tsa.vector_ar.var_model import VAR
def select_VAR_order(dataframe, maxlags=15):
    """Return the VAR lag-order selection table for ``dataframe`` as a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Endogenous variables passed to ``VAR``.
    maxlags : int, default 15
        Largest lag order handed to ``select_order``.

    Returns
    -------
    pandas.DataFrame
        The selection summary, parsed from its HTML rendering (first row as
        header, first column as index).
    """
    from io import StringIO  # local import keeps this cell self-contained

    var_model = VAR(endog=dataframe)
    order_summary = var_model.select_order(maxlags).summary()
    # read_html expects a path/URL or file-like object; passing the literal
    # HTML string directly is deprecated in recent pandas versions
    html_buffer = StringIO(order_summary.as_html())
    df_var_order = pd.read_html(html_buffer, header=0, index_col=0)[0]
    return df_var_order

# define VAR-order latex-table-generating function of a var_order
def var_order_aslatex_of_order(var_order):
    """Print the lag-order table ``var_order`` (including its row index) as a LaTeX table."""
    latex_table = tabulate(var_order, headers=var_order.columns, showindex=True, tablefmt="latex")
    print(latex_table)

# define VAR-model-fitting function of a dataframe and a lag order
def fit_VAR_model_of_df(dataframe, order):
    """Fit a VAR model on ``dataframe`` with ``order`` passed as ``maxlags`` and return the fit result."""
    return VAR(endog=dataframe).fit(maxlags = order)


# transform var_fitted into data frame
# code taken from: https://stackoverflow.com/questions/51734180/converting-statsmodels-summary-object-to-pandas-dataframe
def results_summary_to_dataframe(results):
    '''Collect coefficients, p-values and confidence bounds of a fitted model in one dataframe.

    Parameters
    ----------
    results : object
        Must provide ``params`` and ``pvalues`` and a ``conf_int()`` method
        whose result has the lower bounds under key 0 and the upper bounds
        under key 1.

    Returns
    -------
    pandas.DataFrame
        Columns "coeff", "pvals", "conf_lower", "conf_higher" (in this order).
    '''
    # compute the interval table once instead of once per bound
    conf_int = results.conf_int()
    # dict order defines the column order
    return pd.DataFrame({"coeff": results.params,
                         "pvals": results.pvalues,
                         "conf_lower": conf_int[0],
                         "conf_higher": conf_int[1],
                         })

# define dataframe generating function from .txt file at filepath
def var_coeffs_aslatex_from_txt(filepath):
    """Read a whitespace-delimited coefficient table and print it, rounded to 4 decimals, as LaTeX.

    Parameters
    ----------
    filepath : str
        File name relative to ``save_plot_to``; the first line is the header
        and the first column becomes the row index.
    """
    # read in df from textfile
    # (raw string: '\s' is not a valid escape sequence in a normal string literal)
    raw_table = pd.read_csv(save_plot_to+filepath, delimiter=r'\s+', header=0)
    var_btc_coefficients = raw_table.set_index(raw_table.columns[0])
    # generate latex output
    print(tabulate(var_btc_coefficients.round(4), headers=var_btc_coefficients.columns, showindex=True, tablefmt="latex"))

# define rolling forecast function
from sklearn.metrics import mean_squared_error as mse
def var_rolling_forecast(dataframe, lag_order):
    """One-step-ahead rolling VAR forecast of the first column of ``dataframe``.

    The last 20% of the rows form the test period. For every test row a
    VAR(``lag_order``) is re-fitted on the log-differences known up to the
    previous row. The forecast log-difference of the first column is added to
    the previous log level and transformed back with ``exp``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Level data; the first column is the forecast target (btc in this notebook).
    lag_order : int
        Lag order of the VAR; also determines the first row of the estimation window.

    Returns
    -------
    tuple
        [0]: root mean squared error of the predictions on the test period,
        [1]: number of actual values outside the 95% forecast interval,
        [2]: list of predictions, [3]: list of lower interval bounds,
        [4]: list of upper interval bounds.
    """
    # create train and test dataframes
    train_size = int(len(dataframe) * 0.8)
    dataframe_train, dataframe_test = dataframe[0:train_size], dataframe[train_size:]
    # position of the last training row
    index = len(dataframe_train) - 1
    # loop-invariant inputs: compute once instead of once per forecast step
    log_diff_values = log_of_df(dataframe).diff().values
    actual_btc = dataframe.iloc[:, 0]
    log_btc = np.log(actual_btc)
    # initialize lists
    pred_val_btc = list()
    lo_conf_int_btc = list()
    up_conf_int_btc = list()
    outlier_ctr = 0
    # rolling forecast
    for i in range(len(dataframe_test)):
        last_known = index + i
        # The window has to include row `last_known`: its level is the base the
        # forecast difference is added to, so its difference is known as well.
        # (Ending the window one row earlier forecasts the difference of row
        # `last_known` itself and misaligns it with the target row.)
        input_data = log_diff_values[lag_order+1:last_known + 1]
        # fit model and predict 1 step
        fitted = VAR(endog=input_data).fit(maxlags=lag_order)
        point, lower, upper = fitted.forecast_interval(y=input_data, steps=1, alpha=0.05)
        # obtain absolute (inversed) btc values for mean prediction, upper- and lower confidence interval
        base_level = log_btc.iloc[last_known]
        yhat_btc = np.exp(point[0][0] + base_level)
        lo_conf_btc_val = np.exp(lower[0][0] + base_level)
        up_conf_btc_val = np.exp(upper[0][0] + base_level)
        # explicit positional access; integer keys on a date-indexed Series are ambiguous
        actual_next = actual_btc.iloc[last_known + 1]
        if (actual_next > up_conf_btc_val) or (actual_next < lo_conf_btc_val):
            outlier_ctr += 1
        pred_val_btc.append(yhat_btc)
        lo_conf_int_btc.append(lo_conf_btc_val)
        up_conf_int_btc.append(up_conf_btc_val)
    # return [0]: rmse, [1]: outlier counter, [2]: predictions, [3]: lo_confint, [4]: up_confint
    rmse = np.sqrt(mse(pred_val_btc, dataframe_test.iloc[:,0].values))
    return (rmse, outlier_ctr, pred_val_btc, lo_conf_int_btc, up_conf_int_btc)

# define series-generating and plotting function for dataframe_var_pred_result
def plot_var_pred_result(dataframe, dataframe_train, dataframe_test, dataframe_var_pred_result):
    """Plot training values, test values, predictions and the forecast band of the first column.

    ``dataframe_var_pred_result`` is indexed as returned by
    ``var_rolling_forecast``: [2] predictions, [3] lower bounds, [4] upper
    bounds. The x-axis is the integer row position, not the date.

    Returns the list of line objects produced by the first ``plt.plot`` call.
    """
    # integer positions of the forecast period: len(train)+1 .. len(dataframe)
    index_pred = np.arange(len(dataframe_train) + 1, len(dataframe) + 1)

    def _on_forecast_axis(values):
        return pd.Series(values, index=index_pred)

    # make series for plotting pred. vs. actual
    predicted = _on_forecast_axis(dataframe_var_pred_result[2])
    actual = _on_forecast_axis(dataframe_test.iloc[:,0].values)
    band_lower = _on_forecast_axis(dataframe_var_pred_result[3])
    band_upper = _on_forecast_axis(dataframe_var_pred_result[4])
    # create plot
    fig = plt.plot(dataframe_train.iloc[:,0].values, label= r'$X_T$', color='black')
    plt.plot(actual, label= r'$X_{T+h}$', color='green')
    plt.plot(predicted, label= r'$\hat{X}_{T+h}$', color= 'red')
    plt.fill_between(band_lower.index, band_lower, band_upper, color='k', alpha=0.1)
    plt.legend(loc='upper left')
    plt.title('Einschrittprognose btc VAR(5)')
    return fig


In [8]:
# overview of df
dates_overview(df)

In [9]:
# plot df
fig_1 = plot_df(df)

In [6]:
# take log of df
df_log = log_of_df(df)



In [16]:
# plot log time series
fig_2 = plot_df(df_log)

In [21]:
# correlation matrix of df
corr_table_aslatex_of_df(df)

In [23]:
# correlation heatmap of df
heatmap_corr_of_df(df)

In [7]:
# split df in df_train and df_test
df_train, df_test = split_traintest_df(df)

   Dataframe date_start   date_end  nobs
0  dataframe 2015-08-07 2020-06-24  1112
1   df_train 2015-08-07 2019-07-01   889
2    df_test 2019-07-02 2020-06-24   223


In [12]:
# take log of df_train
df_train_log = log_of_df(df_train)

In [48]:
# plot df_train_log
fig_4 = plot_df(df_train_log)

In [47]:
# adf- and pp-testing of df_train_log - no latex printout
stationarity_tests(df_train_log, latex=False)

In [13]:
# take first difference of df_train_log
df_train_log_diff = diff_of_df(df_train_log)
df_train_log_diff

In [53]:
# plot df_train_log_diff
fig_5 = plot_df(df_train_log_diff[1:])

In [55]:
# adf- and pp-testing of df_train_log_diff - no latex printout
stationarity_tests(df_train_log_diff[1:], latex=False)
    

In [68]:
# print the stationarity table stored in 'M4_stationarity tests.txt' as LaTeX
stationarity_table_aslatex_from_df('M4_stationarity tests.txt')

In [70]:
# perform granger causality test each on each based on df_train_log_diff
granger_causality_matrix = grangers_causality_matrix(df_train_log_diff[1:], variables = df_train_log_diff.columns)
granger_causality_matrix.round(4)

In [73]:
# generate granger causality table as latex output
grangercausality_table_aslatex_of_df(granger_causality_matrix)

In [76]:
# generate granger causality heatmap of df_train_log_diff
grangercausality_heatmap_of_df(granger_causality_matrix)

In [15]:
# select VAR-order for df_train_log_diff: 5
df_var_order = select_VAR_order(df_train_log_diff[1:])
df_var_order

In [19]:
# generate var_order latex output from df_var_order
var_order_aslatex_of_order(df_var_order)

In [27]:
# obtain VAR-model for df_train_log_diff and lag order = 5
df_var = fit_VAR_model_of_df(df_train_log_diff[1:], 5)
df_var.summary()
# -> Copy summary output and save as .txt

In [35]:
# read in var_coeffs from textfile 'VAR_coefficients_df.txt' and print them as a LaTeX table
var_coeffs_aslatex_from_txt('VAR_coefficients_df.txt')

In [52]:
# IRF: impulse response of df_var
from statsmodels.tsa.vector_ar.irf import IRAnalysis
df_irf = df_var.irf(15)
df_irf.plot(orth=False, impulse='ggl_trends_log_diff' , response='btc_log_diff')

In [55]:
# IRF: cumulative impulse responses of df_var
df_irf.plot_cum_effects(orth=True, response='btc_log_diff')
    

In [206]:
# rolling forecast: obtain rmse (square root of the mse), ausreisser (outlier count), predictions, lo-& up confint for df with lag order = 5
df_var_pred_result = var_rolling_forecast(df, 5)

# printout results
print('mse:\t\t', df_var_pred_result[0])
print('ausreisser:\t', df_var_pred_result[1])

In [204]:
# rolling forecast: create series and plot them for df_var_pred_result
fig_6 = plot_var_pred_result(df,df_train,df_test,df_var_pred_result)

In [94]:
# Johansen cointegration test on the untransformed training data df_train
# (det_order=-1, one lagged difference); the last expression shows the eigenvalues
from statsmodels.tsa.vector_ar.vecm import coint_johansen
result = coint_johansen(df_train, det_order=-1, k_ar_diff=1)
result.eig
# -> eig[0] is the largest eigenvalue in the output below
# NOTE(review): the eigenvalues are presumably ordered by size and not tied to a
# particular column such as btc - confirm against the statsmodels documentation

array([4.58374448e-01, 2.39180874e-01, 9.70735726e-02, 6.17251342e-02,
       4.98199251e-02, 3.39128620e-02, 2.64646469e-02, 2.31061207e-02,
       1.42608074e-02, 1.11329201e-02, 6.52536036e-03, 4.12973213e-04])

In [95]:
# show the first row of the eigenvector matrix (position 0 is btc, the first column of df_train)
# NOTE(review): if the eigenvectors are stored column-wise, this row holds the btc
# entry of every eigenvector rather than one eigenvector (result.evec[:, 0]) - confirm
result.evec[0]

array([ 2.29437028e-05, -2.05765202e-04, -1.17405043e-03, -2.15917401e-04,
        1.68128521e-04, -2.61905312e-04,  1.17887809e-04,  5.96493824e-05,
       -9.47057782e-05,  8.82067289e-05, -1.56055428e-04, -2.02366970e-04])

In [92]:
# select the cointegration rank of df_train with the trace test at the 1% level
# (same det_order and k_ar_diff as the Johansen test cell)
# note: `result` is rebound here and no longer refers to the coint_johansen result
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
result = select_coint_rank(df_train, det_order=-1, k_ar_diff=1, method='trace',signif=0.01)
result.summary()

r_0,r_1,test statistic,critical value
0,12,1083.0,327.0
1,12,538.9,278.0
2,12,296.4,232.8
3,12,205.8,191.8
4,12,149.3,154.8


In [91]:
# fit a VECM on df_train with one lagged difference and cointegration rank 4
# (rank-test table above: the statistic exceeds the critical value for r_0 = 0..3
# and falls below it at r_0 = 4)
# NOTE(review): the deterministic terms are left at the VECM default - confirm this
# matches det_order=-1 used in the rank test
from statsmodels.tsa.vector_ar.vecm import VECM
df_vecm = VECM(endog=df_train, k_ar_diff=1, coint_rank=4)
df_vecm_fitted = df_vecm.fit()
df_vecm_fitted.summary()



0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,-0.0076,0.039,-0.193,0.847,-0.084,0.069
L1.tot_btc,-0.0028,0.003,-0.798,0.425,-0.010,0.004
L1.hs_rate,2.354e-05,2.37e-05,0.994,0.320,-2.29e-05,7e-05
L1.eth,-1.7473,0.530,-3.296,0.001,-2.786,-0.708
L1.ggl_trends,28.1323,10.862,2.590,0.010,6.844,49.421
L1.wiki_views,-0.0008,0.001,-1.139,0.255,-0.002,0.001
L1.usd_eur,-729.3177,2062.855,-0.354,0.724,-4772.439,3313.803
L1.oil_wti,-8.1169,10.427,-0.778,0.436,-28.553,12.319
L1.gold,-0.0304,1.201,-0.025,0.980,-2.385,2.324

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,0.2591,0.373,0.694,0.488,-0.473,0.991
L1.tot_btc,-0.1618,0.033,-4.852,0.000,-0.227,-0.096
L1.hs_rate,0.0001,0.000,0.653,0.514,-0.000,0.001
L1.eth,-0.6941,5.056,-0.137,0.891,-10.604,9.216
L1.ggl_trends,57.5247,103.597,0.555,0.579,-145.521,260.570
L1.wiki_views,-0.0063,0.006,-0.981,0.327,-0.019,0.006
L1.usd_eur,-4.262e+04,1.97e+04,-2.166,0.030,-8.12e+04,-4062.087
L1.oil_wti,91.0396,99.448,0.915,0.360,-103.875,285.954
L1.gold,17.5960,11.456,1.536,0.125,-4.857,40.049

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,56.7295,52.730,1.076,0.282,-46.620,160.079
L1.tot_btc,-7.2014,4.710,-1.529,0.126,-16.433,2.030
L1.hs_rate,0.3343,0.032,10.474,0.000,0.272,0.397
L1.eth,-834.8233,714.142,-1.169,0.242,-2234.517,564.870
L1.ggl_trends,1.368e+04,1.46e+04,0.935,0.350,-1.5e+04,4.24e+04
L1.wiki_views,-0.4350,0.911,-0.477,0.633,-2.221,1.351
L1.usd_eur,1.359e+06,2.78e+06,0.489,0.625,-4.09e+06,6.81e+06
L1.oil_wti,-1559.2250,1.4e+04,-0.111,0.912,-2.91e+04,2.6e+04
L1.gold,1532.6331,1618.008,0.947,0.344,-1638.604,4703.870

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,-0.0088,0.003,-3.132,0.002,-0.014,-0.003
L1.tot_btc,0.0002,0.000,0.954,0.340,-0.000,0.001
L1.hs_rate,1.463e-06,1.7e-06,0.861,0.389,-1.87e-06,4.8e-06
L1.eth,-0.0174,0.038,-0.458,0.647,-0.092,0.057
L1.ggl_trends,0.7085,0.779,0.909,0.363,-0.819,2.236
L1.wiki_views,-6.222e-05,4.85e-05,-1.282,0.200,-0.000,3.29e-05
L1.usd_eur,-122.9765,148.039,-0.831,0.406,-413.128,167.175
L1.oil_wti,-0.3713,0.748,-0.496,0.620,-1.838,1.095
L1.gold,-0.0756,0.086,-0.877,0.380,-0.245,0.093

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,0.0002,0.000,1.704,0.088,-2.82e-05,0.000
L1.tot_btc,-3.728e-06,9.83e-06,-0.379,0.704,-2.3e-05,1.55e-05
L1.hs_rate,4.617e-08,6.66e-08,0.693,0.488,-8.43e-08,1.77e-07
L1.eth,0.0028,0.001,1.904,0.057,-8.38e-05,0.006
L1.ggl_trends,0.5665,0.031,18.557,0.000,0.507,0.626
L1.wiki_views,-9.967e-06,1.9e-06,-5.244,0.000,-1.37e-05,-6.24e-06
L1.usd_eur,6.7853,5.798,1.170,0.242,-4.578,18.148
L1.oil_wti,0.0746,0.029,2.545,0.011,0.017,0.132
L1.gold,0.0006,0.003,0.168,0.866,-0.006,0.007

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,12.0296,1.944,6.189,0.000,8.220,15.839
L1.tot_btc,-0.1194,0.174,-0.688,0.492,-0.460,0.221
L1.hs_rate,0.0011,0.001,0.902,0.367,-0.001,0.003
L1.eth,-46.5666,26.325,-1.769,0.077,-98.162,5.029
L1.ggl_trends,600.6069,539.355,1.114,0.265,-456.510,1657.724
L1.wiki_views,0.1843,0.034,5.488,0.000,0.118,0.250
L1.usd_eur,3.295e+04,1.02e+05,0.322,0.748,-1.68e+05,2.34e+05
L1.oil_wti,-133.9373,517.757,-0.259,0.796,-1148.722,880.848
L1.gold,-36.7193,59.643,-0.616,0.538,-153.617,80.178

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,-7.084e-07,6.68e-07,-1.060,0.289,-2.02e-06,6.01e-07
L1.tot_btc,3.912e-08,5.97e-08,0.655,0.512,-7.79e-08,1.56e-07
L1.hs_rate,3.569e-10,4.04e-10,0.883,0.377,-4.36e-10,1.15e-09
L1.eth,3.929e-06,9.05e-06,0.434,0.664,-1.38e-05,2.17e-05
L1.ggl_trends,0.0001,0.000,0.790,0.429,-0.000,0.001
L1.wiki_views,3.249e-09,1.15e-08,0.281,0.778,-1.94e-08,2.59e-08
L1.usd_eur,0.0002,0.035,0.007,0.995,-0.069,0.069
L1.oil_wti,0.0002,0.000,1.029,0.304,-0.000,0.001
L1.gold,-6.666e-06,2.05e-05,-0.325,0.745,-4.69e-05,3.35e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,0.0001,0.000,0.814,0.416,-0.000,0.000
L1.tot_btc,1.486e-05,1.17e-05,1.269,0.205,-8.09e-06,3.78e-05
L1.hs_rate,1.419e-07,7.93e-08,1.789,0.074,-1.36e-08,2.97e-07
L1.eth,-0.0008,0.002,-0.425,0.671,-0.004,0.003
L1.ggl_trends,0.0190,0.036,0.522,0.602,-0.052,0.090
L1.wiki_views,3.453e-07,2.27e-06,0.152,0.879,-4.09e-06,4.79e-06
L1.usd_eur,-5.4852,6.909,-0.794,0.427,-19.026,8.056
L1.oil_wti,-0.0463,0.035,-1.326,0.185,-0.115,0.022
L1.gold,-0.0013,0.004,-0.333,0.739,-0.009,0.007

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,-0.0024,0.001,-2.123,0.034,-0.005,-0.000
L1.tot_btc,0.0001,9.91e-05,1.287,0.198,-6.66e-05,0.000
L1.hs_rate,-4.741e-07,6.71e-07,-0.706,0.480,-1.79e-06,8.42e-07
L1.eth,0.0043,0.015,0.284,0.776,-0.025,0.034
L1.ggl_trends,0.1962,0.308,0.638,0.524,-0.407,0.799
L1.wiki_views,-2.012e-06,1.92e-05,-0.105,0.916,-3.96e-05,3.56e-05
L1.usd_eur,371.7328,58.456,6.359,0.000,257.162,486.304
L1.oil_wti,0.5879,0.295,1.990,0.047,0.009,1.167
L1.gold,-0.0549,0.034,-1.613,0.107,-0.122,0.012

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,0.0023,0.002,0.962,0.336,-0.002,0.007
L1.tot_btc,0.0001,0.000,0.557,0.577,-0.000,0.001
L1.hs_rate,4.596e-06,1.45e-06,3.164,0.002,1.75e-06,7.44e-06
L1.eth,-0.0029,0.033,-0.090,0.928,-0.067,0.061
L1.ggl_trends,-0.1171,0.666,-0.176,0.860,-1.423,1.188
L1.wiki_views,2.225e-05,4.15e-05,0.537,0.592,-5.9e-05,0.000
L1.usd_eur,95.4566,126.499,0.755,0.450,-152.477,343.391
L1.oil_wti,-0.0665,0.639,-0.104,0.917,-1.320,1.187
L1.gold,-0.0835,0.074,-1.134,0.257,-0.228,0.061

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,0.0024,0.005,0.501,0.617,-0.007,0.012
L1.tot_btc,0.0003,0.000,0.691,0.490,-0.001,0.001
L1.hs_rate,1.815e-06,2.86e-06,0.635,0.526,-3.79e-06,7.42e-06
L1.eth,-0.0177,0.064,-0.277,0.782,-0.143,0.108
L1.ggl_trends,-0.5479,1.311,-0.418,0.676,-3.117,2.022
L1.wiki_views,2.071e-05,8.16e-05,0.254,0.800,-0.000,0.000
L1.usd_eur,-316.1723,248.982,-1.270,0.204,-804.168,171.823
L1.oil_wti,0.9158,1.258,0.728,0.467,-1.551,3.382
L1.gold,-0.0149,0.145,-0.103,0.918,-0.299,0.269

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
L1.btc,-7.836e-07,3.71e-06,-0.211,0.833,-8.06e-06,6.49e-06
L1.tot_btc,-3.337e-08,3.31e-07,-0.101,0.920,-6.83e-07,6.16e-07
L1.hs_rate,3.379e-10,2.25e-09,0.150,0.880,-4.06e-09,4.74e-09
L1.eth,5.636e-05,5.02e-05,1.122,0.262,-4.21e-05,0.000
L1.ggl_trends,0.0006,0.001,0.536,0.592,-0.001,0.003
L1.wiki_views,-1.445e-07,6.41e-08,-2.254,0.024,-2.7e-07,-1.88e-08
L1.usd_eur,-0.1408,0.196,-0.720,0.471,-0.524,0.242
L1.oil_wti,-0.0003,0.001,-0.333,0.739,-0.002,0.002
L1.gold,-1.947e-05,0.000,-0.171,0.864,-0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,-0.0188,0.014,-1.359,0.174,-0.046,0.008
ec2,-9.159e-05,1.51e-05,-6.076,0.000,-0.000,-6.2e-05
ec3,-9.715e-07,2.38e-06,-0.409,0.683,-5.63e-06,3.68e-06
ec4,-0.5608,0.127,-4.428,0.000,-0.809,-0.313

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,0.0368,0.132,0.279,0.781,-0.222,0.296
ec2,0.0006,0.000,4.229,0.000,0.000,0.001
ec3,6.532e-05,2.27e-05,2.883,0.004,2.09e-05,0.000
ec4,-0.8444,1.208,-0.699,0.485,-3.212,1.523

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,49.6701,18.673,2.660,0.008,13.073,86.268
ec2,0.0516,0.020,2.541,0.011,0.012,0.091
ec3,-0.0061,0.003,-1.893,0.058,-0.012,0.000
ec4,-116.3166,170.621,-0.682,0.495,-450.728,218.095

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,0.0067,0.001,6.732,0.000,0.005,0.009
ec2,-4.081e-06,1.08e-06,-3.772,0.000,-6.2e-06,-1.96e-06
ec3,-1.055e-06,1.7e-07,-6.188,0.000,-1.39e-06,-7.21e-07
ec4,-0.0738,0.009,-8.116,0.000,-0.092,-0.056

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,3.994e-05,3.9e-05,1.025,0.305,-3.64e-05,0.000
ec2,-3.232e-11,4.24e-08,-0.001,0.999,-8.31e-08,8.3e-08
ec3,-6.339e-10,6.68e-09,-0.095,0.924,-1.37e-08,1.25e-08
ec4,-7.911e-05,0.000,-0.222,0.824,-0.001,0.001

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,-0.6149,0.688,-0.893,0.372,-1.964,0.734
ec2,0.0016,0.001,2.202,0.028,0.000,0.003
ec3,-0.0006,0.000,-4.758,0.000,-0.001,-0.000
ec4,-16.2771,6.289,-2.588,0.010,-28.604,-3.950

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,1.799e-07,2.37e-07,0.760,0.447,-2.84e-07,6.44e-07
ec2,2.058e-10,2.57e-10,0.800,0.424,-2.99e-10,7.1e-10
ec3,-2.364e-11,4.06e-11,-0.583,0.560,-1.03e-10,5.58e-11
ec4,2.108e-07,2.16e-06,0.097,0.922,-4.03e-06,4.45e-06

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,9.366e-05,4.64e-05,2.018,0.044,2.67e-06,0.000
ec2,6.192e-08,5.05e-08,1.226,0.220,-3.7e-08,1.61e-07
ec3,-1.297e-08,7.96e-09,-1.631,0.103,-2.86e-08,2.62e-09
ec4,-0.0002,0.000,-0.538,0.591,-0.001,0.001

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,0.0012,0.000,3.014,0.003,0.000,0.002
ec2,4.906e-07,4.27e-07,1.148,0.251,-3.47e-07,1.33e-06
ec3,-2.36e-07,6.73e-08,-3.506,0.000,-3.68e-07,-1.04e-07
ec4,-0.0088,0.004,-2.464,0.014,-0.016,-0.002

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,0.0020,0.001,2.405,0.016,0.000,0.004
ec2,1.467e-06,9.24e-07,1.586,0.113,-3.45e-07,3.28e-06
ec3,-2.555e-07,1.46e-07,-1.754,0.079,-5.41e-07,3e-08
ec4,-0.0054,0.008,-0.701,0.483,-0.021,0.010

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,0.0021,0.002,1.258,0.208,-0.001,0.005
ec2,4.646e-07,1.82e-06,0.255,0.798,-3.1e-06,4.03e-06
ec3,-3.723e-07,2.87e-07,-1.299,0.194,-9.34e-07,1.9e-07
ec4,-0.0100,0.015,-0.655,0.513,-0.040,0.020

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ec1,-6.539e-07,1.31e-06,-0.498,0.619,-3.23e-06,1.92e-06
ec2,2.096e-10,1.43e-09,0.147,0.883,-2.59e-09,3.01e-09
ec3,1.777e-10,2.25e-10,0.789,0.430,-2.64e-10,6.19e-10
ec4,4.713e-06,1.2e-05,0.393,0.695,-1.88e-05,2.82e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
beta.1,1.0000,0,0,0.000,1.000,1.000
beta.2,-3.023e-18,0,0,0.000,-3.02e-18,-3.02e-18
beta.3,-5.076e-19,0,0,0.000,-5.08e-19,-5.08e-19
beta.4,-1.978e-16,0,0,0.000,-1.98e-16,-1.98e-16
beta.5,-626.6412,30.730,-20.392,0.000,-686.871,-566.412
beta.6,0.0936,2.3e+04,4.06e-06,1.000,-4.51e+04,4.51e+04
beta.7,-5404.1084,1.87e+05,-0.029,0.977,-3.72e+05,3.61e+05
beta.8,-73.1392,4.369,-16.739,0.000,-81.703,-64.575
beta.9,6.4950,0.006,1093.560,0.000,6.483,6.507

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
beta.1,-1.031e-14,0,0,0.000,-1.03e-14,-1.03e-14
beta.2,1.0000,0,0,0.000,1.000,1.000
beta.3,1.344e-17,0,0,0.000,1.34e-17,1.34e-17
beta.4,-9.457e-14,0,0,0.000,-9.46e-14,-9.46e-14
beta.5,2.083e+05,3420.562,60.903,0.000,2.02e+05,2.15e+05
beta.6,-44.3896,2.56e+06,-1.73e-05,1.000,-5.02e+06,5.02e+06
beta.7,1.486e+07,2.08e+07,0.713,0.476,-2.6e+07,5.57e+07
beta.8,-8236.9768,486.355,-16.936,0.000,-9190.214,-7283.739
beta.9,-1.153e+04,29.784,-387.074,0.000,-1.16e+04,-1.15e+04

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
beta.1,1.225e-13,0,0,0.000,1.22e-13,1.22e-13
beta.2,3.159e-15,0,0,0.000,3.16e-15,3.16e-15
beta.3,1.0000,0,0,0.000,1.000,1.000
beta.4,1.391e-12,0,0,0.000,1.39e-12,1.39e-12
beta.5,-1.461e+06,2.438,-5.99e+05,0.000,-1.46e+06,-1.46e+06
beta.6,320.3024,1826.841,0.175,0.861,-3260.240,3900.845
beta.7,1.177e+08,1.48e+04,7926.470,0.000,1.18e+08,1.18e+08
beta.8,-1.683e+05,0.347,-4.85e+05,0.000,-1.68e+05,-1.68e+05
beta.9,-2.54e+04,2.154,-1.18e+04,0.000,-2.54e+04,-2.54e+04

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
beta.1,-1.888e-19,0,0,0.000,-1.89e-19,-1.89e-19
beta.2,1.981e-19,0,0,0.000,1.98e-19,1.98e-19
beta.3,8.268e-20,0,0,0.000,8.27e-20,8.27e-20
beta.4,1.0000,0,0,0.000,1.000,1.000
beta.5,-36.7080,0.786,-46.723,0.000,-38.248,-35.168
beta.6,0.0055,588.605,9.29e-06,1.000,-1153.638,1153.649
beta.7,-3681.7020,4783.700,-0.770,0.442,-1.31e+04,5694.178
beta.8,-5.2428,0.112,-46.933,0.000,-5.462,-5.024
beta.9,1.5847,716.905,0.002,0.998,-1403.523,1406.693


In [90]:
df_vecm_fitted.beta[0]

array([ 1.00000000e+00, -7.10589748e-13,  6.71257892e-12,  9.16629883e-17,
        3.81734785e-18,  3.60765008e-14])