In [19]:
# set up imports
from datetime import datetime
from io import StringIO

import pandas as pd, numpy as np, seaborn as sns
from tabulate import tabulate
import statsmodels
import arch
import matplotlib
# select the Qt5 backend before pyplot is imported
matplotlib.use('qt5agg')

# configure plot style (all rcParams set in one validated update)
import matplotlib.pyplot as plt
plt.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
    'figure.figsize': (9.5,4.15),
    'figure.constrained_layout.use': False,
    'figure.dpi': 100,
    'savefig.dpi': 10,
    'lines.linewidth': 0.8,
})
# directory prefix used for every saved figure / text file in this notebook
# NOTE(review): absolute, machine-specific path - consider making it configurable
save_plot_to =  r'C:\\Users\joche\OneDrive\03 TUM - TUM-BWL\Semester 8\01 Bachelorarbeit\04 Results\Plots/'

In [3]:
# load data and crop to start_date:end_date
start_date = '2015-08-07'
end_date = '2020-06-26'
# full daily calendar used to up-sample the weekly series below
idx = pd.date_range(start_date, end_date)
index_name = 'date'
# stdlib datetime is used directly: the pd.datetime alias was deprecated in pandas 1.0 and removed in 2.0
mydateparser = lambda x: datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')
mydateparser1 = lambda x: datetime.strptime(str(x), '%Y-%m-%d')

# btc
btc = pd.read_excel('Data/BTC_closing.xlsx', parse_dates=[0], index_col=0, date_parser=mydateparser)[start_date:end_date]
btc.index.name = index_name
btc.columns = ['btc']

# usd_eur
usd_eur = pd.read_excel('Data/DEXUSEU.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
usd_eur.index.name = index_name
usd_eur.columns = ['usd_eur']
# keep only rows with a non-zero value (axis passed by keyword; positional axis was removed in pandas 2.0)
usd_eur = usd_eur.loc[(usd_eur!=0).any(axis=1)]

# tot_btc: only weekly data - missing values interpolated
tot_btc = pd.read_csv('Data/total-bitcoins.txt', index_col=0)[start_date:end_date]
# convert the string index to midnight-normalized timestamps
tot_btc.index = pd.DatetimeIndex(tot_btc.index).normalize()
tot_btc.index.name = index_name
# expand to the daily calendar (new rows are NaN), then fill them by time-weighted interpolation
tot_btc = tot_btc.reindex(idx, fill_value=None)
tot_btc.interpolate(method='time', inplace=True, limit_direction='both')
tot_btc.columns = ['tot_btc']

# hs_rate: only weekly data - missing values interpolated
hs_rate = pd.read_csv('Data/hash-rate.txt', index_col=0)[start_date:end_date]
hs_rate.index = pd.DatetimeIndex(hs_rate.index).normalize()
hs_rate.index.name = index_name
hs_rate = hs_rate.reindex(idx, fill_value=None)
hs_rate.interpolate(method='time', inplace=True, limit_direction='both')
hs_rate.columns = ['hs_rate']

# eth
eth = pd.read_excel('Data/ETH.xlsx', parse_dates=[0], index_col=0, date_parser=mydateparser, usecols='A,E')[start_date:end_date]
eth.index.name = index_name
eth.columns = ['eth']

# ggl_trends: only weekly data - missing values interpolated
# ggl_trends: weighted average of 15 countries
ggl_trends = pd.read_csv('Data/googletrends.txt', index_col=0)[start_date:end_date]
ggl_trends.index = pd.DatetimeIndex(ggl_trends.index).normalize()
ggl_trends.index.name = index_name
ggl_trends = ggl_trends.reindex(idx, fill_value=None)
ggl_trends.interpolate(method='time', inplace=True, limit_direction='both')
# row sum over all country columns divided by the fixed count of 15, rounded to an integer value
ggl_trends['btc-average'] = np.round(ggl_trends.sum(axis=1)/15)
ggl_trends = ggl_trends[['btc-average']].copy()
ggl_trends.columns = ['ggl_trends']

# wiki_views: sum of 99 countries
wiki_views = pd.read_excel('Data/wikipedia.xlsx', parse_dates=[0], index_col=0, date_parser=mydateparser1)[start_date:end_date]
wiki_views.index = pd.DatetimeIndex(wiki_views.index).normalize()
wiki_views.index.name = index_name
wiki_views['wiki-total'] = np.round(wiki_views.sum(axis=1))
wiki_views = wiki_views[['wiki-total']].copy()
wiki_views.columns = ['wiki_views']

# wti_oil
oil_wti = pd.read_excel('Data/DCOILWTICO.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
oil_wti.index = pd.DatetimeIndex(oil_wti.index).normalize()
oil_wti.index.name = index_name
oil_wti.columns = ['oil_wti']
oil_wti = oil_wti.loc[(oil_wti!=0).any(axis=1)]

# gold
gold = pd.read_excel('Data/GOLDAMGBD228NLBM10AM.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
gold.index = pd.DatetimeIndex(gold.index).normalize()
gold.index.name = index_name
gold.columns = ['gold']
gold = gold.loc[(gold!=0).any(axis=1)]

# sp500
sp500 = pd.read_excel('Data/SP500.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
sp500.index = pd.DatetimeIndex(sp500.index).normalize()
sp500.index.name = index_name
sp500.columns = ['sp500']
sp500 = sp500.loc[(sp500!=0).any(axis=1)]

# sse 
sse = pd.read_excel('Data/SSEcomposite.xlsx', parse_dates=[0], index_col=0, header=0, date_parser=mydateparser)[start_date:end_date]
sse.index = pd.DatetimeIndex(sse.index).normalize()
sse.index.name = index_name
# only the 'Zuletzt' column of the sheet is kept
sse = sse[['Zuletzt']].copy()
sse.columns = ['sse']

# ffd_rate
ffd_rate = pd.read_excel('Data/DFF.xls', parse_dates=[0], index_col=0, skiprows=10, date_parser=mydateparser)[start_date:end_date]
ffd_rate.index = pd.DatetimeIndex(ffd_rate.index).normalize()
ffd_rate.index.name = index_name
ffd_rate.columns = ['ffd_rate']

In [4]:
# merge data to one df (inner join)
from functools import reduce
def _join_on_index(left, right):
    """Join two frames on their index; pd.merge's default `how` is an inner join."""
    return pd.merge(left, right, left_index=True, right_index=True)

# frames are folded left-to-right, so the column order of df follows this list
temp = [
    btc, tot_btc, hs_rate, eth, ggl_trends, wiki_views,
    usd_eur, oil_wti, gold, sp500, sse, ffd_rate,
]
df = reduce(_join_on_index, temp)

In [5]:
# split data in pre and post bubble 2018
# (rows strictly between the two dates belong to neither part)
pre_bubble_end_date = '2017-12-01'
post_bubble_start_date = '2018-02-01'

# pre bubble df: everything up to and including pre_bubble_end_date
pre_df = df.loc[:pre_bubble_end_date]

# post bubble df: everything from post_bubble_start_date onwards
post_df = df.loc[post_bubble_start_date:]

In [5]:
# save btc_new.txt dataset with 1112 observations for use in M1-M3
def save_btcnew():
    """Write the 'btc' column of the merged global `df` to 'btc_new.txt' under `save_plot_to`."""
    target_path = save_plot_to+'btc_new.txt'
    df['btc'].to_csv(path_or_buf=target_path)

# save_btcnew()

In [48]:
### RUN TO DEFINE ALL FUNCTIONS ###

# define overview-printing function of dataframe
def dates_overview(dataframe):
    """Print the first index label, the last index label and the number of rows of `dataframe`."""
    summary_rows = (
        ('start_date:\t', dataframe.index[0]),
        ('end_date:\t', dataframe.index[-1]),
        ('len df:\t\t', len(dataframe)),
    )
    for label, value in summary_rows:
        print(label, value)
    
# define plotting function of dataframe
import matplotlib.dates as mdates
def plot_df(dataframe):
    """Plot every column of `dataframe` in its own subplot and return the figure.

    The grid has three columns and as many rows as needed (4x3 for the
    12-column frames used in this notebook). Axes left over when the number
    of columns is not a multiple of three are hidden instead of raising an
    IndexError, and frames with more than 12 columns are plotted completely.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose index is used as the x axis (formatted as "%Y-%m" dates).

    Returns
    -------
    matplotlib.figure.Figure
        Figure of size 12x5 inches with tight layout applied.
    """
    grid_cols = 3
    n_series = len(dataframe.columns)
    # ceil division; at least one row so that an empty frame still yields a figure
    grid_rows = max(1, -(-n_series // grid_cols))
    fig_i, axs = plt.subplots(grid_rows, grid_cols, figsize=(12,5))
    axes = np.atleast_1d(axs).ravel()
    for ax, column in zip(axes, dataframe.columns):
        ax.plot(dataframe[column], color='black')
        ax.set_title(column)
        ax.xaxis.set_ticks_position('none')
        ax.yaxis.set_ticks_position('none')
        # a formatter instance is bound to one axis, so create fresh ones per subplot
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%Y-%m"))
        ax.tick_params(labelsize=8)
    # hide grid cells that have no series to show
    for unused_ax in axes[n_series:]:
        unused_ax.set_visible(False)
    fig_i.tight_layout()
    return fig_i

# define log-taking and relabeling function
def log_of_df(dataframe):
    """Return the element-wise natural log of `dataframe` with '_log' appended to every column name.

    NaN results are replaced by 0; per the original note this covers the
    negative oil price on 2020-04-20, whose log is undefined.
    """
    logged = np.log(dataframe)
    logged.columns = [name + '_log' for name in logged.columns]
    return logged.fillna(value=0)

# define correlation-table generating function of dataframe
def corr_table_aslatex_of_df(dataframe):
    """Print the correlation matrix of `dataframe`, rounded to 3 decimals, as a LaTeX tabular."""
    rounded_corr = dataframe.corr().round(3)
    latex_table = tabulate(rounded_corr, headers=rounded_corr.columns, showindex=True, tablefmt="latex")
    print(latex_table)

# define heatmap-generating function of dataframe
def heatmap_corr_of_df(dataframe):
    """Draw an annotated heatmap of the correlation matrix of `dataframe` on the current figure.

    The colour scale is fixed to [-1, 1] and centred on 0. Nothing is
    returned; the plot lives on the current pyplot figure.
    """
    corr = dataframe.corr()
    # more html colors here: https://www.w3schools.com/colors/colors_names.asp
    # pal = sns.light_palette('lightgrey', as_cmap=True)
    ax = sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values, annot=True, 
            annot_kws={'size':7}, vmin=-1, center=0, vmax=1, cmap="YlGnBu")
    # widen the y-limits by half a cell at both ends
    # NOTE(review): presumably a workaround for heatmap rows being cut off in some
    # matplotlib versions - confirm it is still needed on the installed version
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    # resize whatever figure is current (the one seaborn just drew on)
    fig_3 = plt.gcf()
    fig_3.set_size_inches(9.5,4.15)
    # move the x tick labels to the top edge and hide the tick marks
    plt.tick_params(axis='both', which='major', labelsize=7.5, labelbottom = False, bottom=False, top = False, labeltop=True)
    plt.xticks(fontsize=7.5, rotation=1)
    plt.yticks(fontsize=7.5)
    
# define train/test-splitting function of dataframe
def split_traintest_df(dataframe):
    """Split `dataframe` chronologically into the first 80% of rows (train) and the rest (test).

    A small overview table (start label, end label, row count of the full
    frame and of both parts) is printed. Returns the tuple (df_train, df_test).
    """
    cut = int(len(dataframe) * 0.8)
    df_train = dataframe[0:cut]
    df_test = dataframe[cut:]
    # assemble the printout column by column from the three frames
    parts = (('dataframe', dataframe), ('df_train', df_train), ('df_test', df_test))
    overview = {
        'Dataframe': [label for label, _ in parts],
        'date_start': [frame.index[0] for _, frame in parts],
        'date_end': [frame.index[-1] for _, frame in parts],
        'nobs': [len(frame) for _, frame in parts],
    }
    df_print = pd.DataFrame(overview, columns=['Dataframe','date_start','date_end','nobs'])
    print(df_print)
    return df_train, df_test

# define adf- and pp-testing of dataframe with latex-printout on/off
from arch.unitroot import ADF, PhillipsPerron
def stationarity_tests(dataframe, latex):
    """Run ADF and Phillips-Perron unit-root tests on every column of `dataframe`.

    With `latex` being exactly False the two summaries are printed as plain
    text. For any other value they are appended, in LaTeX form, to
    'Stationarity_Tests_LaTeX.txt' under `save_plot_to`.
    """
    for col in dataframe:
        series = dataframe[col]
        adf, pp = ADF(series), PhillipsPerron(series)
        if latex is not False:
            # append mode: repeated runs keep adding to the same file
            with open(save_plot_to + 'Stationarity_Tests_LaTeX.txt', 'a') as myfile:
                myfile.write('Timeseries:\t'+col+'\n'
                             +adf.summary().as_latex()+'\n\n'
                             +pp.summary().as_latex()+'\n\n\n')
            continue
        # plain-text summary to stdout
        print('Timeseries:\t',col,'\n',
              adf.summary(),'\n\n',pp.summary(),'\n\n\n')

# define first difference-taking function of dataframe
def diff_of_df(dataframe):
    """Return the first difference of `dataframe` with '_diff' appended to every column name.

    The first row of the result is NaN (there is no previous row to subtract).
    The function now differences the frame passed in; it previously ignored
    its argument and always used the global `df_train_log`.
    """
    differenced = dataframe.diff()
    # relabel columns
    differenced.columns = [name + '_diff' for name in differenced.columns]
    return differenced

# define stationarity table-generating function of a dataframe filepath
def stationarity_table_aslatex_from_df(filepath):
    """Read a whitespace-delimited table from `save_plot_to + filepath` and print it as a LaTeX tabular.

    Parameters
    ----------
    filepath : str
        File name relative to `save_plot_to`; the first line is used as header.
    """
    # read in dataframe from .txt file
    # raw string: '\s' in a normal string literal is an invalid escape sequence
    stationarity_dataframe = pd.read_csv(save_plot_to+filepath, delimiter=r'\s+', header=0)
    # print dataframe as latex output
    print(tabulate(stationarity_dataframe, headers=stationarity_dataframe.columns, showindex=False, tablefmt="latex"))

# define granger causality test performing function of a dataframe
# code taken from: https://stackoverflow.com/questions/58005681/is-it-possible-to-run-a-vector-autoregression-analysis-on-a-large-gdp-data-with
from statsmodels.tsa.stattools import grangercausalitytests
# highest lag tested; read by grangers_causality_matrix at call time
maxlag=15
# key of the test statistic in the statsmodels result dict
# (fixed: was 'ssr-chi2test' with a hyphen, which is not a valid key)
test = 'ssr_chi2test'
def grangers_causality_matrix(X_train, variables, test = 'ssr_chi2test', verbose=False):
    """Build a matrix of minimum Granger-causality p-values for every ordered pair of variables.

    For each pair the test is run for lags 1..maxlag (module-level `maxlag`)
    and the smallest p-value, rounded to 4 decimals, is stored.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Data containing all columns named in `variables`.
    variables : sequence of str
        Column names to test against each other.
    test : str, default 'ssr_chi2test'
        Key of the statistic to read from each lag's result dict.
    verbose : bool, default False
        If True, print the p-values of all lags for every pair.

    Returns
    -------
    pandas.DataFrame
        Rows are the response (suffix '_y'), columns the predictor (suffix '_x').
    """
    dataset = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in dataset.columns:
        for r in dataset.index:
            # column order [r, c]: tests whether c helps to predict r
            test_result = grangercausalitytests(X_train[[r,c]], maxlag=maxlag, verbose=False)
            # result dict is keyed by lag, starting at 1
            p_values = [round(test_result[lag][0][test][1],4) for lag in range(1, maxlag+1)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            dataset.loc[r,c] = min_p_value
    dataset.columns = [var + '_x' for var in variables]
    dataset.index = [var + '_y' for var in variables]
    return dataset

# define granger causality table generating function from dataframe
def grangercausality_table_aslatex_of_df(dataframe):
    """Print `dataframe` (index included) as a LaTeX tabular."""
    latex_table = tabulate(dataframe, headers=dataframe.columns, showindex=True, tablefmt="latex")
    print(latex_table)

# define grangercausality heatmap generating function of a dataframe
def grangercausality_heatmap_of_df(dataframe):
    """Draw an annotated heatmap of a p-value matrix and return the figure.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Matrix to plot; the colour scale is fixed to [0, 1].

    Returns
    -------
    matplotlib.figure.Figure
        The 12x5 inch figure holding the heatmap.
    """
    fig, ax = plt.subplots()
    sns.heatmap(dataframe, xticklabels=dataframe.columns.values, yticklabels=dataframe.index.values,
                 annot=True, annot_kws={'size':7}, vmin=0, vmax=1, cmap="YlGnBu_r", ax=ax)
    # widen the y-limits by half a cell at both ends
    # NOTE(review): presumably a workaround for cut-off heatmap rows in some
    # matplotlib versions - confirm it is still needed
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    # x tick labels on top, no tick marks
    plt.tick_params(axis='both', which='major', labelsize=7, labelbottom = False, bottom=False, top = False, labeltop=True)
    plt.xticks(fontsize=7, rotation=45)
    plt.yticks(fontsize=7)
    # the size is set on the figure: pyplot has no set_size_inches function,
    # so the former plt.set_size_inches(12,5) raised AttributeError
    fig.set_size_inches(12,5)
    plt.tight_layout()
    return fig

# define VAR-order selecting function of dataframe
from statsmodels.tsa.vector_ar.var_model import VAR
def select_VAR_order(dataframe):
    """Return the VAR lag-order selection table (lags 0..15) for `dataframe` as a DataFrame.

    The statsmodels summary table is rendered to HTML and parsed back with
    pandas, so the cell values are whatever the rendered table contains.
    """
    var_model = VAR(endog=dataframe)
    var_order_res = var_model.select_order(15).summary()
    res_as_html = var_order_res.as_html()
    # wrap in StringIO: passing literal HTML text to read_html is deprecated
    df_var_order = pd.read_html(StringIO(res_as_html), header=0, index_col=0)[0]
    return df_var_order

# define VAR-order latex-table-generating function of a var_order
def var_order_aslatex_of_order(var_order):
    """Print the lag-order selection table `var_order` (index included) as a LaTeX tabular."""
    latex_table = tabulate(var_order, headers=var_order.columns, showindex=True, tablefmt="latex")
    print(latex_table)

# define VAR-model-fitting function of a dataframe and a lag order
def fit_VAR_model_of_df(dataframe, order):
    """Fit a VAR on `dataframe`, passing `order` as `maxlags`, and return the fitted results."""
    return VAR(endog=dataframe).fit(maxlags = order)


# transform var_fitted into data frame
# code taken from: https://stackoverflow.com/questions/51734180/converting-statsmodels-summary-object-to-pandas-dataframe
def results_summary_to_dataframe(results):
    '''Collect coefficients, p-values and confidence bounds of a statsmodels result in one dataframe.

    `results` must expose `params`, `pvalues` and `conf_int()`; columns 0 and 1
    of the latter are used as lower and upper bound. The returned columns are
    ordered coeff, pvals, conf_lower, conf_higher.
    '''
    bounds = results.conf_int()
    # the dict is written in the final column order, so no reordering is needed
    return pd.DataFrame({
        "coeff": results.params,
        "pvals": results.pvalues,
        "conf_lower": bounds[0],
        "conf_higher": bounds[1],
    })

# define dataframe generating function from .txt file at filepath
def var_coeffs_aslatex_from_txt(filepath):
    """Read a whitespace-delimited table from `save_plot_to + filepath` and print it as a LaTeX tabular.

    The first column of the file becomes the row index; values are rounded to
    4 decimals before printing.

    Parameters
    ----------
    filepath : str
        File name relative to `save_plot_to`; the first line is used as header.
    """
    # read in df from textfile
    # raw string: '\s' in a normal string literal is an invalid escape sequence
    var_btc_coefficients = pd.read_csv(save_plot_to+filepath, delimiter=r'\s+', header=0)
    var_btc_coefficients.set_index(var_btc_coefficients.columns[0], inplace=True)
    # generate latex output
    print(tabulate(var_btc_coefficients.round(4), headers=var_btc_coefficients.columns, showindex=True, tablefmt="latex"))

# define rolling forecast function
from sklearn.metrics import mean_squared_error as mse
def var_rolling_forecast(dataframe, lag_order):
    """One-step-ahead rolling VAR forecast of the first column over the last 20% of `dataframe`.

    For every test observation a VAR(`lag_order`) is refitted on the
    log-differences known up to the previous row, the next log-difference is
    forecast with a 95% interval and converted back to a level by adding the
    last known log level and exponentiating.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Level data (not logged); the first column is the forecast target.
    lag_order : int
        Lag order passed to the VAR fit.

    Returns
    -------
    tuple
        (rmse, number of actuals outside the interval, predictions,
        lower interval bounds, upper interval bounds)
    """
    # train/test boundary: first 80% of rows are training data
    train_size = int(len(dataframe) * 0.8)
    dataframe_test = dataframe[train_size:]
    index = train_size - 1  # position of the last training observation
    # loop-invariant inputs, computed once instead of on every iteration
    log_diff_values = log_of_df(dataframe).diff().values
    target_level = dataframe.iloc[:,0]
    target_log_level = np.log(dataframe).iloc[:,0]
    # initialize lists
    pred_val_btc = list()
    lo_conf_int_btc = list()
    up_conf_int_btc = list()
    ausreisser_ctr = 0
    # rolling forecast
    for i in range(len(dataframe_test)):
        last_known = index + i
        # input data, dependent on i: slice ends are exclusive, so +1 is needed to
        # include the most recent known difference (it was previously left out,
        # which made every forecast one period stale)
        input_data = log_diff_values[lag_order+1:last_known + 1]
        # fit model and predict 1 step
        dataframe_var = VAR(endog=input_data)
        dataframe_var_fitted = dataframe_var.fit(maxlags=lag_order)
        point, lower, upper = dataframe_var_fitted.forecast_interval(y=input_data, steps=1, alpha=0.05)
        # obtain absolute (inversed) btc values for mean prediction, upper- and lower confidence interval
        base_log_level = target_log_level.iloc[last_known]
        yhat_btc = np.exp(point[0][0] + base_log_level)
        lo_conf_btc_val = np.exp(lower[0][0] + base_log_level)
        up_conf_btc_val = np.exp(upper[0][0] + base_log_level)
        # explicit positional access; plain [] with an integer on a labelled Series is deprecated
        actual = target_level.iloc[last_known + 1]
        if (actual > up_conf_btc_val) or (actual < lo_conf_btc_val):
            ausreisser_ctr += 1
        pred_val_btc.append(yhat_btc)
        lo_conf_int_btc.append(lo_conf_btc_val)
        up_conf_int_btc.append(up_conf_btc_val)
    # return [0]: rmse (square root of the mse), [1]: ausreisser ctr, [2]: predictions, [3]: lo_confint, [4]: up_confint
    return (np.sqrt(mse(pred_val_btc, dataframe_test.iloc[:,0].values)), ausreisser_ctr,
            pred_val_btc, lo_conf_int_btc, up_conf_int_btc)

# define series-generating and plotting function for dataframe_var_pred_result
def plot_var_pred_result(dataframe, dataframe_train, dataframe_test, dataframe_var_pred_result):
    """Plot actual vs. predicted values of the first column, with the forecast interval shaded.

    The left panel shows the training data followed by the test period, the
    right panel the test period only.

    Parameters
    ----------
    dataframe, dataframe_train, dataframe_test : pandas.DataFrame
        Full frame and its train/test parts; only lengths and first columns are used.
    dataframe_var_pred_result : tuple
        Rolling-forecast result; items [2], [3], [4] are the predictions and the
        lower/upper interval bounds.

    Returns
    -------
    matplotlib.figure.Figure
    """
    # x positions of the test period: the training curve is drawn at 0..len(train)-1,
    # so the test period continues at len(train) (it used to start one step late)
    index_pred = np.arange(len(dataframe_train), len(dataframe))

    def as_series(values):
        return pd.Series(values, index=index_pred)

    pred_val_btc_series = as_series(dataframe_var_pred_result[2])
    test_series_btc = as_series(dataframe_test.iloc[:,0].values)
    lo_conf_int_btc_series = as_series(dataframe_var_pred_result[3])
    up_conf_int_btc_series = as_series(dataframe_var_pred_result[4])

    def draw_forecast(ax, title):
        # actual, prediction and shaded interval; identical on both panels
        ax.plot(test_series_btc, label= r'$btc_{T+h}$', color='green')
        ax.plot(pred_val_btc_series, label= r'$\hat{btc}_{T+h}$', color= 'red')
        ax.fill_between(lo_conf_int_btc_series.index, lo_conf_int_btc_series, up_conf_int_btc_series, color='k', alpha=0.1)
        ax.legend(loc='upper left')
        ax.title.set_text(title)

    # create plot:
    fig, axs = plt.subplots(1,2)
    axs[0].plot(dataframe_train.iloc[:,0].values, label= r'$btc_T$', color='black')
    draw_forecast(axs[0], 'Gesamter Zeitraum')
    draw_forecast(axs[1], 'Vorhersage-Zeitraum')
    fig.set_size_inches(15,2.5)
    return fig

# define cointegration rank testing function based on select_coint_rank of a dataframe
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
def print_select_coint_rank(dataframe, method, k_ar_diff, signif):
    """Print the summary of statsmodels' cointegration rank selection for `dataframe`.

    `method`, `k_ar_diff` and `signif` are forwarded unchanged; `det_order`
    is fixed to -1.
    """
    options = dict(det_order=-1, k_ar_diff=k_ar_diff, method=method, signif=signif)
    rank_selection = select_coint_rank(dataframe, **options)
    print(rank_selection.summary())
    # summary output must be copied and saved as .txt

# define trace cointegration testing function for dataframe
from statsmodels.tsa.vector_ar.vecm import coint_johansen
def print_cointegration_test_of(dataframe, k_ar_diff, signif):
    """Print the Johansen trace statistics of `dataframe` next to their critical values.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data passed to `coint_johansen` (with det_order -1).
    k_ar_diff : int
        Number of lagged differences.
    signif : float
        Significance level; one of 0.10, 0.05 or 0.01.

    Raises
    ------
    ValueError
        If `signif` is not one of the three supported levels.
    """
    # column of result.cvt per significance level in percent: 90%, 95%, 99% critical values.
    # Looked up numerically: str(1 - 0.1) is '0.9', which missed the former '0.90' key.
    cv_column_by_pct = {10: 0, 5: 1, 1: 2}
    signif_pct = round(signif * 100, 6)
    if signif_pct not in cv_column_by_pct:
        raise ValueError('signif must be one of 0.10, 0.05 or 0.01, got %r' % (signif,))
    confidence_pct = 100 - int(signif_pct)

    result = coint_johansen(dataframe, -1, k_ar_diff)
    trace_stat = result.lr1
    crit_val_trace_stat = result.cvt[:, cv_column_by_pct[signif_pct]]

    def adjust(val, length = 6):
        return str(val).ljust(length)

    print(' Variable  \t\ttest-statistic\tCV(' + str(confidence_pct) + '%)\t\tSignif\n')
    for col, trace, cvt in zip(dataframe.columns, trace_stat, crit_val_trace_stat):
        # short names get an extra tab to stay aligned; compared by value
        # (identity comparison with `is` on strings is unreliable)
        if col in ('dax', 'sp500'):
            tab = 2
        else:
            tab = 1
        print(adjust(col), '\t'*tab, adjust(round(trace,2), 9),'\t', adjust(cvt, 8), '\t' , trace > cvt)
    # -> Copy summary output and save as .txt
    
# define VECM specifying function for dataframe
from statsmodels.tsa.vector_ar.vecm import VECM
def get_vecm_model(dataframe,k_ar_diff,coint_rank,det):
    """Build a VECM on `dataframe` with the given lag differences, rank and deterministic terms, and return the fit."""
    vecm_spec = VECM(endog=dataframe,
                     k_ar_diff=k_ar_diff,
                     coint_rank=coint_rank,
                     deterministic=det)
    return vecm_spec.fit()

# define rolling forecast function
from sklearn.metrics import mean_squared_error as mse
def vecm_rolling_forecast(dataframe, lag_order, coint_rank, det):
    """One-step-ahead rolling VECM forecast of the first column over the last 20% of `dataframe`.

    For every test observation a VECM is refitted on the log levels known up
    to the previous row; the next log level is predicted with a 95% interval
    and exponentiated back to a level.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Level data (not logged); the first column is the forecast target.
    lag_order : int
        Passed to the model as `k_ar_diff`.
    coint_rank : int
        Cointegration rank of the model.
    det : str
        Deterministic-term specification passed to the model.

    Returns
    -------
    tuple
        (rmse, number of actuals outside the interval, predictions,
        lower interval bounds, upper interval bounds)
    """
    # train/test boundary: first 80% of rows are training data
    train_size = int(len(dataframe) * 0.8)
    dataframe_test = dataframe[train_size:]
    index = train_size - 1  # position of the last training observation
    # loop-invariant inputs, computed once instead of on every iteration
    log_values = log_of_df(dataframe).values
    target_level = dataframe.iloc[:,0]
    # initialize lists
    pred_val_btc = list()
    lo_conf_int_btc = list()
    up_conf_int_btc = list()
    ausreisser_ctr = 0
    # rolling forecast
    for i in range(len(dataframe_test)):
        last_known = index + i
        # input data, dependent on i: slice ends are exclusive, so +1 is needed to
        # include the last known row. Without it the model predicted the row at
        # last_known, which was then compared with the actual of last_known + 1.
        input_data = log_values[lag_order:last_known + 1]
        # fit model and predict 1 step
        dataframe_vecm_fitted = get_vecm_model(dataframe=input_data,k_ar_diff=lag_order,coint_rank=coint_rank,det=det)
        point, lower, upper = dataframe_vecm_fitted.predict(steps=1,alpha=0.05)
        # obtain absolute (inversed) btc values for mean prediction, upper- and lower confidence interval
        yhat_btc = np.exp(point[0][0])
        lo_conf_btc_val = np.exp(lower[0][0])
        up_conf_btc_val = np.exp(upper[0][0])
        # explicit positional access; plain [] with an integer on a labelled Series is deprecated
        actual = target_level.iloc[last_known + 1]
        if (actual > up_conf_btc_val) or (actual < lo_conf_btc_val):
            ausreisser_ctr += 1
        pred_val_btc.append(yhat_btc)
        lo_conf_int_btc.append(lo_conf_btc_val)
        up_conf_int_btc.append(up_conf_btc_val)
    # return [0]: rmse (square root of the mse), [1]: ausreisser ctr, [2]: predictions, [3]: lo_confint, [4]: up_confint
    return (np.sqrt(mse(pred_val_btc, dataframe_test.iloc[:,0].values)), ausreisser_ctr,
            pred_val_btc, lo_conf_int_btc, up_conf_int_btc)


## not used ##
# imports the coint_johansen function to test for cointegration as a prerequisite for VEC 
# modeling
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# uses https://nbviewer.jupyter.org/github/mapsa/seminario-doc-2014/blob/master/cointegration-
# example.ipynb to create functions to return the number of cointegrating vectors based 
# on the Trace version of the Johansen Cointegration Test
def johansen_trace(y, p):
    """Run the Johansen test on `y` with `p` lagged differences and attach a rank estimate.

    The result object of `coint_johansen(y, 0, p)` is returned with an extra
    attribute `r`: the largest 1-based position whose trace statistic exceeds
    its 95% critical value (column 1 of `cvt`; 0: 90%, 1: 95%, 2: 99%), or 0
    if none does.
    """
    _, n_series = y.shape
    joh_trace = coint_johansen(y, 0, p)
    exceeding = [pos + 1 for pos in range(n_series)
                 if joh_trace.lr1[pos] > joh_trace.cvt[pos, 1]]
    joh_trace.r = exceeding[-1] if exceeding else 0
    return joh_trace

# loops through 1 to 10 lags of trading days
for i in range(1, 11): 
    # tests for cointegration at i lags
    joh_trace = johansen_trace(df_select_train_log, i)
    # prints the results
    print('Using the Trace Test, there are', joh_trace.r, '''cointegrating vectors at 
    %s lags between the df_select_train_log''' % i)
    # prints a space for readability
    print()

NameError: name 'df_select_train_log' is not defined

In [8]:
# overview of df
dates_overview(df)

In [9]:
# plot df
fig_1 = plot_df(df)

In [7]:
# take log of df
df_log = log_of_df(df)

In [12]:
# plot log time series
fig_2 = plot_df(df_log)

In [13]:
# save fig_2
fig_2.savefig(save_plot_to+'M4_fig_2.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [21]:
# correlation matrix of df
corr_table_aslatex_of_df(df)

In [23]:
# correlation heatmap of df
heatmap_corr_of_df(df)

In [17]:
# split df in df_train and df_test
# (first 80% of the rows -> df_train, remaining rows -> df_test)
df_train, df_test = split_traintest_df(df)

In [31]:
# take log of df_train
df_train_log = log_of_df(df_train)

In [48]:
# plot df_train_log
fig_4 = plot_df(df_train_log)

In [47]:
# adf- and pp-testing of df_train_log - no latex printout
stationarity_tests(df_train_log, latex=False)

In [11]:
# take first difference of df_train_log
df_train_log_diff = diff_of_df(df_train_log)
df_train_log_diff

In [17]:
# plot df_train_log_diff
# [1:] skips the first row, which is NaN after differencing
fig_5 = plot_df(df_train_log_diff[1:])

In [18]:
# save fig_5
fig_5.savefig(save_plot_to+'M4_fig_5.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [55]:
# adf- and pp-testing of df_train_log_diff - no latex printout
stationarity_tests(df_train_log_diff[1:], latex=False)
    

In [68]:
# plot stationarity table of df
# the table is read from this text file in the save_plot_to directory, so the file must already exist
stationarity_table_aslatex_from_df('M4_stationarity tests.txt')

In [20]:
# perform granger causality test each on each based on df_train_log_diff
# every column is tested against every other one; the number of lags comes from the module-level maxlag
granger_causality_matrix = grangers_causality_matrix(df_train_log_diff[1:], variables = df_train_log_diff.columns)
granger_causality_matrix.round(4)

In [73]:
# generate granger causality table as latex output
grangercausality_table_aslatex_of_df(granger_causality_matrix)

In [28]:
# generate granger causality heatmap of df_train_log_diff
fig_6 = grangercausality_heatmap_of_df(granger_causality_matrix)

In [29]:
# save fig_6
fig_6.savefig(save_plot_to+'M4_fig_6.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [None]:
## VAR: df_select_train_log_diff (3 Variables) ##

In [30]:
# based on granger-causality: new df with btc, hs_rate and ggl_trends
df_select_train_log_diff = df_train_log_diff[['btc_log_diff','hs_rate_log_diff','ggl_trends_log_diff']]
df_select_train_log_diff

In [36]:
# select VAR-order for df_select_train_log_diff: 5
# [1:] skips the first row, which is NaN after differencing
df_select_var_order = select_VAR_order(df_select_train_log_diff[1:])
df_select_var_order

In [37]:
# generate var_select_order latex output from df_select_var_order
var_order_aslatex_of_order(df_select_var_order)

In [36]:
# obtain VAR-model for df_select_train_log_diff and lag order = 5
df_select_var = fit_VAR_model_of_df(df_select_train_log_diff[1:], 5)
df_select_var.summary()
# -> Copy summary output and save as .txt

In [45]:
# read in and plot var_select_coeffs from textfile as latex: 'M4_df_select_VAR_coefficients.txt'
# the text file is read from the save_plot_to directory and must already exist
var_coeffs_aslatex_from_txt('M4_df_select_VAR_coefficients.txt')

In [28]:
# rolling forecast: obtain mse, ausreisser, predictions, lo-& up confint for df_select with lag order = 5
# the un-logged level data is passed in; var_rolling_forecast takes logs and differences itself
# note: item [0] of the result is the square root of the mse
df_select_var_pred_result = var_rolling_forecast(df[['btc','hs_rate','ggl_trends']], 5)

# printout results
print('mse_select:\t\t', df_select_var_pred_result[0])
print('ausreisser_select:\t', df_select_var_pred_result[1])

In [30]:
# rolling forecast: create series and plot them for df_var_pred_result
fig_7 = plot_var_pred_result(df[['btc','hs_rate','ggl_trends']],
                             df_train[['btc','hs_rate','ggl_trends']],
                             df_test[['btc','hs_rate','ggl_trends']],
                             df_select_var_pred_result)

In [31]:
# save fig_7
fig_7.savefig(save_plot_to+'M4_fig_7.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [None]:
## VAR: df_train_log_diff (12 Variables) ##

In [46]:
# select VAR-order for df_train_log_diff: 5
# [1:] skips the first row, which is NaN after differencing
df_var_order = select_VAR_order(df_train_log_diff[1:])
df_var_order

In [19]:
# generate var_order latex output from df_var_order
var_order_aslatex_of_order(df_var_order)

In [12]:
# obtain VAR-model for df_train_log_diff and lag order = 5
df_var = fit_VAR_model_of_df(df_train_log_diff[1:], 5)
df_var.summary()
# -> Copy summary output and save as .txt

In [22]:
# read in and plot var_coeffs from textfile: 'M4_df_VAR_coefficients.txt'
# the text file is read from the save_plot_to directory and must already exist
var_coeffs_aslatex_from_txt('M4_df_VAR_coefficients.txt')

In [27]:
# IRF: impulse response of df_var
# NOTE(review): IRAnalysis is imported but not referenced in the visible cells
from statsmodels.tsa.vector_ar.irf import IRAnalysis
# impulse responses over 15 periods; only the responses of btc_log_diff are plotted
df_var_irf = df_var.irf(15)
fig_9 = df_var_irf.plot(orth=False, response='btc_log_diff', figsize=(8,15),subplot_params={'fontsize' : 9})
fig_9.tight_layout()

In [28]:
# save fig_9
fig_9.savefig(save_plot_to+'M4_fig_9.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [29]:
# IRF: cumulative impulse responses of df_var
fig_10 = df_var_irf.plot_cum_effects(orth=False, response='btc_log_diff',figsize=(8,15),subplot_params={'fontsize' : 9})
fig_10.tight_layout()

In [30]:
# save fig_10
fig_10.savefig(save_plot_to+'M4_fig_10.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [23]:
# rolling forecast: obtain mse, ausreisser, predictions, lo-& up confint for df with lag order = 5
# note: item [0] of the result is the square root of the mse
df_var_pred_result = var_rolling_forecast(df, 5)
# printout results
print('mse:\t\t', df_var_pred_result[0])
print('ausreisser:\t', df_var_pred_result[1])

In [24]:
# rolling forecast: create series and plot them for df_var_pred_result
fig_8 = plot_var_pred_result(df,df_train,df_test,df_var_pred_result)

In [26]:
# save fig_8
fig_8.savefig(save_plot_to+'M4_fig_8.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [None]:
## VECM: df_select_train_log (3 Variables) ##

In [52]:
## ARCHIVE ##
# series and lag plot to show trend and autocorrelation
fig, axs = plt.subplots(2,1)
axs[0].plot(df_log)
# NOTE(review): df_log is a multi-column DataFrame here; confirm that lag_plot
# accepts it, or pass a single column instead
pd.plotting.lag_plot(df_log, ax = axs[1])


In [32]:
# based on granger-causality: new df with btc, hs_rate and ggl_trends
df_select_train_log = df_train_log[['btc_log','hs_rate_log','ggl_trends_log']]

In [33]:
# determine cointegration rank of df_select_train_log with trace statistic
# arguments: method='trace', k_ar_diff=10, signif=0.05
print_select_coint_rank(df_select_train_log,'trace',10,0.05)
# -> Copy summary output and save as .txt

In [34]:
# read in and plot from textfile as latex: 'M4_select_tab_10_cointrank_trace.txt'
var_coeffs_aslatex_from_txt('M4_select_tab_10_cointrank_trace.txt')
# -> Copy latex output and input in overleaf

In [35]:
# determine cointegration rank of df_select_train_log with maxeig statistic
print_select_coint_rank(df_select_train_log,'maxeig',10,0.05)
# -> Copy summary output and save as .txt

In [36]:
# read in and plot from textfile as latex: 'M4_select_tab_11_cointrank_maxeig.txt'
var_coeffs_aslatex_from_txt('M4_select_tab_11_cointrank_maxeig.txt')
# -> Copy latex output and input in overleaf

In [37]:
# print out johansen cointegration result for df_select_train_log
# arguments: k_ar_diff=10, signif=0.05
print_cointegration_test_of(df_select_train_log,10,0.05)
# -> Copy summary output and save as .txt

In [38]:
# read in and plot from textfile as latex: 'M4_select_tab_12_johansen_cointrank_trace.txt'
var_coeffs_aslatex_from_txt('M4_select_tab_12_johansen_cointrank_trace.txt')
# -> Copy latex output and input in overleaf

In [55]:
# obtain VECM(4) for df_select_train_log
# positional arguments: k_ar_diff=4, coint_rank=2, det='nc'
df_select_vecm = get_vecm_model(df_select_train_log,4,2,'nc')
df_select_vecm.summary()

In [29]:
# rolling forecast: obtain mse, ausreisser, predictions, lo-& up confint for df_select with lag order = 4
# the un-logged level data is passed in; vecm_rolling_forecast takes logs itself
# note: item [0] of the result is the square root of the mse
df_select_vecm_pred_result = vecm_rolling_forecast(df[['btc','hs_rate','ggl_trends']],lag_order=4,coint_rank=2,det='nc')

# printout results
print('mse_select:\t\t', df_select_vecm_pred_result[0])
print('ausreisser_select:\t', df_select_vecm_pred_result[1])

In [27]:
# rolling forecast: create series and plot them for df_vecm_pred_result
fig_11 = plot_var_pred_result(df[['btc','hs_rate','ggl_trends']],
                             df_train[['btc','hs_rate','ggl_trends']],
                             df_test[['btc','hs_rate','ggl_trends']],
                             df_select_vecm_pred_result)

In [28]:
# save fig_11
fig_11.savefig(save_plot_to+'M4_fig_11.svg',format='svg',bbox_inches='tight',pad_inches = 0)

In [None]:
## VECM: df_train_log (12 Variables) ##

In [94]:
# determine cointegration rank of df_train_log with trace statistic
# arguments: method='trace', k_ar_diff=10, signif=0.05
print_select_coint_rank(df_train_log,'trace',10,0.05)
# -> Copy summary output and save as .txt

In [None]:
# read in and plot from textfile as latex: 'M4_tab_7_cointrank_trace.txt'
var_coeffs_aslatex_from_txt('M4_tab_7_cointrank_trace.txt')
# -> Copy latex output and input in overleaf

In [95]:
# determine cointegration rank of df_train_log with maxeig statistic
print_select_coint_rank(df_train_log,'maxeig',10,0.05)
# -> Copy summary output and save as .txt

In [96]:
# read in and plot from textfile as latex: 'M4_tab_8_cointrank_maxeig.txt'
var_coeffs_aslatex_from_txt('M4_tab_8_cointrank_maxeig.txt')
# -> Copy latex output and input in overleaf

In [105]:
# print out johansen cointegration result for df_train_log
# arguments: k_ar_diff=10, signif=0.05
print_cointegration_test_of(df_train_log,10,0.05)
# -> Copy summary output and save as .txt

In [107]:
# read in and plot from textfile as latex: 'M4_tab_9_johansen_cointrank_trace.txt'
var_coeffs_aslatex_from_txt('M4_tab_9_johansen_cointrank_trace.txt')
# -> Copy latex output and input in overleaf

In [35]:
# obtain vecm(5-1) for df_train_log and coint_rank 6
# k_ar_diff=4 is the VAR lag order 5 minus one, as the comment above states
df_vecm = get_vecm_model(df_train_log,k_ar_diff=4,coint_rank=6,det='nc')
df_vecm.summary()

In [33]:
# read in and plot from textfile as latex: 'M4_tab_13_vecm_coeffs.txt'
var_coeffs_aslatex_from_txt('M4_tab_13_vecm_coeffs.txt')
# -> Copy latex output and input in overleaf

In [38]:
# read in and plot from textfile as latex: 'M4_tab_14_vecm_alpha1.txt'
var_coeffs_aslatex_from_txt('M4_tab_14_vecm_alpha1.txt')
# -> Copy latex output and input in overleaf

\begin{tabular}{lrrrrrr}
\hline
     &    coef &   std.err &      z &   P\ensuremath{>}|z| &   [0.025 &   0.975] \\
\hline
 ec1 & -0.0379 &     0.011 & -3.577 &   0     &   -0.059 &   -0.017 \\
 ec2 & -0.0107 &     0.034 & -0.313 &   0.754 &   -0.077 &    0.056 \\
 ec3 &  0.0132 &     0.012 &  1.067 &   0.286 &   -0.011 &    0.038 \\
 ec4 &  0.0046 &     0.003 &  1.47  &   0.142 &   -0.002 &    0.011 \\
 ec5 &  0.0318 &     0.011 &  2.955 &   0.003 &    0.011 &    0.053 \\
 ec6 & -0.0061 &     0.009 & -0.69  &   0.49  &   -0.024 &    0.011 \\
\hline
\end{tabular}


In [40]:
# read in and plot from textfile as latex: 'M4_tab_15_vecm_beta1.txt'
# the text file is read from the save_plot_to directory and must already exist
var_coeffs_aslatex_from_txt('M4_tab_15_vecm_beta1.txt')
# -> Copy latex output and input in overleaf

\begin{tabular}{lrrrrrr}
\hline
         &     coef &   std.err &      z &   P\ensuremath{>}|z| &   [0.025 &   0.975] \\
\hline
 beta.1  &   1      &     0     &  0     &   0     &    1     &    1     \\
 beta.2  &  -0      &     0     &  0     &   0     &   -0     &   -0     \\
 beta.3  &   0      &     0     &  0     &   0     &    0     &    0     \\
 beta.4  &  -0      &     0     &  0     &   0     &   -0     &   -0     \\
 beta.5  &   0      &     0     &  0     &   0     &    0     &    0     \\
 beta.6  &  -0      &     0     &  0     &   0     &   -0     &   -0     \\
 beta.7  & -16.0754 &     1.756 & -9.155 &   0     &  -19.517 &  -12.634 \\
 beta.8  &   1.894  &     0.628 &  3.015 &   0.003 &    0.663 &    3.125 \\
 beta.9  &   6.6093 &     2.634 &  2.509 &   0.012 &    1.447 &   11.772 \\
 beta.10 &  -7.8263 &     6.748 & -1.16  &   0.246 &  -21.052 &    5.399 \\
 beta.11 &  -0.1407 &     1.673 & -0.084 &   0.933 &   -3.42  &    3.139 \\
 beta.12 &   0.3692 &     4.524 &  0

In [46]:
# IRF: impulse response of df_vecm
# impulse responses over 15 periods; only the responses of btc_log are plotted
df_vecm_irf = df_vecm.irf(15)
fig_12 = df_vecm_irf.plot(orth=False, response='btc_log', figsize=(8,15),subplot_params={'fontsize' : 9})
fig_12.tight_layout()

In [44]:
# save fig_12
fig_12.savefig(save_plot_to+'M4_fig_12.svg',format='svg',bbox_inches='tight',pad_inches = 0)