## Programming Assignment 2
Matriculation Nr: 01/1151248

Other group members: 1151277, 1152810, 919755

In [121]:
import numpy as np
import pandas as pd
import scipy.stats as st

from statsmodels.tsa.api import VAR

In [122]:
def Z_matrix(y: np.array, p: int, c: int):
    """Calculate the Z-matrix for a given input

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        c (int): intercept yes=1, no=0

    Returns:
        (np.array): Z-matrix for given input
    """
    
    y = y.T # transpose y
    T = np.size(y, 1) - p # get T (number of observations)
    
    # build Z-matrix
    Z = y[:, :T]
    for i in range(1, p):
        Z = np.concatenate((y[:,i:T+i], Z), 0)
        
    if (c ==1):
        Z = np.concatenate((np.ones(shape=(1, T)), Z), 0)       
    
    return Z

In [123]:
def B_matrix(y: np.array, p: int, c: int):
    """Calculates the B matrix with the estimated coefficients

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        c (int): intercept yes=1, no=0

    Returns:
        _type_: B = matrix with estimated coefficients; Z=Z-matrix; sigma_u=covariance matrix
    """
    
    # get Z-matrix from function above
    Z = Z_matrix(y, p, c)
    
    y = y.T # transpose y
    y = y[:,p:] # first p observations are lost as we need prior lags for estimation
    K = np.size(y, 0) # number of variables
    T = np.size(y, 1) - p # number of observations
    
    # calculate B
    B = y @ Z.T @ np.linalg.inv((Z@Z.T))
    
    # calculate sigma_u (covariance matrix)
    sigma_u = (1/(T-K*p-1))*((y-(B@Z))@(y-(B@Z)).T)
    
    return B, Z, sigma_u

In [124]:
def granger(y: np.array, p: int, dummy_vec: list, c=1):
    """Performs the Granger Causality Test on a given set of variables

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        dummy_vec (list): list with causing (1) and caused (0) signs
        c (int): intercept yes=1, no=0 

    Returns:
        _type_: Wald- and F-statistic together with implied p-values
    """
    
    y = y.T # transpose y
    n_cause = sum(dummy_vec) # number of causing variables = 1's in the list
    n_caused = len(dummy_vec) - n_cause # number of caused variables
       
    # arrange in right order (first cause variables, then caused variables)
    cause = []
    caused = []
    for i, n in enumerate(dummy_vec):
        if n == 1:
            cause.append(y[i])
        else:
            caused.append(y[i])
    
    cause = np.column_stack(cause)
    caused = np.column_stack(caused)
    y = np.concatenate((cause, caused), axis=1)
    
        
    # get B matrix, Z, and covariance matrix from above function
    K = y.shape[1]  # number of variables
    T = np.size(y, 1) - p
    B, Z, sigma_u = B_matrix(y, p, c) # return all three
    
    # extract relevant positions for C matrix
    positions = np.arange(0, B.size).reshape(np.size(B,1), np.size(B,0)).T # matrix of shape B and continuous index
    
    # get indices for positions that should be checked
    relevant_parts = []
    for p in range(0, p):
        pos_A = positions[:n_caused, 1+n_caused+K*p:1+n_caused+K*p+n_cause]
        for arr in pos_A:
            for num in arr:
                relevant_parts.append(num)
    relevant_parts.sort() # sort indices
    
    # vectorize B matrix (F=column-wise)
    vec_B = B.flatten(order="F").T
    
    # initialize C with zeros only
    C = np.zeros([len(relevant_parts), len(vec_B)])
    
    # add 1 at relevant parts
    for i, num in enumerate(relevant_parts):
        C[i, num] = 1
    
    # calculate lambdas
    lambda_w = (C@vec_B).T @ np.linalg.inv(C @ np.kron(np.linalg.inv(Z@Z.T), sigma_u) @ C.T) @ C@vec_B
    lambda_f = lambda_w/len(relevant_parts)
    
    # degrees of freedom
    df_chi2 = len(relevant_parts)
    df_fn = len(relevant_parts)
    df_fd = T*K-(K**2)*p-K
    
    # p_values
    p_val_chi2 = round(abs(1-st.chi2.cdf(lambda_w, df_chi2)), 4)
    p_val_f = round(abs(st.f.cdf(lambda_f, df_fn, df_fd)), 4)

    return lambda_w, p_val_chi2, lambda_f, p_val_f

In [125]:
# read in data
awm = pd.read_csv("awm19up18.csv")
awm.rename(columns={awm.columns[0]: "Q" }, inplace = True)

of_interest = ["Q", "YER", "ITR", "LTN", "STN"]
awm = awm[awm.columns.intersection(of_interest)]
awm.set_index('Q', inplace=True)

awm.head()

Unnamed: 0_level_0,YER,ITR,STN,LTN
Q,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1970Q1,738304.250471,191787.320701,7.986993,7.922865
1970Q2,752495.866789,203176.72061,7.956545,8.254439
1970Q3,761561.610862,206267.925392,7.602189,8.384747
1970Q4,770787.033957,205426.358549,7.242308,8.340932
1971Q1,769439.321026,204324.652554,6.516898,7.860624


In [126]:
# calculate logs and first differences and assign names accordingly
awm["YER_log"] = np.log(awm['YER'])
awm["ITR_log"] = np.log(awm['ITR'])

awm["d_lgdp"] = awm["YER_log"].diff()
awm["d_invest"] = awm["ITR_log"].diff()

awm["d_lgdp"] = awm["d_lgdp"] * 400
awm["d_invest"] = awm["d_invest"] * 400

awm["d_R"] = awm["LTN"].diff()
awm["d_r"] = awm["STN"].diff()

awm.dropna(inplace=True)

awm.head()

Unnamed: 0_level_0,YER,ITR,STN,LTN,YER_log,ITR_log,d_lgdp,d_invest,d_R,d_r
Q,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1970Q2,752495.866789,203176.72061,7.956545,8.254439,13.531151,12.221831,7.6158,23.075637,0.331574,-0.030448
1970Q3,761561.610862,206267.925392,7.602189,8.384747,13.543126,12.236931,4.790229,6.039915,0.130308,-0.354356
1970Q4,770787.033957,205426.358549,7.242308,8.340932,13.555167,12.232843,4.816415,-1.635326,-0.043815,-0.35988
1971Q1,769439.321026,204324.652554,6.516898,7.860624,13.553417,12.227465,-0.700008,-2.150982,-0.480308,-0.72541
1971Q2,779295.962146,209724.71336,5.938638,7.930762,13.566146,12.253551,5.091522,10.434248,0.070138,-0.578261


In [127]:
# get the input for our function
y_t = np.array(awm[["d_lgdp", "d_invest", "d_R", "d_r"]])


In [128]:
# calculate Granger Causality
granger(y_t, 2, [0, 0, 1, 1], 1)

(25.500968289745057, 0.0013, 3.187621036218132, nan)

### Check with Built-In Functions

In [129]:
# check result with statsmodels VAR module
model = VAR(awm[["d_lgdp", "d_invest", "d_R", "d_r"]])
results = model.fit(2)

  self._init_dates(dates, freq)


In [130]:
# Wald test
granger_stat_wald = results.test_causality(['d_R', "d_r"], ["d_lgdp", "d_invest"], kind='wald')
granger_stat_wald.summary()

Test statistic,Critical value,p-value,df
25.79,15.51,0.001,8


In [131]:
# F-test
granger_stat_f = results.test_causality(['d_R', "d_r"], ["d_lgdp", "d_invest"], kind='f')
granger_stat_f.summary()

Test statistic,Critical value,p-value,df
3.223,1.951,0.001,"(8, 720)"
