## Programming Assignment 3
Matriculation Nr: 01/1152810

Other group members: 1151277, 919755, 1151248

In [139]:
import numpy as np
import pandas as pd
import scipy.stats as st

from statsmodels.tsa.api import VAR

Read Appendix D in Lutkepohl (2005). Write a function that implements a residual bootstrap for a VAR(p) with intercept and returns the bootstrap standard errors of the VAR coefficients in B.†

The function should take
- a T + p × K matrix of observations on yt,
- the lag length p,
- and the number of bootstrap replications R as input.

In [140]:
def Z_matrix(y: np.array, p: int, c: int):
    """Calculate the Z-matrix for a given input

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        c (int): intercept yes=1, no=0

    Returns:
        (np.array): Z-matrix for given input
    """

    y = y.T

    #determine matrix dimensions:
    T = y.shape[1] - p
    K = y.shape[0]

    # build Z-matrix
    if c==1:
        Z = np.ones((1, T+p), dtype=float)

    # 1b stacked lagged data
    for i in range(p):
        #add i columns of leading zeros (EDIT: empty, comp cost lower) to ktpmat
        zeros = np.zeros((K, i), dtype=float)
        zerostack = np.hstack((zeros, y[:,:(T+p-i)]))
        # vertically stack this to Z
        Z = np.vstack((Z, zerostack))

    # cutting of leading p columns and retrieving Z
    Z = Z[:, p-1:-1]

    return Z

In [141]:
def B_matrix(y: np.array, p: int, c: int):
    """Calculates the B matrix with the estimated coefficients

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        c (int): intercept yes=1, no=0

    Returns:
        _type_: B = matrix with estimated coefficients; Z=Z-matrix; sigma_u=covariance matrix
    """

    # get Z-matrix from function above
    Z = Z_matrix(y, p, c)

    y = y.T # transpose y
    y = y[:,p:] # first p observations are lost as we need prior lags for estimation
    K = y.shape[0] # number of variables
    T = y.shape[1] # number of observations

    # calculate B
    B = y @ Z.T @ np.linalg.inv((Z@Z.T))

    # calculate sigma_u (covariance matrix)
    sigma_u = (1/(T-K*p-1))*(y-(B@Z))@(y-(B@Z)).T

    return B, Z, sigma_u

In [178]:
def resid_bootstrap(Tpkmat, p):
    '''
    :param Tpkmat: a T + p × K matrix of observations on yt,
    :param p: the lag length p,
    :param R: and the number of bootstrap replications R as input.
    :return: returns the bootstrap standard errors of the VAR coefficients in B
    '''

    y = Tpkmat.T # transpose input matrix to K x (T+p)
    T = y.shape[1] - p # get T (number of observations)
    K = y.shape[0]

    '''
    Description from Lütkepohl, appendix D, page 709
    (1) The parameters of the model under consideration are estimated. Let uthat, t = 1, . . . , T, be the estimation residuals.
    '''

    B, Z, sigma_u = B_matrix(Tpkmat, p, c=1)

    '''
    (2) Centered residuals are computed (usual average). Bootstrap residuals u∗1, . . . , u∗T are then obtained by randomly drawing with replacement from the centered residuals.
    '''

    uthat = y[:,p:] - (B@Z)
    uthatbar = np.sum(uthat, axis=1)/T
    uthatcenterded = uthat - uthatbar.T.reshape(K, 1)
    draws = np.random.randint(0, T, T)


    '''
    (3) Bootstrap time series are computed recursively [...]where the same initial values may be used for each generated series, (y∗ −p+1, . . . , y∗0) = (y−p+1, . . . , y0).
    '''
    # set bootstrap time series pre-sample values to the same presample series from original data for every repetition

    bs_y = y[:,:p]

    for i in range(T):
        y_t = B[:,0] + uthatcenterded[:,draws[i]]
        for l in range(p):
            y_t = y_t + (B[:, (l*K+1):(l*K+K+1)] @ bs_y[:,-l])
        bs_y = np.hstack((bs_y, y_t.reshape(K, 1)))

    B_bs, Z_bs, sigma_u_bs = B_matrix(bs_y.T, p, c=1)

    return B_bs, Z_bs, sigma_u_bs

In [180]:
def bootstrap_se(Tpkmat, p, R):
    K = Tpkmat.shape[1]

    B_bs_list = np.empty((K, K*p+1))
    for i in range(R):
        B_bs, _, _ = resid_bootstrap(Tpkmat, p)
        B_bs_list = np.dstack((B_bs_list, B_bs))

    Bbar_bs_list = np.mean(B_bs_list, axis = 2)
    deviation = B_bs_list - Bbar_bs_list[:, :, None]
    deviation_squared = deviation**2
    sd = np.sqrt(np.sum(deviation_squared, axis=2)/(R-1))
    se = sd/np.sqrt(R)

    return se

In [182]:
# read in data
awm = pd.read_csv("awm19up18.csv")
awm.rename(columns={awm.columns[0]: "Q" }, inplace = True)

of_interest = ["Q", "YER", "ITR", "LTN", "STN"]
awm = awm[awm.columns.intersection(of_interest)]
awm.set_index('Q', inplace=True)

# calculate logs and first differences and assign names accordingly
awm["YER_log"] = np.log(awm['YER'])
awm["ITR_log"] = np.log(awm['ITR'])

awm["d_lgdp"] = awm["YER_log"].diff()
awm["d_invest"] = awm["ITR_log"].diff()

awm["d_lgdp"] = awm["d_lgdp"] * 400
awm["d_invest"] = awm["d_invest"] * 400

awm["d_R"] = awm["LTN"].diff()
awm["d_r"] = awm["STN"].diff()

awm.dropna(inplace=True)

# get the input for our function
y_t = np.array(awm[["d_lgdp", "d_invest", "d_R", "d_r"]])

In [183]:
B, Z, sigma_u = B_matrix(y_t, p=2, c=1)

In [188]:
B_se = bootstrap_se(y_t, 2, R=499)

In [189]:
## does our B_matrix function return SDs?
print(B, B_se)

[[ 8.19991990e-01  4.41893745e-01 -3.09516135e-02  3.53840313e-01
   2.80812932e-01  1.40348551e-01  2.89658568e-02 -6.61782716e-01
  -6.99814626e-01]
 [-1.00607134e+00  1.20312057e+00 -2.28829643e-01 -1.26631209e+00
   6.99606104e-01 -5.17309344e-02  2.41250166e-01 -1.21968589e+00
  -1.57970603e+00]
 [-6.57473325e-02  1.20603459e-02  1.78784492e-04  5.15582681e-01
   3.78561218e-02  1.32232293e-02 -5.88137520e-03 -2.20154123e-01
   7.06111408e-02]
 [-2.43278598e-01  7.28410851e-02 -7.47895590e-03  4.22382718e-01
   2.78528354e-01  3.87124972e-02 -2.40842053e-03 -2.89226900e-01
  -7.25841414e-02]] [[0.02283937 0.00683669 0.00624018 0.0254008  0.01533375 0.0065982
  0.00611172 0.02538575 0.01457782]
 [0.0559889  0.01297949 0.00715414 0.05918127 0.03959638 0.01243265
  0.01167807 0.06350459 0.03945869]
 [0.00330672 0.00063863 0.00037406 0.0037824  0.00239577 0.0006323
  0.00029105 0.00374192 0.00237312]
 [0.00540538 0.0011102  0.00090607 0.00560284 0.00392899 0.00115815
  0.00079881 0.00

Write a Python function that simulates time series data from a K-dimensional VAR(2) process yt = A1yt−1 + A2yt−2 + ut, where the innovations ut are drawn from a multivariate normal distribution with mean zero and covariance matrix Σu. Use y−1 = y0 = 0 as starting values, where 0 is a K × 1 vector of zeros, generate time series of length T + 50 and discard the first 50 observations, such that you have available time series of total length equal to T.

Your function should take A1, A2, Σu and T as an input and should return a T × K matrix of observations on yt.

In [192]:
def var2sim(A1, A2, sigma_u, T):
    '''
    :param A1:
    :param A2:
    :param sigma_u:
    :param T:
    :return:
    '''
    K = sigma_u.shape[0]

    # set starting values
    y_tminus1 = np.zeros((K, 1))
    y_tminus2 = np.zeros((K, 1))

    P = np.linalg.cholesky(sigma_u)

    y = np.empty((K, 0))
    for i in range(T+50):
        # draw disturbance u_t
        u_t = P @ np.random.standard_normal(K)
        u_t = u_t.reshape(K, 1)
        #recursively calculate y_t
        y_t = A1@y_tminus1 + A2@y_tminus2 - 2 + u_t
        y = np.hstack((y, y_t))
        y_tminus2 = y_tminus1
        y_tminus1 = y_t

    # discard first 50 observations
    y = y[:,50:]

    return y

In [195]:
K = 4
p = 2
T = 100

A1 = B[:,1:K+1]
A2 = B[:,K+1:2*K+1]

var2sim(A1, A2, sigma_u, T)


array([[ 3.34775524,  2.72235854, -2.00055344, -0.38940007, -0.34949528,
        -2.90363878, -0.44865126,  0.70613017,  5.65320823,  3.37677632,
         5.40045483,  4.03573176, -0.60086612, -0.87464702,  2.17666725,
        -3.69543869, -5.58652651, -2.57700428, -2.26455655, -2.62997635,
        -3.85101517, -1.94920017,  1.63177463, -1.0342273 ,  0.88527081,
         3.77233221, -3.00627628, -0.56752971, -0.13803759,  0.85408527,
         1.49626022,  3.96786196,  1.94053258,  4.01319926,  1.52780245,
         1.81096723,  2.03333437,  0.10382343, -0.13346659,  4.01106363,
        -0.04496531, -3.47350015, -2.95566092,  1.60866626,  3.1819228 ,
         3.51206641,  3.18145074, -0.26474359,  1.8530833 ,  5.82659499,
        -0.87363365, -0.9993103 ,  1.1457673 ,  0.62227537, -0.89431452,
         4.34683249,  2.48537347,  3.66956162,  5.93120846,  6.85538201,
         4.08635729, -0.74650098,  5.25529243,  1.59516729,  4.30413131,
         1.52867223,  1.6673621 ,  4.21000515,  5.4

# remaining code from assignment 2

In [None]:
def granger(y: np.array, p: int, dummy_vec: list, c=1):
    """Performs the Granger Causality Test on a given set of variables

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        dummy_vec (list): list with causing (1) and caused (0) signs
        c (int): intercept yes=1, no=0 

    Returns:
        _type_: Wald- and F-statistic together with implied p-values
    """
    
    y = y.T # transpose y
    n_cause = sum(dummy_vec) # number of causing variables = 1's in the list
    n_caused = len(dummy_vec) - n_cause # number of caused variables
       
    # arrange in right order (first cause variables, then caused variables)
    cause = []
    caused = []
    for i, n in enumerate(dummy_vec):
        if n == 1:
            cause.append(y[i])
        else:
            caused.append(y[i])
    
    cause = np.column_stack(cause)
    caused = np.column_stack(caused)
    y = np.concatenate((cause, caused), axis=1)
    
    
    
    # get B matrix, Z, and covariance matrix from above function
    K = y.shape[1]  # number of variables
    T = np.size(y, 0) - p
    B, Z, sigma_u = B_matrix(y, p, c) # return all three
    
    # get indices for positions that should be checked
    relevant_parts = []
    for p_ in range(p):
        for a_v in range(n_cause):
            for p_v in range(n_caused):
                relevant_parts.append(K*c + n_cause + p_v + a_v*K + p_*(K**2))
   
    # vectorize B matrix (F=column-wise)
    vec_B = B.flatten(order="F").T
    
    # initialize C with zeros only
    C = np.zeros([len(relevant_parts), len(vec_B)])
    
    
    # add 1 at relevant parts
    for i, num in enumerate(relevant_parts):
        C[i, num] = 1
    
    # calculate lambdas
    lambda_w = (C@vec_B).T @ np.linalg.inv(C @ np.kron(np.linalg.inv(Z@Z.T), sigma_u) @ C.T) @ C@vec_B
    lambda_f = lambda_w/len(relevant_parts)
    
    # degrees of freedom
    df_chi2 = len(relevant_parts)
    df_fn = len(relevant_parts)
    df_fd = T*K-((K**2)*p)-K
    
    # p_values
    p_val_chi2 = round(abs(1-st.chi2.cdf(lambda_w, df_chi2)), 4)
    p_val_f = round(abs(1-st.f.cdf(lambda_f, df_fn, df_fd)), 4)
    
    # ftest degrees freedom
    df_fd = (len(relevant_parts),T*K-((K**2)*p)-K)

    return lambda_w, p_val_chi2, df_fn, lambda_f, p_val_f, df_fd

In [None]:
# calculate Granger Causality
ts_w, p_w, df_w, ts_f, p_f, df_f = granger(y_t, 2, [0, 0, 1, 1], 1)
print(f'ts_w: {ts_w}, \np_w: {p_w}, \ndf_w: {df_w}, \nts_f: {ts_f}, \np_f: {p_f}, \ndf_f: {df_f}')

### Check with Built-In Functions

In [None]:
# check result with statsmodels VAR module
model = VAR(awm[["d_lgdp", "d_invest", "d_R", "d_r"]])
results = model.fit(2)

In [None]:
# Wald test
granger_stat_wald = results.test_causality(["d_lgdp", "d_invest"],['d_R', "d_r"], kind='wald')
granger_stat_wald.summary()

In [None]:
# F-test
granger_stat_f = results.test_causality(["d_lgdp", "d_invest"],['d_R', "d_r"], kind='f')
granger_stat_f.summary()