## Programming Assignment 3
Matriculation Nr: 01/1152810

Other group members: 1151277, 919755, 1151248

In [3]:
import numpy as np
import pandas as pd
import scipy.stats as st

from statsmodels.tsa.api import VAR

# Task 1
Read Appendix D in Lutkepohl (2005). Write a function that implements a residual bootstrap for a VAR(p) with intercept and returns the bootstrap standard errors of the VAR coefficients in B.†

The function should take
- a T + p × K matrix of observations on yt,
- the lag length p,
- and the number of bootstrap replications R as input.

In [4]:
def Z_matrix(y: np.array, p: int, c: int):
    """Calculate the Z-matrix for a given input

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        c (int): intercept yes=1, no=0

    Returns:
        (np.array): Z-matrix for given input
    """

    y = y.T

    #determine matrix dimensions:
    T = y.shape[1] - p
    K = y.shape[0]

    # build Z-matrix
    if c==1:
        Z = np.ones((1, T+p), dtype=float)

    # 1b stacked lagged data
    for i in range(p):
        #add i columns of leading zeros (EDIT: empty, comp cost lower) to ktpmat
        zeros = np.zeros((K, i), dtype=float)
        zerostack = np.hstack((zeros, y[:,:(T+p-i)]))
        # vertically stack this to Z
        Z = np.vstack((Z, zerostack))

    # cutting of leading p columns and retrieving Z
    Z = Z[:, p-1:-1]

    return Z

In [5]:
def B_matrix(y: np.array, p: int, c: int):
    """Calculates the B matrix with the estimated coefficients

    Args:
        y (np.array): input with all the data of shape (T + p) × K
        p (int): lags
        c (int): intercept yes=1, no=0

    Returns:
        _type_: B = matrix with estimated coefficients; Z=Z-matrix; sigma_u=covariance matrix
    """

    # get Z-matrix from function above
    Z = Z_matrix(y, p, c)

    y = y.T # transpose y
    y = y[:,p:] # first p observations are lost as we need prior lags for estimation
    K = y.shape[0] # number of variables
    T = y.shape[1] # number of observations

    # calculate B
    B = y @ Z.T @ np.linalg.inv((Z@Z.T))

    # calculate sigma_u (covariance matrix)
    sigma_u = (1/(T-K*p-1))*(y-(B@Z))@(y-(B@Z)).T

    return B, Z, sigma_u

In [6]:
def resid_bootstrap(Tpkmat, p):
    '''
    :param Tpkmat: a T + p × K matrix of observations on yt,
    :param p: the lag length p,
    :param R: and the number of bootstrap replications R as input.
    :return: returns the bootstrap standard errors of the VAR coefficients in B
    '''

    y = Tpkmat.T # transpose input matrix to K x (T+p)
    T = y.shape[1] - p # get T (number of observations)
    K = y.shape[0]

    '''
    Description from Lütkepohl, appendix D, page 709
    (1) The parameters of the model under consideration are estimated. Let uthat, t = 1, . . . , T, be the estimation residuals.
    '''

    B, Z, sigma_u = B_matrix(Tpkmat, p, c=1)

    '''
    (2) Centered residuals are computed (usual average). Bootstrap residuals u∗1, . . . , u∗T are then obtained by randomly drawing with replacement from the centered residuals.
    '''

    uthat = y[:,p:] - (B@Z)
    uthatbar = np.sum(uthat, axis=1)/T
    uthatcenterded = uthat - uthatbar.T.reshape(K, 1)
    draws = np.random.randint(0, T, T)


    '''
    (3) Bootstrap time series are computed recursively [...]where the same initial values may be used for each generated series, (y∗ −p+1, . . . , y∗0) = (y−p+1, . . . , y0).
    '''
    # set bootstrap time series pre-sample values to the same presample series from original data for every repetition

    bs_y = y[:,:p]

    for i in range(T):
        y_t = B[:,0] + uthatcenterded[:,draws[i]]
        for l in range(p):
            y_t = y_t + (B[:, (l*K+1):(l*K+K+1)] @ bs_y[:,-l])
        bs_y = np.hstack((bs_y, y_t.reshape(K, 1)))

    B_bs, Z_bs, sigma_u_bs = B_matrix(bs_y.T, p, c=1)

    return B_bs, Z_bs, sigma_u_bs

In [7]:
def bootstrap_se(Tpkmat, p, R):
    K = Tpkmat.shape[1]

    B_bs_list = np.empty((K, K*p+1))
    for i in range(R):
        B_bs, _, _ = resid_bootstrap(Tpkmat, p)
        B_bs_list = np.dstack((B_bs_list, B_bs))

    Bbar_bs_list = np.mean(B_bs_list, axis = 2)
    deviation = B_bs_list - Bbar_bs_list[:, :, None]
    deviation_squared = deviation**2
    sd = np.sqrt(np.sum(deviation_squared, axis=2)/(R-1))
    se = sd/np.sqrt(R)

    return se

# Task 2
Use the VAR(2) from 2) on programming assignment 2 and your function from 1) with R = 499 bootstrap replications. Report the bootstrap standard errors of the VAR coefficients and compare them to the asymptotic standard errors from the Python VAR package/or your VAR estimation function.

In [8]:
# read in data
awm = pd.read_csv("awm19up18.csv")
awm.rename(columns={awm.columns[0]: "Q" }, inplace = True)

of_interest = ["Q", "YER", "ITR", "LTN", "STN"]
awm = awm[awm.columns.intersection(of_interest)]
awm.set_index('Q', inplace=True)

# calculate logs and first differences and assign names accordingly
awm["YER_log"] = np.log(awm['YER'])
awm["ITR_log"] = np.log(awm['ITR'])

awm["d_lgdp"] = awm["YER_log"].diff()
awm["d_invest"] = awm["ITR_log"].diff()

awm["d_lgdp"] = awm["d_lgdp"] * 400
awm["d_invest"] = awm["d_invest"] * 400

awm["d_R"] = awm["LTN"].diff()
awm["d_r"] = awm["STN"].diff()

awm.dropna(inplace=True)

# get the input for our function
y_t = np.array(awm[["d_lgdp", "d_invest", "d_R", "d_r"]])

In [9]:
B, Z, sigma_u = B_matrix(y_t, p=2, c=1)

In [10]:
B_se = bootstrap_se(y_t, 2, R=499)

In [11]:
print(B, B_se)

[[ 1.06215220e+00  1.61966835e-01 -1.02585995e-02  1.33841904e+01
   2.98371761e+00  5.79521898e-02  9.93488944e-03 -1.25041634e+01
  -1.46961778e+01]
 [-1.71737723e+00  3.57250335e-01 -7.60219886e-02 -1.69256714e+01
   4.91454522e+00 -1.96235238e-02  4.45799123e-02 -1.03675069e+01
  -1.39595271e+01]
 [-4.22394981e+00  2.42373907e-01  1.39346146e-03  1.80719672e+01
   1.15044849e+00  3.19615409e-01 -8.32276617e-02 -2.47484481e+01
   3.20862724e+00]
 [-1.40472592e+01  3.23600533e-01 -1.84046616e-02  1.19040570e+01
   3.90262951e+00  2.10542297e-01 -6.98091982e-03 -2.90121146e+01
  -2.89123777e+00]]


# Task 3
Write a Python function that simulates time series data from a K-dimensional VAR(2) process yt = A1yt−1 + A2yt−2 + ut, where the innovations ut are drawn from a multivariate normal distribution with mean zero and covariance matrix Σu. Use y−1 = y0 = 0 as starting values, where 0 is a K × 1 vector of zeros, generate time series of length T + 50 and discard the first 50 observations, such that you have available time series of total length equal to T.

Your function should take A1, A2, Σu and T as an input and should return a T × K matrix of observations on yt.

In [12]:
def var2sim(A1, A2, sigma_u, T):
    '''
    :param A1:
    :param A2:
    :param sigma_u:
    :param T:
    :return:
    '''
    K = sigma_u.shape[0]

    # set starting values
    y_tminus1 = np.zeros((K, 1))
    y_tminus2 = np.zeros((K, 1))

    P = np.linalg.cholesky(sigma_u)

    y = np.empty((K, 0))
    for i in range(T+50):
        # draw disturbance u_t
        u_t = P @ np.random.standard_normal(K)
        u_t = u_t.reshape(K, 1)
        #recursively calculate y_t
        y_t = A1@y_tminus1 + A2@y_tminus2 - 2 + u_t
        y = np.hstack((y, y_t))
        y_tminus2 = y_tminus1
        y_tminus1 = y_t

    # discard first 50 observations
    y = y[:,50:]

    return y.T

In [13]:
K = 4
p = 2
T = 100

A1 = B[:,1:K+1]
A2 = B[:,K+1:2*K+1]

var2sim(A1, A2, sigma_u, T)


array([[-1.53708432,  6.27735458, -3.22677323, -4.12215519],
       [-0.47921139,  0.36915628, -3.15259238, -3.01510349],
       [ 0.15628225, 10.60784644, -3.00918952, -3.2814632 ],
       [-0.27453634,  6.14958639, -3.66630978, -3.95205397],
       [-1.01622283,  5.91769531, -3.416074  , -3.33218208],
       [ 0.47063642,  8.23628365, -3.22928981, -2.71769182],
       [ 4.83934219, 16.84273948, -3.28323843, -2.94957597],
       [ 3.89610382, 16.19680253, -3.11373019, -2.73674655],
       [ 4.21078919, 18.63721738, -2.9978715 , -2.51011639],
       [ 3.37543573, 10.42395994, -2.91610051, -2.92613807],
       [-1.99660476,  1.10572781, -3.02334524, -2.55402014],
       [-2.01089122,  6.97651186, -3.38648307, -3.08438749],
       [-3.41223542,  0.07932598, -3.22649629, -2.87964209],
       [-0.17476255,  9.18832762, -3.51536294, -2.67424969],
       [-2.82643149, -1.09914398, -3.80290677, -4.16041831],
       [-1.22811519,  0.77642969, -3.51651045, -3.37675574],
       [ 1.3405538 ,  7.

# Task 4
Write a Python function that computes the h-step ahead point forecasts yT (h) and the corresponding MSE matrix ˆΣy(h) based on a VAR(p) with intercept. The inputs to the function should be a K × T matrix of observations, the lag order p, and the forecast horizon h. As an output, the function should return the h-step ahead forecasts and the corresponding MSE matrix.‡

In [130]:
def hstep_forecast(y, p, h):
    '''
    :param y: a K × T matrix of observations
    :param p:
    :param h:
    :return:
    '''
    K = y.shape[0]
    T = y.shape[1]

    # retrieving estimates
    B, Z, sigma_u = B_matrix(y.T, p, c=1)

    # constructing matrices
    J1 = np.hstack((np.zeros((K, 1)), np.identity(K), np.zeros((K, K*(p-1)))))

    row0 = np.hstack((np.ones((1,1)), np.zeros((1, K*p))))
    rowz = np.hstack((np.zeros((K*(p-1), 1)), np.identity(K*(p-1)), np.zeros((K*(p-1), K))))
    B = np.vstack((row0, B, rowz))

    Zt = y[:,-p:]                       # selecting y[:,-p:] from t-p up to t
    Zt = Zt[:,::-1].T.flatten()             # reverse order horizontally, transpose and flatten.
    Zt = np.hstack((np.array([(1)]), Zt)).reshape(K*p+1, 1)   # adding one leading 1, transposing, dimension is: 1+K*T x 1
    # predicting y_th

    y_th = J1@B**h@Zt


    # ab hier alles wild. wie berechnen wir small sample mse? oder bootstrappen wir hier (formula von task1)?

    # THIS IS ONLY THE FORMULA FOR one-step ahead forecast, others need to be calculated differently, see Lütkepohl page 98 ff
    sigma_hat_yh_hat = sigma_u + ((K*p+1)/T)*sigma_u

    ## was machen wir mit all dem stuff auf seite 97 ff? offensichtlich ist sigma_hat_yt_hat(2) != sigma_hat_yt(2) (see page 99)
    PHIi = J1@B**(i-1)@J1.T
    #sigma_hat_yh_hat = sigma_u + PHIi@sigma_u@PHIi.T + 1/T * omega_hat




    return y_th, sigma_hat_yh_hat

# Task 5
Use your function in 3) and generate time series data of length T = 100 according to a bivariate VAR(2) process with the following parameters: [...]

Use the function in 4) to compute h-step ahead forecasts and the corresponding MSE matrix based on a VAR(2) process with intercept for h = 1 and h = 4. Use these estimates to set up a 95% interval forecasts assuming that the process yt is Gaussian.

In [131]:
T = 100
p = 2
A1 = np.array([(0.4, 0.25), (0.0, 0.5)])
A2 = np.array([(0.2, 0.4), (0.0, 0.0)])
sigma_u = np.array([(1, 0.5), (0.5, 1)])

time_series_TK = var2sim(A1, A2, sigma_u, T)

h = 1
y_th1, mse_mat1 = hstep_forecast(time_series_TK.T, p, h)

h = 4
y_th4, mse_mat4 = hstep_forecast(time_series_TK.T, p, h)


#### set up 95 % interval forecast (assuming data is generated from gaussian process)

CIone95 = [y_th1 - 1.96 * np.sqrt(np.diag(mse_mat1)), y_th1 + 1.96 * np.sqrt(np.diag(mse_mat1))]
CIfour95 = [y_th4 - 1.96 * np.sqrt(np.diag(mse_mat4)), y_th4 + 1.96 * np.sqrt(np.diag(mse_mat4))]


# remaining code from assignment 2

### Check with Built-In Functions

In [18]:
# check result with statsmodels VAR module
model = VAR(awm[["d_lgdp", "d_invest", "d_R", "d_r"]])
results = model.fit(2)

  self._init_dates(dates, freq)


In [19]:
# Wald test
granger_stat_wald = results.test_causality(["d_lgdp", "d_invest"],['d_R', "d_r"], kind='wald')
granger_stat_wald.summary()

Test statistic,Critical value,p-value,df
16.4,15.51,0.037,8


In [20]:
# F-test
granger_stat_f = results.test_causality(["d_lgdp", "d_invest"],['d_R', "d_r"], kind='f')
granger_stat_f.summary()

Test statistic,Critical value,p-value,df
2.05,1.951,0.038,"(8, 720)"
