In [1]:
%matplotlib inline
import time
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
from scipy import io

# Own code
sys.path.append("../")
from utils.data_utils import create_data, create_dgp_data, transformation, standardize, generate_dgp_tvp_var, generate_matrices, generate_contemp_matrices
from utils.tvp_models import TVPVARModel, tvp_ar_contemp, tvp_ar_non_contemp

# Suppress scientific notation in numpy
np.set_printoptions(suppress=True)

### Generate data according to a TVP-VAR DGP

In [2]:
# Settings shared by the data-generation and model cells below:
# M, T and p are handed to the DGP / matrix builders, `train` to the models.
M, T, p, train = 2, 200, 1, 150

# Seed the global NumPy RNG so the simulated draws are repeatable.
np.random.seed(12345)

In [None]:
# def generate_dgp_tvp_var(M, T, p, diagonal_coefficient, cross_coefficient, sigma_states, sigma_observation, covariance, binomial_prob):

#     y = np.zeros((M,T))
#     A_1_vec = np.zeros((M*(M*p),T))
#     selection_mask = np.random.binomial(1,binomial_prob,M*(M*p)) == 1
    
#     for t in range(T):

#         if t == 0:
#             A_1_vec[selection_mask,t] = cross_coefficient
#             np.fill_diagonal(A_1_vec[:,t].reshape(M,(M*p)), np.repeat(diagonal_coefficient,M)) 
#             y[:,t] = np.ones(M)
#         else:
#             A_1_vec[:,t] = A_1_vec[:,t-1] + multivariate_normal.rvs(mean=np.zeros(M**2), cov=np.diag(np.ones(M**2)*sigma_states))

#             ## Eigen values check
#             eigen_values = np.linalg.eig(A_1_vec[:,t].reshape(M,M))[0]
#             stationary = any(eigen_values < 1)
#             if stationary == False:
#                 print(f'Failed eigen values requirement < 1 (explosive process)')
#                 print(f'Iteration: {t}')
#                 break

#             Z = np.zeros((M,M**2))
#             for m in range(M):
#                 Z[m,m*M:(m*M+M)] = y[:,t-1]
#             y[:,t] = Z@A_1_vec[:,t] + multivariate_normal.rvs(mean=np.zeros(M), cov=(np.diag(np.ones(M))*sigma_observation + np.tril(np.repeat(covariance,M),-1)))
            
#     return y, A_1_vec

In [None]:
# Simulate one data set from the TVP-VAR DGP, then build the regressor matrices
# for the plain and the contemporaneous model variants.
# NOTE(review): the positional arguments after `p` presumably map to
# (diagonal_coefficient, cross_coefficient, sigma_states, sigma_observation,
# covariance, binomial_prob), as in the commented-out reference definition in
# this notebook — confirm against utils.data_utils.
y, coefficients = generate_dgp_tvp_var(M, T, p, 1/2, 1/4, 4*1e-5, 1, 1/2, 1e-2)
y_matrix, X_matrix = generate_matrices(T, M, p, y)
y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

In [16]:
# Export the simulated data set to CSV.
# The previous version of this cell read `x`, `A_1_vec` and
# `sigma_observations`, none of which is assigned by any earlier cell, so it
# failed on a fresh kernel (Restart & Run All).
x = y[:, :-1]  # every observation except the last one (the lagged regressors)
np.savetxt("../data/y.csv", y[:, 1:].T, delimiter=",")
np.savetxt("../data/x.csv", x.T, delimiter=",")
# `coefficients` is the second value returned by generate_dgp_tvp_var above.
np.savetxt("../data/coeff.csv", coefficients[:M, 1:], delimiter=",")
# generate_dgp_tvp_var only returns (y, coefficients) in this notebook, so the
# observation variances are exported only when a previous cell has defined them.
# NOTE(review): decide whether sigma_obs.csv is still needed.
if "sigma_observations" in globals():
    np.savetxt("../data/sigma_obs.csv", sigma_observations[:, 1:], delimiter=",")

Generate the matrices for our own implementation of the TVP-(V)AR model

In [68]:
# def generate_matrices(T, M, p, y):

#     series = y

#     lagged_T = T - p
#     lagged_series = []
#     y_series = []

#     lagged_y = np.ones((lagged_T, M * p))
#     k = M*(M*p)
#     position_counter = 0
#     total_lags = M * p

#     for m in range(M):
#         y_m = series[m]
#         for i in range(1, p + 1):

#             lagged_y[:, position_counter] = y_m[(p - i):-i]
#             position_counter += 1

#     # Create lagged dependent matrix
#     X = np.zeros((lagged_T, M, k))
#     stacked_X = np.zeros((M, lagged_T, k))

#     for m in range(M):
#         stacked_X[m, :, m * (total_lags):(m + 1) * total_lags] = lagged_y

#     stacked_list = list()

#     for m in range(M):
#         stacked_list.append(stacked_X[m])

#     for t in range(lagged_T):
#         X[t] = np.squeeze(np.dstack(tuple(stacked_list)))[t].T

#     for s in series:
#         y_series.append(s[p:])

#     y_own = np.array(y_series)
#     X_own = X

#     return y_own, X_own

In [48]:
# def generate_contemp_matrices(T, M, p, y):

#     # Contemporaneous values added

#     series = y

#     lagged_T = T - p
#     lagged_series = []
#     y_series = []

#     lagged_y = np.ones((lagged_T, M * p))
#     k = M*(M*p)+M*(M-1)
#     variable_list = np.arange(M)
#     position_counter = 0
#     total_lags = M * p + (M-1)

#     for m in range(M):
#         y_m = series[m]
#         for i in range(1, p + 1):

#             lagged_y[:, position_counter] = y_m[(p - i):-i]
#             position_counter += 1

#     # Create lagged dependent matrix
#     X = np.zeros((lagged_T, M, k))
#     stacked_X = np.zeros((M, lagged_T, k))

#     for m in range(M):
#         contemp_y = np.zeros((T-1,M-1))

#         if m != 0:
#             contemp_y[:,:m] = series[:m][:,1:].T

#         stacked_X[m, :, m * (total_lags):(m + 1) * total_lags] = np.hstack((lagged_y, contemp_y))

#     stacked_list = list()

#     for m in range(M):
#         stacked_list.append(stacked_X[m])

#     for t in range(lagged_T):
#         X[t] = np.squeeze(np.dstack(tuple(stacked_list)))[t].T

#     for s in series:
#         y_series.append(s[p:])

#     y_own = np.array(y_series)
#     X_own = X
    
#     return y_own, X_own

### Static DGP

In [42]:
# y_0 = 5
# y_series = np.zeros(T+1)
# phi = 0.5
# sigma = 0.5
# location = 0

# for i in range(T+1):

#     if i == 0:
        
#         y_series[i] = phi*y_0 + np.random.normal(location, sigma)
        
#     else:
        
#         y_series[i] = phi*y_series[i-1] + np.random.normal(location, sigma)


In [43]:
# #y_series = (y_series - y_series.mean())/y_series.std()

# x = y_series[:-1]
# y = y_series[1:]

In [None]:
# tvp_ar = TVPVARModel(np.expand_dims(x,1),np.expand_dims(y,1), p, train, False, homoskedastic=True)
# tvp_ar.k = 1
# tvp_ar.iterations = 25
# tvp_ar.initialize_priors(prior='horseshoe')#,prior_parameters={'b0': 2, 'a0': 2})
# mt1t, st1t  = tvp_ar.train(print_status=True)
# #mt1t_mean_set.append(mt1t)
# #sigma_set.append(tvp_ar.sigma_t

### DGP according to Koop & Korobilis (2020) - MATLAB

In [None]:
# Load the MATLAB Monte Carlo data: each .mat file is read into a dict and the
# arrays stored under the keys 'y' and 'x' are extracted.
# NOTE(review): the paths point two directories above the notebook, outside the
# project tree — confirm the files exist there before running.
y_matlab_dict = io.loadmat('../../Gary Koop & Dimitris Korobilis (2020) - VBDVS/MONTE_CARLO/y.mat')
x_matlab_dict = io.loadmat('../../Gary Koop & Dimitris Korobilis (2020) - VBDVS/MONTE_CARLO/x.mat')

y_matlab = y_matlab_dict['y']
x_matlab = x_matlab_dict['x']

### TVP-AR

Own implementation

### Simulation of the DGP

In [None]:
import pickle

# Monte Carlo study: for each system size M, simulate `n_iterations` data sets,
# fit the contemporaneous TVP-AR model on each, and pickle the statistics.
different_m = [2,5,10]

for M in different_m:

    n_iterations = 200
    # Re-seed for every M so each system size starts from the same RNG state.
    np.random.seed(12345)

    # M = 10
    T = 200
    p = 1
    train = 150
    # NOTE(review): `prior` only ends up in the saved parameters and the file
    # name; it is not passed to tvp_ar_contemp in this cell.
    prior = 'lasso'

    contemperanous_statistics = []
    non_contemperanous_statistics = []

    for run in range(n_iterations):

        y, coefficients = generate_dgp_tvp_var(M, T, p, 1/2, 1/4, 4*1e-5, 1, 1/2, 1e-2)
        y_matrix, X_matrix = generate_matrices(T, M, p, y)
        y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

#         # Without contemporaneous values (disabled)
#         msfe, alpl, mt1t, sigma = tvp_ar_non_contemp(T, M, p, train, X_matrix, y_matrix)
#         msd = np.mean((mt1t - coefficients[:,1:train])**2)
#         non_contemperanous_statistics.append([msfe, alpl, msd])

        # Contemporaneous values added
        msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp = tvp_ar_contemp(T, M, p, train, X_matrix_contemp, y_matrix_contemp)
        # Mean squared deviation between estimated and true coefficient paths.
        msd_contemp = np.mean((mt1t_coeff_contemp - coefficients[:,1:train])**2)

        contemperanous_statistics.append([msfe_contemp, alpl_contemp, msd_contemp])

        # Progress report every n_iterations/20 runs. Only the contemporaneous
        # statistics are printed: `msd`, `msfe` and `alpl` belong to the
        # disabled branch above and referencing them raised a NameError.
        if ((run+1) % (n_iterations/20) == 0.0):
            print(f'Run {run+1} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    # Save simulation parameters
    simulation_parameters = [M,T,p,train, 1/2, 1/4, 4*1e-5, 1, 1/2, 1e-2, 12345, prior]

    simulation_statistics = [contemperanous_statistics, simulation_parameters]

    with open(f'../simulations/statistics_{M}_{T}_{p}_{prior}_{n_iterations}_contemp.pkl', 'wb') as f:
        pickle.dump(simulation_statistics, f, pickle.HIGHEST_PROTOCOL)

In [18]:
def simulation_run(run, M=2, T=200, p=1, train=150):
    """Simulate one TVP-VAR data set and fit the contemporaneous TVP-AR model.

    Parameters
    ----------
    run : int
        Identifier of the run; it is only used in the progress message.
    M, T, p : int, optional
        Forwarded to generate_dgp_tvp_var, generate_contemp_matrices and
        tvp_ar_contemp. The defaults are the values that were previously
        hard-coded in the function body.
    train : int, optional
        Forwarded to tvp_ar_contemp and used as the exclusive upper column
        bound when slicing the true coefficient path.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp]`` where the first two come
        straight from tvp_ar_contemp and the last is the mean squared
        deviation between estimated and true coefficients.

    Notes
    -----
    The function does not seed any random number generator itself; callers
    that need reproducible or independent runs must seed before calling.
    """
    start_time = time.time()

    y, coefficients = generate_dgp_tvp_var(M, T, p, 1/2, 1/4, 4*1e-5, 1, 1/2, 1e-2)
    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp = tvp_ar_contemp(T, M, p, train, X_matrix_contemp, y_matrix_contemp)
    msd_contemp = np.mean((mt1t_coeff_contemp - coefficients[:,1:train])**2)

    elapsed_time = time.time() - start_time

    print(f'Run: {run} | elapsed for run: {round(elapsed_time,4)}')

    return [msfe_contemp, alpl_contemp, msd_contemp]

In [12]:
# Globally silence every warning for the rest of the session.
# NOTE(review): this also hides warnings unrelated to model initialisation.
import warnings
warnings.filterwarnings("ignore")

In [16]:
M = 2
T = 200
p = 1
train = 150
prior = 'lasso'

contemperanous_statistics = []

# Smoke-test a single simulation run. simulation_run takes the run identifier
# as its only required argument; the previous call passed (M, T, p), which
# raises a TypeError against the one-argument definition in this notebook.
simulation_run(0)

[array([1.32723715, 1.31447714, 1.38644336, 1.479075  , 1.37413676,
        1.38149867, 1.36640555, 1.37664842]),
 array([0.26692014, 0.23180799, 0.23751774, 0.21921769, 0.35985002,
        0.39586062, 0.05366612, 0.        ]),
 0.028054248195413554]

In [None]:
%%time
import time
import os
from multiprocessing import Pool, Array


# They are going to be some warnings for the first iterations of the model (has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

iterations = np.arange(1,16,1)

M = 2

n_iterations = 200
np.random.seed(11111)

# M = 10
T = 200
p = 1
train = 150
prior = 'lasso'

if __name__ == '__main__':
    
    pool = Pool()
    simulation = pool.map(simulation_run, iterations)

In [22]:
# First eight entries of the first run's result after np.block joins it.
# NOTE(review): given simulation_run's return order these are presumably the
# MSFE values — confirm.
np.block(simulation[0])[0:8]

array([0.94535682, 1.27283063, 1.32795231, 1.27587858, 1.21390982,
       1.24119635, 1.23151354, 1.20167439])

In [43]:
# Export every simulated data set (one y/x/coefficients CSV triple per run and
# per system size M).
different_m = [2,5,10]

for M in different_m:

    n_iterations = 200
    np.random.seed(12345)

    # M = 10
    T = 200
    p = 1
    train = 150
    prior = 'lasso'

    contemperanous_statistics = []
    non_contemperanous_statistics = []

    for run in range(n_iterations):

        y, coefficients = generate_dgp_tvp_var(M, T, p, 1/2, 1/4, 4*1e-5, 1, 1/2, 1e-2)

        # The writes belong inside the run loop: previously they were indented
        # one level too shallow, so only the last run of each M was saved.
        x = y[:,:-1]
        np.savetxt(f'../simulations/datasets/y_{M}_{T}_{p}_{prior}_{run}.csv',y[:,1:].T, delimiter=",")
        np.savetxt(f'../simulations/datasets/x_{M}_{T}_{p}_{prior}_{run}.csv',x.T, delimiter=",")
        # Path fixed from '../simuations/' so all three files share a directory.
        np.savetxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{prior}_{run}.csv', coefficients[:,1:], delimiter=",")

### Other stuff

In [None]:
# For each horizon h+1, compare the model MSFE with a no-change benchmark:
# the squared difference between y_own and its value h+1 columns earlier,
# averaged over time, taking the first series only ([0]).
# NOTE(review): `y_own` is not assigned in any cell above and `msfe_ar` is only
# assigned in a later cell, so this cell fails on a fresh top-to-bottom run.
# NOTE(review): the label says VAR_IE_VI while the numerator is msfe_ar — confirm.
total_h = 8

msfe_rw = np.zeros(total_h)

for h in range(total_h):

    msfe_rw[h] = np.mean((y_own[:,(train-1+h):] - y_own[:,(train-2):-(h+1)])**2,1)[0]
    ratio = msfe_ar[h]/msfe_rw[h]
    
    print(f'Ratio VAR_IE_VI/RW ({h+1}-step ahead): {np.round(ratio,4)}')
    

In [None]:
# Mean squared deviation between the stacked estimates and A_1_vec[:, 1:train].
# NOTE(review): `mt1t_mean_set` and `A_1_vec` are not assigned in any cell above.
np.mean((np.vstack(mt1t_mean_set)[:,:] - A_1_vec[:,1:train])**2)

In [None]:
# Print the row means of the estimates next to those of A_1_vec[:, 1:train].
# NOTE(review): `full_mt1t` and `A_1_vec` are not assigned in any cell above.
print(f'VAR_IE_VI: {full_mt1t.mean(1)}')
print(f'TRUE: {A_1_vec[:,1:train].mean(1)}')

In [None]:
# Plot row 1 of full_mt1t.
# NOTE(review): `full_mt1t` is not assigned in any cell above.
plt.plot(full_mt1t[1,:])

In [None]:
# tvp_ar = TVPVARModel(np.expand_dims(x_matlab,1),y_matlab, p, 300, False, homoskedastic=False)
# tvp_ar.k = 10
# tvp_ar.iterations = 1000
# tvp_ar.initialize_priors(prior='svss',prior_parameters={'g0': 1, 'h0': 12, 'pi0': 0.5})
# mt1t, st1t = tvp_ar.train(threshold=1e-200)

In [None]:
# Mean squared deviation of mt1t from the first M rows of A_1_vec (first and
# last columns dropped), plus the model's in-sample MSFE.
# NOTE(review): `tvp_ar` is only created in commented-out cells, `A_1_vec` is
# not assigned anywhere above, and `mt1t` is first assigned in a later cell.
MSD = ((mt1t - A_1_vec[0:M,1:-1])**2).mean()
insample_msfe_ar = tvp_ar.insample_msfe()

print(f'MSD: {MSD} | insample MSFE: {insample_msfe_ar}')


In [None]:
# Display the model's sigma_t attribute.
# NOTE(review): `tvp_ar` is only created in commented-out cells.
tvp_ar.sigma_t


In [None]:
# Forecast metrics for horizons up to total_h.
# NOTE(review): `tvp_ar` is only created in commented-out cells.
total_h = 8

msfe_ar, aapl_ar = tvp_ar.calculate_metrics(total_h, constant=False)

In [None]:
# Ratio of the AR model MSFE to the no-change benchmark of the first series,
# per horizon (same benchmark expression as in the earlier msfe_rw cell).
# NOTE(review): `y_own` is not assigned in any cell above.
ratio_msfe = np.zeros(total_h)
for h in range(total_h):
    ratio_msfe[h] = msfe_ar[h]/np.mean((y_own[:,(train-1+h):] - y_own[:,(train-2):-(h+1)])**2,1)[0]
    print(f'Ratio AR_VI/RW ({h+1}-step ahead): {np.round(ratio_msfe[h],4)}')
    

In [235]:
# Row means of full_mt1t.
# NOTE(review): the recorded output below looks like a shape rather than means,
# so it is probably stale; `full_mt1t` is not assigned in any cell above.
full_mt1t.mean(1)

(400,)

In [None]:
# Row means of the first M rows of A_1_vec.
# NOTE(review): `A_1_vec` is not assigned in any cell above.
A_1_vec[:M,:].mean(1)

### TVP-VAR

Default implementation from statsmodels


In [None]:
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

# Fit a one-lag VAR on y (transposed before being passed in) and show the
# estimation summary.
# NOTE(review): newer statsmodels releases appear to spell the no-trend option
# 'n' instead of 'nc' — confirm against the installed version.
var_ols = VAR(y.T)
results_var = var_ols.fit(1, trend='nc')
results_var.summary()

In [243]:
# VAR coefficient array squeezed and reshaped to a vector of length M*M.
# NOTE(review): the ValueError recorded below mentions shapes this expression
# does not produce, so that output is probably stale.
np.squeeze(results_var.coefs).reshape(M*M)

ValueError: operands could not be broadcast together with shapes (400,) (400,149) 

In [262]:
# MSD of the constant VAR coefficients against the coefficient path.
# A (M*M, 1) column broadcasts across all columns of A_1_vec[:, 1:train]; the
# previous np.repeat(..., 149) only matched that width when train == 150.
# NOTE(review): `A_1_vec` is not assigned in any cell above.
msd_const = np.mean((np.squeeze(results_var.coefs).reshape(M*M, 1) - A_1_vec[:,1:train])**2)

In [263]:
# MSD of the time-varying estimates against the same coefficient path.
# NOTE(review): `full_mt1t` and `A_1_vec` are not assigned in any cell above.
msd_tvp = np.mean((full_mt1t - A_1_vec[:,1:train])**2)

In [265]:
# Relative MSD: values above 1 mean msd_tvp is larger than msd_const.
msd_tvp/msd_const

1.817871705297146

Own implementation

In [None]:
# Fit the project's TVPVARModel with a 'lasso' prior and 50 iterations; only
# the first value returned by train() is kept.
# NOTE(review): `X_own` and `y_own` are not assigned in any cell above; the
# matrices built earlier are named X_matrix / y_matrix — confirm which is meant.
tvp_var = TVPVARModel(X_own, y_own.T, p, train, False, homoskedastic=True)
tvp_var.iterations = 50
tvp_var.initialize_priors(prior='lasso')#, prior_parameters = {'a0':10, 'b0':10})
mt1t, __ = tvp_var.train()

In [None]:
# Display the fitted model's bt_h attribute.
tvp_var.bt_h

In [None]:
# Mean squared deviation of the estimates from A_1_vec[:, 1:train], plus the
# model's in-sample MSFE.
# NOTE(review): `A_1_vec` is not assigned in any cell above.
MSD = ((mt1t - A_1_vec[:,1:train])**2).mean()
insample_msfe_var = tvp_var.insample_msfe()

print(f'MSD: {MSD} | insample MSFE: {insample_msfe_var}')

Visualise the first coefficient *(estimated versus true)*

In [None]:
# Estimated path: row 0 of mt1t.
plt.plot(mt1t[0,:])

In [None]:
# Reference path: row 0 of A_1_vec without its first column.
# NOTE(review): `A_1_vec` is not assigned in any cell above.
plt.plot(A_1_vec[0,1:])

In [None]:
# Column 0 of the fitted model's sigma_t attribute.
plt.plot(tvp_var.sigma_t[:,0])

In [None]:
# Forecast metrics of the fitted TVP-VAR for horizons up to total_h.
total_h = 8

msfe_var, aapl_var = tvp_var.calculate_metrics(total_h, constant=False)


In [None]:
# Ratio of the TVP-VAR MSFE to a no-change benchmark, per horizon and series.
# NOTE(review): `y_own` is not assigned in any cell above; msfe_var[h] is
# assumed to hold one value per series — confirm.
ratio_msfe = np.zeros((total_h,M))
for h in range(total_h):
    # Benchmark MSFE per series (mean over time). The previous version indexed
    # this with [0], dividing every series by the benchmark of series 0 only.
    rw_msfe_per_series = np.mean((y_own[:,(train-1+h):] - y_own[:,(train-2):-(h+1)])**2,1)
    ratio_msfe[h,:] = msfe_var[h]/rw_msfe_per_series
    print(f'Ratio VAR_VI/RW ({h+1}-step ahead): {np.round(ratio_msfe[h].mean(),4)}')
    

In [None]:
# Mean of msfe_var along axis 1.
msfe_var.mean(1)


In [None]:
# Slice [:, 0, 0] of the model's y_pred attribute.
# NOTE(review): axis meaning of y_pred is not visible here — confirm.
plt.plot(tvp_var.y_pred[:,0,0])


In [None]:
# Row 0 of y_own from column `train` onward.
# NOTE(review): `y_own` is not assigned in any cell above.
plt.plot(y_own[0,train:])
