### Simulation study

This notebook contains all the code to conduct the simulation study for the VI-based TVP-BVARS. 

The three different priors are:

- Stochastic Variable Search and Selection (SVSS)
- Least Absolute Shrinkage and Selection Operator (Lasso)
- Horseshoe

A note of caution: this code runs fully in parallel and uses all the available cores on the system it is running on. Therefore, don't be alarmed if your laptop becomes sluggish (or freezes for a while); this is normal. 
You can terminate the notebook from the terminal using Ctrl+C; this will close all the Python processes associated with this notebook.

In [None]:
%matplotlib inline
import time
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
from scipy import io
import pickle

# Own code
sys.path.append("../")
from utils.data_utils import generate_dgp_tvp_var, generate_matrices, generate_contemp_matrices
from utils.tvp_models import TVPVARModel, tvp_ar_contemp, tvp_ar_non_contemp, tvp_ar_contemp_decomposition

# Suppress scientific notation in numpy
np.set_printoptions(suppress=True)

In [None]:
# Global prior selector: this is the only value that has to change to run the study with a different prior.
# The cells below read it when building the worker arguments and the result file names.
# All 8 scenarios (T in {200, 100} x sparsity in {0.40, 0.20} x M in {3, 7}) are calculated below.
# NOTE(review): the other accepted values appear to be "svss" and "horseshoe" -- confirm against utils.tvp_models.
prior = 'lasso_alternative'

In [None]:
def simulation_run(run, M, prior, T=200, sparsity=0.40):
    """Fit the contemporaneous TVP model on one simulated data set and score it.

    Parameters
    ----------
    run : int
        Index of the simulated data set; used to build the CSV file names.
    M : int
        Number of variables in the VAR; used in the file names, the matrix
        construction and the reshape of the true coefficients.
    prior : str
        Name of the shrinkage prior handed to ``tvp_ar_contemp_decomposition``.
    T : int, optional
        Number of observations of the simulated data set (default 200).
        The last 25 observations are held out, i.e. ``train = T - 25``.
    sparsity : float, optional
        Sparsity level encoded in the data set file names (default 0.40).

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the
        first two entries come straight from ``model.result()`` and
        ``msd_contemp`` is the mean squared deviation between the true and
        the estimated coefficients over the training window.
    """
    # The lag order stays fixed: the reshape to (M, M, train) below is only
    # valid for a single lag.
    p = 1
    train = T - 25

    file_tag = f'{M}_{T}_{p}_{sparsity}_{run}_het_py.csv'
    y = np.loadtxt(f'../simulations/datasets/y_{file_tag}', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{file_tag}', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(T-p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior, print_status=False, iterations=25)
    # Only the first three results are needed; anything after them is ignored.
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # NOTE(review): index 0 on axis 1 of the estimate is skipped -- presumably
    # a non-lag (intercept/contemporaneous) term; confirm against the model.
    true_coefficients = coefficients[:, :train].reshape(M, M, train)
    msd_contemp = np.mean((true_coefficients - coeff_contemp[:, 1:, :train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [None]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed NumPy's global RNG in the parent process.
# NOTE(review): whether the worker processes pick up this seed depends on the
# multiprocessing start method -- confirm if reproducibility matters.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]#, "lasso_alternative", "horseshoe"]
# One (run, M, prior) tuple per simulation, ordered by prior, then M, then run.
args_tuples = [(n, m, prior_name) for prior_name in prior_list for m in M for n in iterations_set]

if __name__ == '__main__':

    # The context manager shuts the worker processes down once starmap has
    # returned every result, instead of leaving the pool open.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)

In [None]:
# Scenario settings of the runs collected in `simulation_results`.
T, p, train, sparsity = 200, 1, 175, 0.40

# Settings stored next to the results.
# NOTE(review): the unnamed constants presumably mirror the data-generating
# settings and 12345 the seed -- confirm before relying on their meaning.
simulation_parameters = [T, p, train, 1/3, 1/9, 4*1e-5, 1/6, 1/2, sparsity, 1e-2, 1e-9, 12345]

dump_to_disk = [simulation_results, simulation_parameters]

# Pickle results and settings together in one file.
results_path = f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber_2.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(dump_to_disk, f, pickle.HIGHEST_PROTOCOL)
        

T = 200, SPARSITY = 0.20

In [None]:
def simulation_run(run, M, prior):
    """Fit the contemporaneous TVP model on one simulated data set (T=200, sparsity=0.20).

    Parameters
    ----------
    run : int
        Index of the simulated data set; used to build the CSV file names.
    M : int
        Number of variables in the VAR.
    prior : str
        Name of the shrinkage prior handed to ``tvp_ar_contemp_decomposition``.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the
        first two entries come straight from ``model.result()`` and
        ``msd_contemp`` is the mean squared deviation between the true and
        the estimated coefficients over the training window.
    """
    # Fixed for simulation
    T = 200
    p = 1
    train = T - 25
    sparsity = 0.20

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(T-p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior, print_status=False, iterations=25)
    # Tolerate extra return values from result(); a fixed-length unpack raises
    # ValueError as soon as result() returns more than the values used here.
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # NOTE(review): index 0 on axis 1 of the estimate is skipped -- presumably
    # a non-lag (intercept/contemporaneous) term; confirm against the model.
    msd_contemp = np.mean((coefficients[:,:train].reshape(M,M, train) - coeff_contemp[:,1:,:train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [None]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed NumPy's global RNG in the parent process.
# NOTE(review): whether the worker processes pick up this seed depends on the
# multiprocessing start method -- confirm if reproducibility matters.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]
# One (run, M, prior) tuple per simulation, ordered by prior, then M, then run.
args_tuples = [(n, m, prior_name) for prior_name in prior_list for m in M for n in iterations_set]

if __name__ == '__main__':

    # The context manager shuts the worker processes down once starmap has
    # returned every result, instead of leaving the pool open.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)

In [None]:
# Scenario settings of the runs collected in `simulation_results`.
T, p, train, sparsity = 200, 1, 175, 0.20

# Settings stored next to the results.
# NOTE(review): the unnamed constants presumably mirror the data-generating
# settings and 12345 the seed -- confirm before relying on their meaning.
simulation_parameters = [T, p, train, 1/3, 1/9, 4*1e-5, 1/6, 1/2, sparsity, 1e-2, 1e-9, 12345]

dump_to_disk = [simulation_results, simulation_parameters]

# Pickle results and settings together in one file.
results_path = f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(dump_to_disk, f, pickle.HIGHEST_PROTOCOL)
        

T = 100, SPARSITY = 0.40

In [None]:
def simulation_run(run, M, prior):
    """Fit the contemporaneous TVP model on one simulated data set (T=100, sparsity=0.40).

    Parameters
    ----------
    run : int
        Index of the simulated data set; used to build the CSV file names.
    M : int
        Number of variables in the VAR.
    prior : str
        Name of the shrinkage prior handed to ``tvp_ar_contemp_decomposition``.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the
        first two entries come straight from ``model.result()`` and
        ``msd_contemp`` is the mean squared deviation between the true and
        the estimated coefficients over the training window.
    """
    # Fixed for simulation
    T = 100
    p = 1
    train = 75
    sparsity = 0.40

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(T-p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior, print_status=False, iterations=25)
    # Tolerate extra return values from result(); a fixed-length unpack raises
    # ValueError as soon as result() returns more than the values used here.
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # NOTE(review): index 0 on axis 1 of the estimate is skipped -- presumably
    # a non-lag (intercept/contemporaneous) term; confirm against the model.
    msd_contemp = np.mean((coefficients[:,:train].reshape(M,M, train) - coeff_contemp[:,1:,:train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [None]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed NumPy's global RNG in the parent process.
# NOTE(review): whether the worker processes pick up this seed depends on the
# multiprocessing start method -- confirm if reproducibility matters.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]#, "lasso_alternative", "horseshoe"]
# One (run, M, prior) tuple per simulation, ordered by prior, then M, then run.
args_tuples = [(n, m, prior_name) for prior_name in prior_list for m in M for n in iterations_set]

if __name__ == '__main__':

    # The context manager shuts the worker processes down once starmap has
    # returned every result, instead of leaving the pool open.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)

In [None]:
# Scenario settings of the runs collected in `simulation_results`.
T, p, train, sparsity = 100, 1, 75, 0.40

# Settings stored next to the results.
# NOTE(review): the unnamed constants presumably mirror the data-generating
# settings and 12345 the seed -- confirm before relying on their meaning.
simulation_parameters = [T, p, train, 1/3, 1/9, 4*1e-5, 1/6, 1/2, sparsity, 1e-2, 1e-9, 12345]

dump_to_disk = [simulation_results, simulation_parameters]

# Pickle results and settings together in one file.
results_path = f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(dump_to_disk, f, pickle.HIGHEST_PROTOCOL)

T = 100, SPARSITY = 0.20

In [None]:
def simulation_run(run, M, prior):
    """Fit the contemporaneous TVP model on one simulated data set (T=100, sparsity=0.20).

    Parameters
    ----------
    run : int
        Index of the simulated data set; used to build the CSV file names.
    M : int
        Number of variables in the VAR.
    prior : str
        Name of the shrinkage prior handed to ``tvp_ar_contemp_decomposition``.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the
        first two entries come straight from ``model.result()`` and
        ``msd_contemp`` is the mean squared deviation between the true and
        the estimated coefficients over the training window.
    """
    # Fixed for simulation
    T = 100
    p = 1
    train = 75
    sparsity = 0.20

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(T-p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior, print_status=False, iterations=25)
    # Tolerate extra return values from result(); a fixed-length unpack raises
    # ValueError as soon as result() returns more than the values used here.
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # NOTE(review): index 0 on axis 1 of the estimate is skipped -- presumably
    # a non-lag (intercept/contemporaneous) term; confirm against the model.
    msd_contemp = np.mean((coefficients[:,:train].reshape(M,M, train) - coeff_contemp[:,1:,:train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [None]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed NumPy's global RNG in the parent process.
# NOTE(review): whether the worker processes pick up this seed depends on the
# multiprocessing start method -- confirm if reproducibility matters.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]#, "lasso_alternative", "horseshoe"]
# One (run, M, prior) tuple per simulation, ordered by prior, then M, then run.
args_tuples = [(n, m, prior_name) for prior_name in prior_list for m in M for n in iterations_set]

if __name__ == '__main__':

    # The context manager shuts the worker processes down once starmap has
    # returned every result, instead of leaving the pool open.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)

In [None]:
# Scenario settings of the runs collected in `simulation_results`.
T, p, train, sparsity = 100, 1, 75, 0.20

# Settings stored next to the results.
# NOTE(review): the unnamed constants presumably mirror the data-generating
# settings and 12345 the seed -- confirm before relying on their meaning.
simulation_parameters = [T, p, train, 1/3, 1/9, 4*1e-5, 1/6, 1/2, sparsity, 1e-2, 1e-9, 12345]

dump_to_disk = [simulation_results, simulation_parameters]

# Pickle results and settings together in one file.
results_path = f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(dump_to_disk, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# Grid search over the lasso hyperparameters (a0_lasso, b0_lasso) on the first
# 10 simulated data sets; the best pair per run is collected in `optimal_set`.
# NOTE: this cell rebinds the notebook-level names T, p, train, sparsity, M and
# prior, so re-run the configuration cells before launching another scenario.

# Fixed for simulation
T = 200
p = 1
train = 175
prior_train = train-10
sparsity = 0.05
M = 2
prior = "lasso_alternative"

# The candidate grid does not depend on the run, so it is built once.
# Ordering: b0 varies slowest, a0 fastest; each tuple is (a0, b0).
a0_set = np.linspace(1,5,5)
b0_set = np.hstack(([1], np.linspace(3,12,4)))
tpl_list = [(a, b) for b in b0_set for a in a0_set]

optimal_set = []

for run in range(1,11):

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    msfe_list = []

    for a0, b0 in tpl_list:

        optim_pior_parameters = {'a0_lasso': a0, 'b0_lasso': b0}
        # Only the training part of the data is handed to the model; of the
        # returned values only the MSFE is needed for the selection.
        msfe_contemp, *_ = tvp_ar_contemp(train, M, p, prior_train, X_matrix_contemp[:train], y_matrix_contemp[:,:train], prior, print_status=False, iterations=100, prior_parameters=optim_pior_parameters)

        msfe_list.append(msfe_contemp.mean())

    # First grid point with the smallest mean MSFE wins.
    optimal_prior = tpl_list[msfe_list.index(min(msfe_list))]
    prior_parameters = {'a0_lasso':optimal_prior[0],'b0_lasso':optimal_prior[1]}

    print(f"Run: {run} -> Optimal prior: {optimal_prior}")
    optimal_set.append(optimal_prior)
    

In [None]:
def simulation_run(run, M, prior):
    """Fit ``tvp_ar_contemp`` on one simulated data set (T=200, sparsity=0.05) and score it.

    Parameters
    ----------
    run : int
        Index of the simulated data set; used to build the CSV file names.
    M : int
        Number of variables in the VAR.
    prior : str
        Name of the shrinkage prior handed to ``tvp_ar_contemp``.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the
        first two entries come straight from ``tvp_ar_contemp`` and
        ``msd_contemp`` is the mean squared deviation between the estimated
        coefficients and ``coefficients[:, 1:train]``.
    """
    # Fixed for simulation
    T = 200
    p = 1
    train = 175
    sparsity = 0.05

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # A per-run hyperparameter grid search used to live here and is disabled;
    # the model below runs with its default prior parameters. For each
    # candidate it fitted tvp_ar_contemp on the first `train` observations
    # (with prior_train = train - 10, iterations=100) and kept the candidate
    # with the smallest mean MSFE. The grids were:
    #   lasso_alternative: a0_lasso in linspace(1,5,5),
    #                      b0_lasso in [1] + linspace(3,12,4)
    #   svss:              g0 in linspace(1,5,5),
    #                      h0 in [1] + linspace(3,12,4), pi0 = 0.5
    #   horseshoe:         a0 in [1.5] + linspace(3,12,4),
    #                      b0 in [1.5] + linspace(1,5,5)

    # Contemporaneous values added
    msfe_contemp, alpl_contemp, _, mt1t_coeff_contemp, *_ = tvp_ar_contemp(T, M, p, train, X_matrix_contemp, y_matrix_contemp, prior, print_status=False, iterations=100)
    msd_contemp = np.mean((mt1t_coeff_contemp - coefficients[:,1:train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]