In [1]:
%matplotlib inline
import time
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
from scipy import io
import pickle

# Own code
sys.path.append("../")
from utils.data_utils import generate_dgp_tvp_var, generate_matrices, generate_contemp_matrices
from utils.tvp_models import TVPVARModel, tvp_ar_contemp, tvp_ar_non_contemp, tvp_ar_contemp_decomposition

# Suppress scientific notation in numpy
np.set_printoptions(suppress=True)



In [2]:
# Set prior for all simulation runs: every run cell below builds `prior_list = [prior]` from
# this variable and the result file names embed it.
# NOTE(review): some stored outputs further down report `prior: horseshoe`, so they were
# produced with a different value than the one set here, and the grid-search cell (In [24])
# reassigns `prior` at notebook level. Re-run this cell before each experiment.
prior = 'lasso_alternative'


In [6]:
def simulation_run(run, M, prior, T=200, sparsity=0.40):
    """Fit the contemporaneous TVP-AR decomposition model to one simulated dataset.

    Loads the simulated series and the true coefficients belonging to
    (M, T, p, sparsity, run) from ``../simulations/datasets``, fits
    ``tvp_ar_contemp_decomposition`` with the requested prior and prints a
    one-line summary of the run.

    Parameters
    ----------
    run : int
        Index of the simulated dataset; part of the input file names.
    M : int
        Dimension used in the file names, passed to the matrix builder and the
        model, and used to reshape the true coefficients to (M, M, train).
    prior : str
        Prior name handed to the model unchanged.
    T : int, optional
        Sample length used in the file names; ``train`` is ``T - 25`` and the
        model receives ``T - p`` as its first argument. Defaults to 200.
    sparsity : float, optional
        Sparsity level used in the file names. Defaults to 0.40.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the first
        two entries are returned by ``model.result()`` and ``msd_contemp`` is
        the mean squared difference between true and estimated coefficients
        over the first ``train`` periods.
    """
    # Fixed for simulation: the reshape below assumes M * M coefficient rows per period
    p = 1
    train = T - 25

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(
        T - p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior,
        print_status=False, iterations=25,
    )
    # Only the first three results are used; anything after them is ignored
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # Index 0 along axis 1 of the estimates is skipped before comparing with the truth
    # (presumably the intercept -- TODO confirm against utils.tvp_models)
    true_coefficients = coefficients[:, :train].reshape(M, M, train)
    msd_contemp = np.mean((true_coefficients - coeff_contemp[:, 1:, :train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [7]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (this has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed the global NumPy generator of this (parent) process.
# NOTE(review): the simulations run in separate worker processes; whether this seed makes
# their random draws reproducible depends on the multiprocessing start method and on how
# the model draws random numbers -- confirm before relying on it.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]  # other priors used in this project: "lasso_alternative", "horseshoe"

# One (run, M, prior) task per dataset, ordered by prior, then M, then run
args_tuples = [
    (n, m, prior_name)
    for prior_name in prior_list
    for m in M
    for n in iterations_set
]

if __name__ == '__main__':

    # The context manager guarantees the workers are cleaned up even if a task raises;
    # previously the pool was never closed, so every re-run of this cell left a full set
    # of worker processes behind.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)
        # starmap has returned every result; let the idle workers exit normally
        pool.close()
        pool.join()

Run: 106, M: 3 & prior: lasso_alternative -> MSD: 0.025339772391327443 | MSFE: 1.0769863860039823 | ALPL: -26.8468815721512
Run: 29, M: 3 & prior: lasso_alternative -> MSD: 0.02943851612733063 | MSFE: 1.499757901273027 | ALPL: -29.758700072390518
Run: 1, M: 3 & prior: lasso_alternative -> MSD: 0.023068513923706468 | MSFE: 1.5746530153820233 | ALPL: -27.215973044175062
Run: 64, M: 3 & prior: lasso_alternative -> MSD: 0.015059187247618523 | MSFE: 2.7435834500762137 | ALPL: -29.572650932869966
Run: 43, M: 3 & prior: lasso_alternative -> MSD: 0.023871570697395133 | MSFE: 1.0369753705789 | ALPL: -25.336590799439463
Run: 92, M: 3 & prior: lasso_alternative -> MSD: 0.02251963847393826 | MSFE: 5.956788553855162 | ALPL: -29.72970050187565
Run: 71, M: 3 & prior: lasso_alternative -> MSD: 0.024477318572071246 | MSFE: 0.963368654435617 | ALPL: -28.15724963841419
Run: 8, M: 3 & prior: lasso_alternative -> MSD: 0.02705725174791797 | MSFE: 1.1283172174312601 | ALPL: -29.95469486125686
Run: 50, M: 3 &

In [8]:
# Settings of the experiment that has just been run; they are stored next to the results
# and also make up the name of the output file.
T, train, p, sparsity = 200, 175, 1, 0.40

# NOTE(review): the unnamed numbers are not explained anywhere in this notebook --
# presumably settings of the data-generating process plus the seed (12345); confirm.
simulation_parameters = [
    T, p, train,
    1/3, 1/9, 4*1e-5, 1/6, 1/2,
    sparsity,
    1e-2, 1e-9, 12345,
]

# The pickle holds two entries: the list of per-run results and the parameter list
dump_to_disk = [simulation_results, simulation_parameters]

with open(f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber_2.pkl', mode='wb') as f:
    pickle.dump(dump_to_disk, f, protocol=pickle.HIGHEST_PROTOCOL)
        

T = 200, SPARSITY = 0.20

In [15]:
def simulation_run(run, M, prior, T=200, sparsity=0.20):
    """Fit the contemporaneous TVP-AR decomposition model to one simulated dataset.

    Loads the simulated series and the true coefficients belonging to
    (M, T, p, sparsity, run) from ``../simulations/datasets``, fits
    ``tvp_ar_contemp_decomposition`` with the requested prior and prints a
    one-line summary of the run.

    Parameters
    ----------
    run : int
        Index of the simulated dataset; part of the input file names.
    M : int
        Dimension used in the file names, passed to the matrix builder and the
        model, and used to reshape the true coefficients to (M, M, train).
    prior : str
        Prior name handed to the model unchanged.
    T : int, optional
        Sample length used in the file names; ``train`` is ``T - 25`` and the
        model receives ``T - p`` as its first argument. Defaults to 200.
    sparsity : float, optional
        Sparsity level used in the file names. Defaults to 0.20.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the first
        two entries are returned by ``model.result()`` and ``msd_contemp`` is
        the mean squared difference between true and estimated coefficients
        over the first ``train`` periods.
    """
    # Fixed for simulation: the reshape below assumes M * M coefficient rows per period
    p = 1
    train = T - 25

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(
        T - p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior,
        print_status=False, iterations=25,
    )
    # Starred target: works whether result() returns exactly the values used here or
    # additional ones after them (a strict four-way unpack raises ValueError in that case)
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # Index 0 along axis 1 of the estimates is skipped before comparing with the truth
    # (presumably the intercept -- TODO confirm against utils.tvp_models)
    true_coefficients = coefficients[:, :train].reshape(M, M, train)
    msd_contemp = np.mean((true_coefficients - coeff_contemp[:, 1:, :train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [16]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (this has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed the global NumPy generator of this (parent) process.
# NOTE(review): the simulations run in separate worker processes; whether this seed makes
# their random draws reproducible depends on the multiprocessing start method and on how
# the model draws random numbers -- confirm before relying on it.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]

# One (run, M, prior) task per dataset, ordered by prior, then M, then run
args_tuples = [
    (n, m, prior_name)
    for prior_name in prior_list
    for m in M
    for n in iterations_set
]

if __name__ == '__main__':

    # The context manager guarantees the workers are cleaned up even if a task raises;
    # previously the pool was never closed, so every re-run of this cell left a full set
    # of worker processes behind.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)
        # starmap has returned every result; let the idle workers exit normally
        pool.close()
        pool.join()

Run: 1, M: 3 & prior: horseshoe -> MSD: 0.02046429105415826 | MSFE: 1.68171682801187 | ALPL: -28.498134816518807
Run: 29, M: 3 & prior: horseshoe -> MSD: 0.026324315273252956 | MSFE: 1.0518461320577794 | ALPL: -27.070675273313775
Run: 71, M: 3 & prior: horseshoe -> MSD: 0.024528783626680695 | MSFE: 0.9547720226835491 | ALPL: -28.327308966425804
Run: 106, M: 3 & prior: horseshoe -> MSD: 0.02545498458785699 | MSFE: 1.044885347378615 | ALPL: -27.419579871680156
Run: 64, M: 3 & prior: horseshoe -> MSD: 0.015036662995255681 | MSFE: 2.7354282771201373 | ALPL: -29.834563867395687
Run: 43, M: 3 & prior: horseshoe -> MSD: 0.02015586275266593 | MSFE: 1.1319032851497637 | ALPL: -26.6945596724026
Run: 99, M: 3 & prior: horseshoe -> MSD: 0.024196589939279336 | MSFE: 1.6556832120008167 | ALPL: -28.557290292708892
Run: 57, M: 3 & prior: horseshoe -> MSD: 0.01582068594491665 | MSFE: 0.7373359385164735 | ALPL: -26.688238643255715
Run: 92, M: 3 & prior: horseshoe -> MSD: 0.022597825026982002 | MSFE: 5.8

In [17]:
# Settings of the experiment that has just been run; they are stored next to the results
# and also make up the name of the output file.
T, train, p, sparsity = 200, 175, 1, 0.20

# NOTE(review): the unnamed numbers are not explained anywhere in this notebook --
# presumably settings of the data-generating process plus the seed (12345); confirm.
simulation_parameters = [
    T, p, train,
    1/3, 1/9, 4*1e-5, 1/6, 1/2,
    sparsity,
    1e-2, 1e-9, 12345,
]

# The pickle holds two entries: the list of per-run results and the parameter list
dump_to_disk = [simulation_results, simulation_parameters]

with open(f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber.pkl', mode='wb') as f:
    pickle.dump(dump_to_disk, f, protocol=pickle.HIGHEST_PROTOCOL)
        

T = 100, SPARSITY = 0.40

In [18]:
def simulation_run(run, M, prior, T=100, sparsity=0.40):
    """Fit the contemporaneous TVP-AR decomposition model to one simulated dataset.

    Loads the simulated series and the true coefficients belonging to
    (M, T, p, sparsity, run) from ``../simulations/datasets``, fits
    ``tvp_ar_contemp_decomposition`` with the requested prior and prints a
    one-line summary of the run.

    Parameters
    ----------
    run : int
        Index of the simulated dataset; part of the input file names.
    M : int
        Dimension used in the file names, passed to the matrix builder and the
        model, and used to reshape the true coefficients to (M, M, train).
    prior : str
        Prior name handed to the model unchanged.
    T : int, optional
        Sample length used in the file names; ``train`` is ``T - 25`` (75 for
        the default) and the model receives ``T - p`` as its first argument.
        Defaults to 100.
    sparsity : float, optional
        Sparsity level used in the file names. Defaults to 0.40.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the first
        two entries are returned by ``model.result()`` and ``msd_contemp`` is
        the mean squared difference between true and estimated coefficients
        over the first ``train`` periods.
    """
    # Fixed for simulation: the reshape below assumes M * M coefficient rows per period
    p = 1
    train = T - 25

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(
        T - p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior,
        print_status=False, iterations=25,
    )
    # Starred target: works whether result() returns exactly the values used here or
    # additional ones after them (a strict four-way unpack raises ValueError in that case)
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # Index 0 along axis 1 of the estimates is skipped before comparing with the truth
    # (presumably the intercept -- TODO confirm against utils.tvp_models)
    true_coefficients = coefficients[:, :train].reshape(M, M, train)
    msd_contemp = np.mean((true_coefficients - coeff_contemp[:, 1:, :train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [19]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (this has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed the global NumPy generator of this (parent) process.
# NOTE(review): the simulations run in separate worker processes; whether this seed makes
# their random draws reproducible depends on the multiprocessing start method and on how
# the model draws random numbers -- confirm before relying on it.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]  # other priors used in this project: "lasso_alternative", "horseshoe"

# One (run, M, prior) task per dataset, ordered by prior, then M, then run
args_tuples = [
    (n, m, prior_name)
    for prior_name in prior_list
    for m in M
    for n in iterations_set
]

if __name__ == '__main__':

    # The context manager guarantees the workers are cleaned up even if a task raises;
    # previously the pool was never closed, so every re-run of this cell left a full set
    # of worker processes behind.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)
        # starmap has returned every result; let the idle workers exit normally
        pool.close()
        pool.join()

Run: 92, M: 3 & prior: horseshoe -> MSD: 0.033000002352520125 | MSFE: 1.0835066410063596 | ALPL: -28.56790930669623
Run: 50, M: 3 & prior: horseshoe -> MSD: 0.024332568869079214 | MSFE: 1.2747582637221178 | ALPL: -30.04995191953038
Run: 36, M: 3 & prior: horseshoe -> MSD: 0.0498925292720887 | MSFE: 1.0758308813570943 | ALPL: -30.177632382493268
Run: 8, M: 3 & prior: horseshoe -> MSD: 0.020257336080267956 | MSFE: 2.8921104817835976 | ALPL: -29.162853886250485
Run: 29, M: 3 & prior: horseshoe -> MSD: 0.01915782890470828 | MSFE: 0.5076417061019153 | ALPL: -29.644140434398572
Run: 64, M: 3 & prior: horseshoe -> MSD: 0.021517741697433233 | MSFE: 1.884724053686927 | ALPL: -29.76654092218758
Run: 1, M: 3 & prior: horseshoe -> MSD: 0.013524825569844584 | MSFE: 1.5118975444113802 | ALPL: -29.13066069423696
Run: 71, M: 3 & prior: horseshoe -> MSD: 0.021810798870728076 | MSFE: 1.2589286963993287 | ALPL: -28.538147985038414
Run: 22, M: 3 & prior: horseshoe -> MSD: 0.03592473831733181 | MSFE: 7.019

In [20]:
# Settings of the experiment that has just been run; they are stored next to the results
# and also make up the name of the output file.
T, train, p, sparsity = 100, 75, 1, 0.40

# NOTE(review): the unnamed numbers are not explained anywhere in this notebook --
# presumably settings of the data-generating process plus the seed (12345); confirm.
simulation_parameters = [
    T, p, train,
    1/3, 1/9, 4*1e-5, 1/6, 1/2,
    sparsity,
    1e-2, 1e-9, 12345,
]

# The pickle holds two entries: the list of per-run results and the parameter list
dump_to_disk = [simulation_results, simulation_parameters]

with open(f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber.pkl', mode='wb') as f:
    pickle.dump(dump_to_disk, f, protocol=pickle.HIGHEST_PROTOCOL)

T = 100, SPARSITY = 0.20

In [21]:
def simulation_run(run, M, prior, T=100, sparsity=0.20):
    """Fit the contemporaneous TVP-AR decomposition model to one simulated dataset.

    Loads the simulated series and the true coefficients belonging to
    (M, T, p, sparsity, run) from ``../simulations/datasets``, fits
    ``tvp_ar_contemp_decomposition`` with the requested prior and prints a
    one-line summary of the run.

    Parameters
    ----------
    run : int
        Index of the simulated dataset; part of the input file names.
    M : int
        Dimension used in the file names, passed to the matrix builder and the
        model, and used to reshape the true coefficients to (M, M, train).
    prior : str
        Prior name handed to the model unchanged.
    T : int, optional
        Sample length used in the file names; ``train`` is ``T - 25`` (75 for
        the default) and the model receives ``T - p`` as its first argument.
        Defaults to 100.
    sparsity : float, optional
        Sparsity level used in the file names. Defaults to 0.20.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]`` where the first
        two entries are returned by ``model.result()`` and ``msd_contemp`` is
        the mean squared difference between true and estimated coefficients
        over the first ``train`` periods.
    """
    # Fixed for simulation: the reshape below assumes M * M coefficient rows per period
    p = 1
    train = T - 25

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Contemporaneous values added
    model = tvp_ar_contemp_decomposition(
        T - p, M, p, train, X_matrix_contemp, y_matrix_contemp, prior,
        print_status=False, iterations=25,
    )
    # Starred target: works whether result() returns exactly the values used here or
    # additional ones after them (a strict four-way unpack raises ValueError in that case)
    msfe_contemp, alpl_contemp, coeff_contemp, *_ = model.result()

    # Index 0 along axis 1 of the estimates is skipped before comparing with the truth
    # (presumably the intercept -- TODO confirm against utils.tvp_models)
    true_coefficients = coefficients[:, :train].reshape(M, M, train)
    msd_contemp = np.mean((true_coefficients - coeff_contemp[:, 1:, :train])**2)

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]

In [22]:
%%time
import time
import os
from multiprocessing import Pool, Array

# There are going to be some noisy warnings during the first iterations of the model (this has to do with initialization)
import warnings
warnings.filterwarnings("ignore")

# Seed the global NumPy generator of this (parent) process.
# NOTE(review): the simulations run in separate worker processes; whether this seed makes
# their random draws reproducible depends on the multiprocessing start method and on how
# the model draws random numbers -- confirm before relying on it.
np.random.seed(12345)
n_iterations = 200

iterations_set = np.arange(1, n_iterations + 1, 1)
M = (3, 7)
prior_list = [prior]  # other priors used in this project: "lasso_alternative", "horseshoe"

# One (run, M, prior) task per dataset, ordered by prior, then M, then run
args_tuples = [
    (n, m, prior_name)
    for prior_name in prior_list
    for m in M
    for n in iterations_set
]

if __name__ == '__main__':

    # The context manager guarantees the workers are cleaned up even if a task raises;
    # previously the pool was never closed, so every re-run of this cell left a full set
    # of worker processes behind.
    with Pool() as pool:
        simulation_results = pool.starmap(simulation_run, args_tuples)
        # starmap has returned every result; let the idle workers exit normally
        pool.close()
        pool.join()

Run: 1, M: 3 & prior: horseshoe -> MSD: 0.01338718729054073 | MSFE: 1.4509245343253327 | ALPL: -29.112465661392083
Run: 50, M: 3 & prior: horseshoe -> MSD: 0.02786360185308323 | MSFE: 1.1230366307498194 | ALPL: -30.485774990247066
Run: 36, M: 3 & prior: horseshoe -> MSD: 0.0498925292720887 | MSFE: 1.0758308813570943 | ALPL: -30.177632382493268
Run: 29, M: 3 & prior: horseshoe -> MSD: 0.019571220503789625 | MSFE: 0.5091957948485746 | ALPL: -29.367870620554378
Run: 8, M: 3 & prior: horseshoe -> MSD: 0.020137180705616545 | MSFE: 2.415172589920961 | ALPL: -29.0729567050111
Run: 15, M: 3 & prior: horseshoe -> MSD: 0.02438965564331813 | MSFE: 1.928068929013247 | ALPL: -30.011798379211385
Run: 71, M: 3 & prior: horseshoe -> MSD: 0.021810798870728076 | MSFE: 1.2589286963993287 | ALPL: -28.538147985038414
Run: 22, M: 3 & prior: horseshoe -> MSD: 0.03326793018538012 | MSFE: 6.749580639858401 | ALPL: -29.860004228053917
Run: 92, M: 3 & prior: horseshoe -> MSD: 0.041218506898165926 | MSFE: 1.12936

In [23]:
# Settings of the experiment that has just been run; they are stored next to the results
# and also make up the name of the output file.
T, train, p, sparsity = 100, 75, 1, 0.20

# NOTE(review): the unnamed numbers are not explained anywhere in this notebook --
# presumably settings of the data-generating process plus the seed (12345); confirm.
simulation_parameters = [
    T, p, train,
    1/3, 1/9, 4*1e-5, 1/6, 1/2,
    sparsity,
    1e-2, 1e-9, 12345,
]

# The pickle holds two entries: the list of per-run results and the parameter list
dump_to_disk = [simulation_results, simulation_parameters]

with open(f'../simulations/results/statistics_{T}_{p}_{n_iterations}_{prior}_{sparsity}_huber.pkl', mode='wb') as f:
    pickle.dump(dump_to_disk, f, protocol=pickle.HIGHEST_PROTOCOL)

In [24]:
# Grid search over the lasso prior parameters (a0_lasso, b0_lasso) on runs 1..10:
# for every run each candidate pair is fitted with `tvp_ar_contemp` on the first `train`
# observations and the pair with the lowest mean MSFE is recorded in `optimal_set`.
#
# NOTE(review): this cell assigns the notebook-level names `M` and `prior` (as well as
# T, p, train and sparsity), so anything executed afterwards sees these values.
optimal_set = []

# Fixed for simulation -- identical for every run, therefore set once outside the loop
T = 200
p = 1
train = 175
prior_train = train-10
sparsity = 0.05
M = 2
prior = "lasso_alternative"

prior_parameters = None

# Candidate grid (also run-independent): all (a0, b0) pairs, a0 varying fastest
a0_set = np.linspace(1,5,5)
b0_set = np.hstack(([1], np.linspace(3,12,4)))
tpl_list = [(a, b) for b in b0_set for a in a0_set]

for run in range(1,11):

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

    # Mean MSFE of every candidate, in the same order as tpl_list
    msfe_list = []

    for parameters in tpl_list:

        optim_pior_parameters = {'a0_lasso':parameters[0],'b0_lasso':parameters[1]}
        # Only the first `train` observations are handed to the model; `prior_train`
        # takes the position that `train` has in the full-sample calls of this notebook
        msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp, ar_model = tvp_ar_contemp(
            train, M, p, prior_train,
            X_matrix_contemp[:train], y_matrix_contemp[:,:train],
            prior, print_status=False, iterations=100,
            prior_parameters=optim_pior_parameters,
        )

        msfe_list.append(msfe_contemp.mean())

    # First candidate that attains the smallest mean MSFE
    optimal_prior = tpl_list[msfe_list.index(min(msfe_list))]
    prior_parameters = {'a0_lasso':optimal_prior[0],'b0_lasso':optimal_prior[1]}

    print(f"Run: {run} -> Optimal prior: {optimal_prior}")
    optimal_set.append(optimal_prior)
    

OSError: ../simulations/datasets/y_2_200_1_0.05_1_het_py.csv not found.

In [None]:
def simulation_run(run, M, prior):
    """Fit ``tvp_ar_contemp`` to one simulated dataset (T = 200, sparsity = 0.05) and score it.

    Reads the simulated series and the true coefficients for (M, run) from
    ``../simulations/datasets``, builds the regression matrices with
    ``generate_contemp_matrices``, fits ``tvp_ar_contemp`` with the given prior
    (100 iterations, no explicit prior parameters) and prints a one-line summary.

    Parameters
    ----------
    run
        Index of the simulated dataset; part of the input file names.
    M
        Dimension used in the file names and passed to the matrix builder and the model.
    prior
        Prior name handed to the model unchanged.

    Returns
    -------
    list
        ``[msfe_contemp, alpl_contemp, msd_contemp, M, prior]``; ``msd_contemp`` is the
        mean squared difference between ``mt1t_coeff_contemp`` and
        ``coefficients[:, 1:train]``.

    NOTE(review): this definition replaces any ``simulation_run`` defined by an earlier
    cell of the notebook once it is executed.
    """

    # NOTE(review): start_time / elapsed_time are computed but never reported or returned
    start_time = time.time()

    # Fixed for simulation
    T = 200
    p = 1
    train = 175
    # Only referenced by the disabled tuning section below
    prior_train = train-10
    sparsity = 0.05

    y = np.loadtxt(f'../simulations/datasets/y_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")
    coefficients = np.loadtxt(f'../simulations/datasets/coefficients_{M}_{T}_{p}_{sparsity}_{run}_het_py.csv', delimiter=",")

    y_matrix_contemp, X_matrix_contemp = generate_contemp_matrices(T, M, p, y)

# --- Disabled: per-prior grid search for the prior parameters -------------------------
# Kept for reference only. While it is commented out no `prior_parameters` are passed to
# the final fit below, so the model falls back to whatever it uses when none are given.

#     prior_parameters = None

#     if prior == 'lasso_alternative':

#         a0_set = np.linspace(1,5,5)
#         b0_set = np.hstack(([1], np.linspace(3,12,4)))
#         tpl_list = [x for sub_list in [[(a,b) for a in a0_set] for b in b0_set] for x in sub_list]

#         msfe_list = []

#         for parameters in tpl_list:

#             optim_pior_parameters = {'a0_lasso':parameters[0],'b0_lasso':parameters[1]}
#             msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp, ar_model = tvp_ar_contemp(train, M, p, prior_train, X_matrix_contemp[:train], y_matrix_contemp[:,:train], prior, print_status=False, iterations=100, prior_parameters=optim_pior_parameters)

#             msfe_list.append(msfe_contemp.mean())

#         optimal_prior = tpl_list[msfe_list.index(min(msfe_list))]
#         prior_parameters = {'a0_lasso':optimal_prior[0],'b0_lasso':optimal_prior[1]}

#     elif prior == 'svss':

#         g0_set = np.linspace(1,5,5)
#         h0_set = np.hstack(([1], np.linspace(3,12,4)))
#         tpl_list = [x for sub_list in [[(g,h) for g in g0_set] for h in h0_set] for x in sub_list]

#         msfe_list = []

#         for parameters in tpl_list:

#             optim_pior_parameters = {'g0':parameters[0],'h0':parameters[1], 'pi0': 0.5}
#             msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp, ar_model = tvp_ar_contemp(train, M, p, prior_train, X_matrix_contemp[:train], y_matrix_contemp[:,:train], prior, print_status=False, iterations=100, prior_parameters=optim_pior_parameters)

#             msfe_list.append(msfe_contemp.mean())

#         optimal_prior = tpl_list[msfe_list.index(min(msfe_list))]
#         prior_parameters = {'g0': optimal_prior[0], 'h0': optimal_prior[1], 'pi0': 0.5}

#     elif prior == 'horseshoe':
#         prior_parameters = {'a0': 1, 'b0': 1}

#         a0_set = np.hstack(([1.5], np.linspace(3,12,4)))
#         b0_set = np.hstack(([1.5],np.linspace(1,5,5)))
#         tpl_list = [x for sub_list in [[(a,b) for a in a0_set] for b in b0_set] for x in sub_list]

#         msfe_list = []

#         for parameters in tpl_list:

#             optim_pior_parameters = {'a0':parameters[0],'b0':parameters[1]}
#             msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp, ar_model = tvp_ar_contemp(train, M, p, prior_train, X_matrix_contemp[:train], y_matrix_contemp[:,:train], prior, print_status=False, iterations=100, prior_parameters=optim_pior_parameters)

#             msfe_list.append(msfe_contemp.mean())

#         optimal_prior = tpl_list[msfe_list.index(min(msfe_list))]
#         prior_parameters = {'a0':optimal_prior[0],'b0':optimal_prior[1]}

    # Contemporaneous values added
    # NOTE(review): the first argument here is T, whereas the decomposition-based cells in
    # this notebook pass T-p to their model -- confirm which sample length tvp_ar_contemp expects.
    msfe_contemp, alpl_contemp, mt1t_full_contemp, mt1t_coeff_contemp, sigma_contemp, ar_model = tvp_ar_contemp(T, M, p, train, X_matrix_contemp, y_matrix_contemp, prior, print_status=False, iterations=100)
    # Truth is taken from columns 1..train-1, i.e. shifted by one column relative to the
    # start of the file (presumably to align with the one-step-ahead estimates -- TODO confirm)
    msd_contemp = np.mean((mt1t_coeff_contemp - coefficients[:,1:train])**2)

    elapsed_time = time.time() - start_time

    print(f'Run: {run}, M: {M} & prior: {prior} -> MSD: {msd_contemp} | MSFE: {msfe_contemp.mean()} | ALPL: {alpl_contemp.mean()}')

    return [msfe_contemp, alpl_contemp, msd_contemp, M, prior]