## Startup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import grangercausalitytests, adfuller, coint
from statsmodels.tools.eval_measures import rmse, aic

import os
import pickle

In [2]:
# Seed NumPy's global random generator so NumPy-based randomness is repeatable across runs.
np.random.seed(42)

In [20]:
# Input data, intermediate dumps and results each live in a sibling folder
# one level above the notebook's working directory.
dataroute, dumproute, resultsroute = (
    os.path.join("..", folder) for folder in ("data", "dump", "results")
)

## Data Retrieval

In [4]:
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Date window embedded in the names of the pickled datasets.
start, end = '2013-01-01', "2023-06-01"

# Processed dataset for the window.
name = f'processed_dataset_{start}_{end}.pickle'
filename = os.path.join(dataroute, name)
data = _read_pickle(filename)

# Final DataFrame for the same window; this is the frame used in the cells below.
name = f'finaldf_dataset_{start}_{end}.pickle'
filename = os.path.join(dataroute, name)
df = _read_pickle(filename)

In [5]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0_level_0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,YPF_rets,...,BMA_gk_vol,BMA.BA_rets,BMA.BA_log_rets,BMA.BA_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-03,0.007552,0.007524,0.000129,-0.012748,-0.01283,0.001228,0.010616,0.01056,0.000677,-0.004016,...,0.000557,-0.007812,-0.007843,0.000857,0.011942,0.011871,2e-05,-0.002186,-0.002188,0.000129
2013-01-04,0.007092,0.007067,0.000158,-0.010043,-0.010094,0.000554,-0.006303,-0.006323,0.000208,-0.000672,...,0.000695,-0.027559,-0.027946,0.000692,0.021496,0.021269,0.000228,-0.008133,-0.008167,0.000158
2013-01-07,-0.001035,-0.001035,2.2e-05,-0.014493,-0.014599,0.000517,0.002114,0.002112,6.3e-05,0.00538,...,0.000862,0.012146,0.012073,0.000494,-0.004061,-0.004069,1.7e-05,-0.007749,-0.007779,2.2e-05


# VAR Training

## Lag selection
In this instance, we select a single optimal lag length to be used by all models.
We choose that value from a VAR fitted with GGAL.BA, since this is the stock with the highest trading volume in the Argentine market.

If we instead needed to fit every VAR model at its own optimal lag, we would use
`VAR().fit(maxlags=15, ic='aic')`

In [6]:
# List the column names available in the frame.
df.columns

Index(['^MERV_rets', '^MERV_log_rets', '^MERV_gk_vol', 'GGAL_rets',
       'GGAL_log_rets', 'GGAL_gk_vol', 'GGAL.BA_rets', 'GGAL.BA_log_rets',
       'GGAL.BA_gk_vol', 'YPF_rets', 'YPF_log_rets', 'YPF_gk_vol',
       'YPFD.BA_rets', 'YPFD.BA_log_rets', 'YPFD.BA_gk_vol', 'EDN_rets',
       'EDN_log_rets', 'EDN_gk_vol', 'EDN.BA_rets', 'EDN.BA_log_rets',
       'EDN.BA_gk_vol', 'BMA_rets', 'BMA_log_rets', 'BMA_gk_vol',
       'BMA.BA_rets', 'BMA.BA_log_rets', 'BMA.BA_gk_vol', 'USD_rets',
       'USD_log_rets', 'USD_gk_vol', 'USD_^MERV_rets', 'USD_^MERV_log_rets',
       'USD_^MERV_gk_vol'],
      dtype='object')

In [7]:
# Group the columns by the kind of series they hold, based on the column-name suffix.
log_rets_list = [column for column in df.columns if column.endswith("log_rets")]
vol_list = [column for column in df.columns if column.endswith("vol")]
# Log-return column names also end in "rets", so they are excluded explicitly.
# (Filtering on "log_rets" here and then excluding every log-return column
# would always produce an empty list.)
simple_rets_list = [
    column for column in df.columns
    if column.endswith("rets") and column not in log_rets_list
]

In [8]:
# Display the collected log-return column names.
log_rets_list

['^MERV_log_rets',
 'GGAL_log_rets',
 'GGAL.BA_log_rets',
 'YPF_log_rets',
 'YPFD.BA_log_rets',
 'EDN_log_rets',
 'EDN.BA_log_rets',
 'BMA_log_rets',
 'BMA.BA_log_rets',
 'USD_log_rets',
 'USD_^MERV_log_rets']

In [9]:
# Four-variable system used for lag selection: log returns and gk_vol
# for USD and for GGAL.BA, in that order.
components = [
    f"{asset}_{measure}"
    for asset in ("USD", "GGAL.BA")
    for measure in ("log_rets", "gk_vol")
]
model = VAR(df[components])

  self._init_dates(dates, freq)


In [10]:
# Maximum lag to evaluate: 12 * (nobs / 100) ** (1/4), rounded. This is the value
# statsmodels uses as its default when maxlags is not specified.
max_lags = int(np.round(12 * (len(df.index) / 100.) ** (1. / 4), 0))

models_dict = {}   # lag length -> fitted VAR results
ic_records = {}    # lag length -> information criteria of that fit
# The upper bound is inclusive so the table covers every lag up to max_lags.
for i in range(1, max_lags + 1):
    result = model.fit(maxlags=i)
    # Keep the fitted results for this lag, not the shared unfitted model object.
    models_dict[i] = result
    ic_records[i] = {"AIC": result.aic, "BIC": result.bic, "HQIC": result.hqic}

# Build the table in one step so the columns are numeric rather than object dtype.
results_df = pd.DataFrame.from_dict(
    ic_records, orient="index", columns=["AIC", "BIC", "HQIC"]
)

In [11]:
# Show the information criteria collected for each lag length.
results_df

Unnamed: 0,AIC,BIC,HQIC
1,-42.870958,-42.824703,-42.854173
2,-42.910428,-42.827141,-42.880203
3,-42.913366,-42.793023,-42.869693
4,-42.918427,-42.761004,-42.861297
5,-42.927098,-42.73257,-42.856501
6,-42.927589,-42.695931,-42.843515
7,-42.924991,-42.656179,-42.827431
8,-42.929427,-42.623437,-42.818372
9,-42.927667,-42.584475,-42.803107
10,-42.926289,-42.54587,-42.788215


In [12]:
# Upper bound for the lag search: 12 * (nobs / 100) ** (1/4), rounded to an integer.
lag_ceiling = int(np.round(12 * (len(df.index) / 100.) ** (1. / 4), 0))

# Let statsmodels compare the information criteria for each lag up to that bound.
order_select = model.select_order(lag_ceiling)
order_select.summary()

0,1,2,3,4
,AIC,BIC,FPE,HQIC
0.0,-42.52,-42.51,3.412e-19,-42.52
1.0,-42.87,-42.83,2.404e-19,-42.86
2.0,-42.91,-42.83*,2.309e-19,-42.88*
3.0,-42.92,-42.79,2.301e-19,-42.87
4.0,-42.92,-42.76,2.288e-19,-42.86
5.0,-42.93,-42.73,2.273e-19,-42.86
6.0,-42.93,-42.69,2.274e-19,-42.84
7.0,-42.92,-42.65,2.280e-19,-42.83
8.0,-42.93,-42.62,2.267e-19,-42.82


In [13]:
# Lag order chosen by each information criterion.
order_select.selected_orders

{'aic': 18, 'bic': 2, 'hqic': 2, 'fpe': 18}

The selected order will be, by parsimony, 2 lags: this is the order chosen by both BIC and HQIC, whereas AIC and FPE select 18.

In [16]:
# Lag order used when fitting the VAR models.
orderlag=2

In [22]:
# Load the list of tickers from the dump folder.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
tickerlist_path = os.path.join(dumproute, "tickerlist.pickle")
with open(tickerlist_path, "rb") as f:
    tickerlist = pickle.load(f)

tickerlist

['^MERV',
 'GGAL',
 'GGAL.BA',
 'YPF',
 'YPFD.BA',
 'EDN',
 'EDN.BA',
 'BMA',
 'BMA.BA']

In [38]:
def _fit_stock_var(frame, stock, lags):
    """Fit a VAR with ``lags`` lags on the USD series and the series of ``stock``.

    The system uses the ``USD_log_rets``, ``USD_gk_vol``, ``{stock}_log_rets`` and
    ``{stock}_gk_vol`` columns of ``frame``, in that order, and returns the fitted
    results object produced by ``VAR(...).fit(lags)``.
    """
    columns = ['USD_log_rets', 'USD_gk_vol', f'{stock}_log_rets', f'{stock}_gk_vol']
    return VAR(frame[columns]).fit(lags)


# One fitted VAR per ticker, keyed by ticker symbol.
# Fitting inside the helper keeps the notebook-level `model` (the one used for lag
# selection) from being overwritten by the last ticker's model.
var_models = {stock: _fit_stock_var(df, stock, orderlag) for stock in tickerlist}

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [39]:
# Coefficient table of the fitted VAR for GGAL.
var_models["GGAL"].params

Unnamed: 0,USD_log_rets,USD_gk_vol,GGAL_log_rets,GGAL_gk_vol
const,0.001488,8.3e-05,0.000255,0.000846
L1.USD_log_rets,-0.051588,0.00197,0.024007,0.002592
L1.USD_gk_vol,0.831301,0.313186,-0.702593,0.727737
L1.GGAL_log_rets,-0.044058,-0.000485,0.000275,-0.002139
L1.GGAL_gk_vol,-0.201224,0.025688,0.694739,0.239321
L2.USD_log_rets,-0.018254,0.000313,0.013894,0.005261
L2.USD_gk_vol,-1.664296,0.070361,-0.358153,0.335681
L2.GGAL_log_rets,-0.024172,-0.000257,-0.00212,-0.004325
L2.GGAL_gk_vol,0.503874,0.008412,-0.57914,0.141186


'0.14.0'