## Startup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.api import VAR
from statsmodels.tools.eval_measures import aic, bic

import os
import pickle

In [32]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
np.random.seed(42)

In [3]:
dataroute=os.path.join("..",  "data")
dumproute=os.path.join("..",  "dump")
resultsroute=os.path.join("..",  "results")

## Data Retrieval

In [4]:
start='2013-01-01'
end="2023-06-01"

name=f'processed_dataset_{start}_{end}.pickle'
filename=os.path.join(dataroute, name)
with open(filename, 'rb') as handle:
    data=pickle.load(handle)
    
name=f'finaldf_dataset_{start}_{end}.pickle'
filename=os.path.join(dataroute, name)
with open(filename, 'rb') as handle:
    df=pickle.load(handle)

In [14]:
with open(os.path.join(dumproute, "tickerlist.pickle"), 'rb') as f:
    tickerlist=pickle.load(f)
tickerlist

['^MERV',
 'GGAL',
 'GGAL.BA',
 'YPF',
 'YPFD.BA',
 'EDN',
 'EDN.BA',
 'BMA',
 'BMA.BA']

In [15]:
df.head(3)

Unnamed: 0_level_0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,YPF_rets,...,BMA_gk_vol,BMA.BA_rets,BMA.BA_log_rets,BMA.BA_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-03,0.007552,0.007524,0.000129,-0.012748,-0.01283,0.001228,0.010616,0.01056,0.000677,-0.004016,...,0.000557,-0.007812,-0.007843,0.000857,0.011942,0.011871,2e-05,-0.002186,-0.002188,0.000129
2013-01-04,0.007092,0.007067,0.000158,-0.010043,-0.010094,0.000554,-0.006303,-0.006323,0.000208,-0.000672,...,0.000695,-0.027559,-0.027946,0.000692,0.021496,0.021269,0.000228,-0.008133,-0.008167,0.000158
2013-01-07,-0.001035,-0.001035,2.2e-05,-0.014493,-0.014599,0.000517,0.002114,0.002112,6.3e-05,0.00538,...,0.000862,0.012146,0.012073,0.000494,-0.004061,-0.004069,1.7e-05,-0.007749,-0.007779,2.2e-05


# VAR Training

In [16]:
log_rets_list=[column for column in df.columns if column.endswith("log_rets")]
vol_list=[column for column in df.columns if column.endswith("vol")]
simple_rets_list=[column for column in df.columns if (column.endswith("log_rets")) and (column not in log_rets_list)]

In [62]:
emptydf=pd.DataFrame(columns=["AIC", "BIC"], index=range(1,11))
results_dict_df={stock:emptydf for stock in tickerlist}

In [66]:
aic_best_model={stock:None for stock in tickerlist}
bic_best_model={stock:None for stock in tickerlist}

aic_best_residuals={stock:None for stock in tickerlist}
bic_best_residuals={stock:None for stock in tickerlist}

In [75]:
for stock in tickerlist:
    #columns = ['USD_log_rets', 'USD_gk_vol', f'{stock}_log_rets', f'{stock}_gk_vol']
    columns = [f'{stock}_log_rets', f'{stock}_gk_vol']
    stock_data = df[columns]
    
    for lag in range(1, 11):
        model = VAR(stock_data)
        results = model.fit(lag)

        results_dict_df[stock].loc[lag, "AIC"]=results.aic
        results_dict_df[stock].loc[lag, "BIC"]=results.bic

    best_aic_lag=results_dict_df[stock]["AIC"].astype(float).idxmin()
    best_bic_lag=results_dict_df[stock]["BIC"].astype(float).idxmin()

    aic_best_model[stock]=VAR(stock_data).fit(best_aic_lag)
    bic_best_model[stock]=VAR(stock_data).fit(best_bic_lag)

    aic_best_residuals[stock]=aic_best_model[stock].resid
    bic_best_residuals[stock]=bic_best_model[stock].resid

In [76]:
aic_best_residuals

{'^MERV':             ^MERV_log_rets  ^MERV_gk_vol
 Date                                    
 2013-01-10        0.011344     -0.000608
 2013-01-11       -0.003846     -0.000526
 2013-01-14       -0.005689     -0.000365
 2013-01-15       -0.006721     -0.000419
 2013-01-16        0.014602     -0.000215
 ...                    ...           ...
 2023-05-23        0.004112     -0.000105
 2023-05-24       -0.000369     -0.000423
 2023-05-29        0.011842     -0.000340
 2023-05-30       -0.011276     -0.000444
 2023-05-31       -0.004669     -0.000399
 
 [2518 rows x 2 columns],
 'GGAL':             GGAL_log_rets  GGAL_gk_vol
 Date                                  
 2013-01-16       0.013659    -0.000821
 2013-01-17       0.011666    -0.000518
 2013-01-18      -0.005103    -0.000522
 2013-01-21      -0.001485    -0.001321
 2013-01-22       0.051281     0.002471
 ...                   ...          ...
 2023-05-23       0.009051    -0.000294
 2023-05-24       0.021285     0.000332
 2023-05-

In [39]:
with open(os.path.join(resultsroute, "VARdict.pickle"), "wb") as f:
    pickle.dump(var_models, f, pickle.HIGHEST_PROTOCOL)

NameError: name 'var_models' is not defined

# TODO: 
Falta obtener,   
para [AIC, BIC]:  
    para cada stock:  
        residuo  
        

Hacer un diccionario  
VAR_aic_residuals.pickle  
VAR_bic_residuals.pickle
