## Startup

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.api import VAR
from statsmodels.tools.eval_measures import aic, bic

import os
import pickle

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Fix NumPy's global random state so that stochastic steps are repeatable.
np.random.seed(seed=42)

In [5]:
# Sibling folders of the notebook directory: input data, scratch dumps and results.
dataroute, dumproute, resultsroute = (
    os.path.join("..", folder) for folder in ("data", "dump", "results")
)

In [6]:
from scripts.params import get_params

# Project-wide settings; the cells below read params["tablename"] and params["tickerlist"].
params = get_params()

## Data Retrieval

In [7]:
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Processed training data for the configured table.
name = f"""processed_train_{params["tablename"]}.pickle"""
filename = os.path.join(dataroute, name)
data = _read_pickle(filename)

# Final training frame; the VAR models below are fitted on its columns.
name = f"""finaldf_train_{params["tablename"]}.pickle"""
filename = os.path.join(dataroute, name)
df = _read_pickle(filename)

In [8]:
# Tickers to model; each one is used as a column-name prefix in df (e.g. "<ticker>_log_rets").
tickerlist=params["tickerlist"]

In [9]:
# Preview the first three rows of the training frame.
df.head(3)

Unnamed: 0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,YPFD.BA_rets,...,BBAR.BA_gk_vol,BBAR_rets,BBAR_log_rets,BBAR_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
2013-01-03,0.007552,0.007524,0.000129,0.010616,0.01056,0.000677,-0.012748,-0.01283,0.001228,-0.006862,...,0.000169,-0.005725,-0.005742,0.00096,0.00883,0.008792,1.4e-05,0.001247,0.001246,0.000129
2013-01-04,0.007092,0.007067,0.000158,-0.006303,-0.006323,0.000208,-0.010043,-0.010094,0.000554,0.004936,...,0.000406,-0.019194,-0.019381,0.000635,0.018043,0.017883,0.000133,-0.005727,-0.005744,0.000158
2013-01-07,-0.001035,-0.001035,2.2e-05,0.002114,0.002112,6.3e-05,-0.014493,-0.014599,0.000517,0.010805,...,0.000492,0.015656,0.015534,0.000511,-0.002489,-0.002492,4.8e-05,-0.009769,-0.009817,2.2e-05


# VAR Training

In [10]:
# Template AIC/BIC table: one row per candidate lag order (1..10).
emptydf=pd.DataFrame(columns=["AIC", "BIC"], index=range(1,11))
# Every ticker needs its OWN table. Mapping all keys to the same `emptydf` object
# would make each write visible under every ticker, so the dict would end up
# holding only the scores of the last ticker processed.
results_dict_df={stock:emptydf.copy() for stock in tickerlist}

In [11]:
# One empty (None) slot per ticker for the model selected by each criterion ...
aic_best_model = dict.fromkeys(tickerlist)
bic_best_model = dict.fromkeys(tickerlist)

# ... and for the residuals of those selected models.
aic_best_residuals = dict.fromkeys(tickerlist)
bic_best_residuals = dict.fromkeys(tickerlist)

In [12]:
# Fit a bivariate VAR (log returns, Garman-Klass volatility column) per ticker for
# lag orders 1..10 and keep the fits with the lowest AIC and the lowest BIC.
for stock in tickerlist:
    columns = [f'{stock}_log_rets', f'{stock}_gk_vol']
    stock_data = df[columns]

    # Give this ticker its own score table so its AIC/BIC values never land in a
    # DataFrame object shared with (and overwritten by) other tickers.
    results_dict_df[stock] = pd.DataFrame(columns=["AIC", "BIC"], index=range(1, 11))

    for lag in range(1, 11):
        # A fresh VAR instance is built for every fit on purpose: each results
        # object then owns its model instead of sharing one across lag orders.
        model = VAR(stock_data)
        results = model.fit(lag)

        results_dict_df[stock].loc[lag, "AIC"] = results.aic
        results_dict_df[stock].loc[lag, "BIC"] = results.bic

    # NOTE(review): fits with different lag orders presumably use different
    # effective sample sizes, so the criteria are not compared on identical data
    # (see the TODO / issue #38 below) — confirm before relying on the ranking.
    # The table has object dtype, hence the cast before idxmin.
    best_aic_lag = results_dict_df[stock]["AIC"].astype(float).idxmin()
    best_bic_lag = results_dict_df[stock]["BIC"].astype(float).idxmin()

    aic_best_model[stock] = VAR(stock_data).fit(best_aic_lag)
    bic_best_model[stock] = VAR(stock_data).fit(best_bic_lag)

    # In-sample residuals of the selected fits.
    aic_best_residuals[stock] = aic_best_model[stock].resid
    bic_best_residuals[stock] = bic_best_model[stock].resid

In [13]:
# Show the BIC-selected fitted model stored for each ticker.
bic_best_model

{'^MERV': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75cdf2b90>,
 'GGAL.BA': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75cdcac90>,
 'GGAL': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75cde3610>,
 'YPFD.BA': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75cdfaa90>,
 'YPF': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75cde86d0>,
 'EDN.BA': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75cdeb910>,
 'EDN': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75ce48b10>,
 'BMA.BA': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75ce4b3d0>,
 'BMA': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75ce64390>,
 'BBAR.BA': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75ce66fd0>,
 'BBAR': <statsmodels.tsa.vector_ar.var_model.VARResultsWrapper at 0x1a75ce6ae50>}

bic_best_model

In [None]:
# TODO: compare models using the same amount of information
# https://github.com/alfsn/regime-switching-hmm/issues/38

In [None]:
def generate_VAR_samples_residuals(model, insample_data, oos_data, rolling_window=252):
    """Rolling one-step-ahead VAR forecasts and their out-of-sample residuals.

    For every date in ``oos_data`` a VAR is re-fitted on the ``rolling_window``
    observations that immediately precede that date, using the lag order of the
    already selected ``model``, and a one-step-ahead forecast for that date is
    produced. The first out-of-sample date is forecast as well.

    Parameters
    ----------
    model : fitted VAR results object
        The best model chosen beforehand (e.g. an entry of ``aic_best_model``).
        Only its lag order (``model.k_ar``) is used.
    insample_data : pandas.DataFrame
        Training observations that precede ``oos_data``; same columns.
    oos_data : pandas.DataFrame
        Out-of-sample observations to forecast. Must not be empty.
    rolling_window : int, default 252
        Number of most recent observations used for each re-fit.

    Returns
    -------
    forecasts : pandas.DataFrame
        One row per out-of-sample date, columns as in ``oos_data``.
    residuals : pandas.DataFrame
        ``oos_data - forecasts`` with incomplete rows dropped.
    """
    # Lag order of the previously selected model.
    lag = model.k_ar

    split_date = oos_data.index[0]
    dates_to_forecast = len(oos_data.index)

    # Work on one contiguous frame so a window can straddle the train/test split.
    full_data = pd.concat([insample_data, oos_data])
    index = full_data.index
    # Position of the first out-of-sample row inside the combined frame.
    end_loc = np.where(index >= split_date)[0].min()

    forecasts = {}

    for i in range(dates_to_forecast):
        forecast_date = index[end_loc + i]

        # Observations strictly before the forecast date, at most rolling_window rows.
        window_start = max(0, end_loc + i - rolling_window)
        stock_data = full_data.iloc[window_start:end_loc + i]

        results = VAR(stock_data).fit(lag)

        # forecast() needs the last `lag` observations as initial values;
        # slicing from len - lag (rather than -lag) stays correct if lag is 0.
        initial_values = stock_data.values[len(stock_data) - lag:]
        forecasts[forecast_date] = results.forecast(initial_values, steps=1)[0]

    forecasts = pd.DataFrame(forecasts).T
    forecasts.columns = oos_data.columns

    # Index alignment keeps only the dates present in both frames.
    residuals = (oos_data - forecasts).dropna()

    return forecasts, residuals



In [12]:
# Persist the per-ticker fitted models, one pickle file per selection criterion.
for best_models, pickle_name in (
    (aic_best_model, f"""VAR_univ_{params["tablename"]}_aic_bestmodels.pickle"""),
    (bic_best_model, f"""VAR_univ_{params["tablename"]}_bic_bestmodels.pickle"""),
):
    with open(os.path.join(resultsroute, pickle_name), "wb") as output_file:
        pickle.dump(best_models, output_file)

# Residuals

In [13]:
# Load the test-split counterpart (finaldf_test_*) of the training frame.
name = f'finaldf_test_{params["tablename"]}.pickle'
filename = os.path.join(dataroute, name)
with open(filename, "rb") as handle:
    # NOTE(review): df_test is not referenced by any other visible cell — confirm it is still needed.
    df_test = pickle.load(handle)

The models are usable, but the residuals are NOT!
https://github.com/alfsn/regime-switching-hmm/issues/27

In [13]:
# Persist the per-ticker residuals of the selected models, one pickle per criterion.
for best_residuals, pickle_name in (
    (aic_best_residuals, f"""VAR_univ_{params["tablename"]}_aic_residuals.pickle"""),
    (bic_best_residuals, f"""VAR_univ_{params["tablename"]}_bic_residuals.pickle"""),
):
    with open(os.path.join(resultsroute, pickle_name), "wb") as output_file:
        pickle.dump(best_residuals, output_file)

# VAR Training with USD

In [14]:
# Template AIC/BIC table: one row per candidate lag order (1..10).
emptydf=pd.DataFrame(columns=["AIC", "BIC"], index=range(1,11))
# Every ticker needs its OWN table. Mapping all keys to the same `emptydf` object
# would make each write visible under every ticker, so the dict would end up
# holding only the scores of the last ticker processed.
results_dict_df={stock:emptydf.copy() for stock in tickerlist}

In [15]:
# Reset the per-ticker slots (None) for the models selected by each criterion ...
aic_best_model = dict.fromkeys(tickerlist)
bic_best_model = dict.fromkeys(tickerlist)

# ... and for the residuals of those selected models.
aic_best_residuals = dict.fromkeys(tickerlist)
bic_best_residuals = dict.fromkeys(tickerlist)

In [16]:
# Fit a four-variable VAR per ticker (USD log returns / volatility plus the
# ticker's own log returns / volatility) for lag orders 1..10 and keep the fits
# with the lowest AIC and the lowest BIC.
for stock in tickerlist:
    columns = ['USD_log_rets', 'USD_gk_vol', f'{stock}_log_rets', f'{stock}_gk_vol']
    stock_data = df[columns]

    # Give this ticker its own score table so its AIC/BIC values never land in a
    # DataFrame object shared with (and overwritten by) other tickers.
    results_dict_df[stock] = pd.DataFrame(columns=["AIC", "BIC"], index=range(1, 11))

    for lag in range(1, 11):
        # A fresh VAR instance is built for every fit on purpose: each results
        # object then owns its model instead of sharing one across lag orders.
        model = VAR(stock_data)
        results = model.fit(lag)

        results_dict_df[stock].loc[lag, "AIC"] = results.aic
        results_dict_df[stock].loc[lag, "BIC"] = results.bic

    # NOTE(review): fits with different lag orders presumably use different
    # effective sample sizes, so the criteria are not compared on identical data
    # — confirm before relying on the ranking.
    # The table has object dtype, hence the cast before idxmin.
    best_aic_lag = results_dict_df[stock]["AIC"].astype(float).idxmin()
    best_bic_lag = results_dict_df[stock]["BIC"].astype(float).idxmin()

    aic_best_model[stock] = VAR(stock_data).fit(best_aic_lag)
    bic_best_model[stock] = VAR(stock_data).fit(best_bic_lag)

    # In-sample residuals of the selected fits.
    aic_best_residuals[stock] = aic_best_model[stock].resid
    bic_best_residuals[stock] = bic_best_model[stock].resid

In [17]:
# Persist the per-ticker fitted models that include USD, one pickle per criterion.
for best_models, pickle_name in (
    (aic_best_model, f"""VAR_multiv_{params["tablename"]}_aic_bestmodels.pickle"""),
    (bic_best_model, f"""VAR_multiv_{params["tablename"]}_bic_bestmodels.pickle"""),
):
    with open(os.path.join(resultsroute, pickle_name), "wb") as output_file:
        pickle.dump(best_models, output_file)

The models are usable, but the residuals are NOT!
https://github.com/alfsn/regime-switching-hmm/issues/27

In [18]:
# Persist the per-ticker residuals of the USD-augmented models, one pickle per criterion.
for best_residuals, pickle_name in (
    (aic_best_residuals, f"""VAR_multiv_{params["tablename"]}_aic_residuals.pickle"""),
    (bic_best_residuals, f"""VAR_multiv_{params["tablename"]}_bic_residuals.pickle"""),
):
    with open(os.path.join(resultsroute, pickle_name), "wb") as output_file:
        pickle.dump(best_residuals, output_file)