## Startup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.api import VAR
from statsmodels.tools.eval_measures import aic, bic

import copy
import os
import pickle

In [2]:
import warnings

warnings.filterwarnings("ignore")

In [3]:
np.random.seed(42)

In [4]:
dataroute = os.path.join("..", "data")
dumproute = os.path.join("..", "dump")
resultsroute = os.path.join("..", "results")

In [5]:
from scripts.params import get_params

params = get_params()

## Data Retrieval

In [6]:
name = f"""finaldf_train_{params["tablename"]}.pickle"""
filename = os.path.join(dataroute, name)
with open(filename, "rb") as handle:
    df = pickle.load(handle)

name = f'finaldf_test_{params["tablename"]}.pickle'
filename = os.path.join(dataroute, name)
with open(filename, "rb") as handle:
    df_test = pickle.load(handle)

In [7]:
tickerlist = params["tickerlist"]

In [8]:
df.head(1)

Unnamed: 0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,YPFD.BA_rets,...,BBAR.BA_gk_vol,BBAR_rets,BBAR_log_rets,BBAR_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
2013-01-03,0.007552,0.007524,0.000129,0.010616,0.01056,0.000677,-0.012748,-0.01283,0.001228,-0.006863,...,0.000169,-0.005725,-0.005742,0.00096,0.00883,0.008791,1.4e-05,0.001247,0.001246,0.000129


In [9]:
df_test.head(1)

Unnamed: 0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,YPFD.BA_rets,...,BBAR.BA_gk_vol,BBAR_rets,BBAR_log_rets,BBAR_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
2023-06-02,-0.001236,-0.001237,0.000117,-0.021765,-0.022005,0.001171,-0.022333,-0.022586,0.00161,0.014547,...,0.0,-0.006608,-0.00663,0.000876,-0.005835,-0.005852,5.2e-05,-0.001004,-0.001004,0.000117


In [10]:
def generate_columns(stock: str, contains_vol: bool, contains_USD: bool):
    """Devuelve una lista con los nombres de columnas para distintas especificaciones"""
    columns = []
    columns.append(f"{stock}_log_rets")

    if contains_vol:
        columns.append(f"{stock}_gk_vol")

    if contains_USD:
        columns.append(f"USD_log_rets")
        columns.append(f"USD_gk_vol")

    return columns

In [11]:
selected_orders = VAR(df[["BBAR_log_rets", "BBAR_gk_vol"]]).select_order(
    maxlags=None, trend="c"
)
selected_orders.selected_orders

{'aic': 6, 'bic': 2, 'hqic': 4, 'fpe': 6}

In [54]:
def generate_VAR_samples_residuals(
    stock: str,
    lags: int,
    insample_data: pd.DataFrame,
    oos_data: pd.DataFrame,
    contains_vol: bool,
    contains_USD: bool,
):
    # fittear t-j con t-j-252d
    columns = generate_columns(
        stock=stock, contains_vol=contains_vol, contains_USD=contains_USD
    )

    split_date = oos_data.index[0]
    dates_to_forecast = len(oos_data.index)

    oos_data = pd.concat([insample_data[columns], oos_data[columns]])
    del insample_data

    index = oos_data.index
    end_loc = np.where(index >= split_date)[0].min()

    rolling_window = 252

    forecasts = pd.DataFrame()
    residuals = pd.DataFrame()

    for i in range(1, dates_to_forecast):
        fitstart = end_loc - rolling_window + i
        fitend = end_loc + i

        stock_data = oos_data.iloc[fitstart:fitend]
        print(f"[{fitend}-{lags}:{fitend}]")

        model = VAR(stock_data)
        results = model.fit(lags)

        fcast = results.forecast(y=oos_data[(fitend - lags) : fitend], steps=1)
        resid = results.resid.iloc[-1:]

        forecasts = pd.concat([forecasts, fcast], axis=0)
        residuals = pd.concat([residuals, resid], axis=0)

    return forecasts, residuals

In [55]:
split_date = df_test.index[0]
df_test2 = pd.concat([df, df_test])

index = df_test2.index

lags = 6
fitend = np.where(index >= split_date)[0].min()

In [56]:
split_date

datetime.date(2023, 6, 2)

In [57]:
df_test2[(2525 - 6) : 2525]

Unnamed: 0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,YPFD.BA_rets,...,BBAR.BA_gk_vol,BBAR_rets,BBAR_log_rets,BBAR_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
2023-05-23,0.00533,0.005315,0.000464,0.017734,0.017579,0.000697,0.007916,0.007884,0.001497,0.038068,...,0.0,0.030733,0.03027,0.002791,0.020107,0.019908,0.000301,-0.010028,-0.010079,0.000464
2023-05-24,0.002593,0.00259,0.000161,0.017608,0.017455,0.000874,0.026178,0.025841,0.00206,0.011437,...,0.0,0.011468,0.011403,0.00146,-0.011325,-0.011389,0.000328,0.005566,0.005551,0.000161
2023-05-29,0.013755,0.013661,0.000283,0.023162,0.022897,0.00062,0.0,0.0,0.0,0.010033,...,0.0,0.0,0.0,0.0,0.009787,0.009739,3.5e-05,0.008141,0.008108,0.000283
2023-05-30,-0.008595,-0.008632,0.000265,0.004404,0.004394,0.001201,0.036815,0.036154,0.002979,-0.020036,...,0.0,0.018182,0.018019,0.001212,-0.020286,-0.020495,0.000227,-0.005688,-0.005705,0.000265
2023-05-31,-0.003825,-0.003832,0.000113,0.021749,0.021516,0.001402,0.01836,0.018193,0.001315,-0.022577,...,0.0,-0.022322,-0.022574,0.000603,0.005885,0.005868,7e-05,0.004127,0.004118,0.000113
2023-06-02,-0.001236,-0.001237,0.000117,-0.021765,-0.022005,0.001171,-0.022333,-0.022586,0.00161,0.014547,...,0.0,-0.006608,-0.00663,0.000876,-0.005835,-0.005852,5.2e-05,-0.001004,-0.001004,0.000117


In [58]:
def estimate_best_residuals(
    stock: str,
    criterion: str,
    insample_data: pd.DataFrame,
    oos_data: pd.DataFrame,
    contains_vol: bool,
    contains_USD: bool,
):
    columns = generate_columns(
        stock=stock, contains_vol=contains_vol, contains_USD=contains_USD
    )

    selected_orders = VAR(insample_data[columns]).select_order(maxlags=15, trend="c")
    best_lag = selected_orders.selected_orders[criterion]

    forecasts, residuals = generate_VAR_samples_residuals(
        stock=stock,
        lags=best_lag,
        insample_data=insample_data,
        oos_data=oos_data,
        contains_vol=contains_vol,
        contains_USD=contains_USD,
    )

    assert type(residuals) == pd.DataFrame

    return best_lag, forecasts, residuals

In [59]:
def save_as_pickle(data, contains_USD: bool, criterion: str, type_save: str):
    if contains_USD:
        string = "multiv"
    else:
        string = "with_vol"

    with open(
        os.path.join(
            resultsroute,
            f"""VAR_{string}_{params["tablename"]}_{criterion}_best_{type_save}.pickle""",
        ),
        "wb",
    ) as output_file:
        pickle.dump(data, output_file)

In [60]:
best_lags = {
    "aic": {"contains_USD=True": {}, "contains_USD=False": {}},
    "bic": {"contains_USD=True": {}, "contains_USD=False": {}},
}
best_forecasts = copy.deepcopy(best_lags)
best_residuals = copy.deepcopy(best_lags)

for criterion in ["aic", "bic"]:
    for contains_USD in [True, False]:
        usdstring = f"contains_USD={contains_USD}"

        for stock in tickerlist:
            best_lag, forecasts, residuals = estimate_best_residuals(
                stock=stock,
                criterion=criterion,
                insample_data=df,
                oos_data=df_test,
                contains_vol=True,
                contains_USD=contains_USD,
            )

            pct_nan = forecasts.iloc[:, 0].isna().sum() / len(forecasts.index) * 100

            if pct_nan > 5:
                warnings.warn(f"{stock} % na: {pct_nan}")

            forecasts.fillna(method="ffill", inplace=True)
            residuals.fillna(method="ffill", inplace=True)

            best_lags[criterion][usdstring][stock] = best_lag
            best_forecasts[criterion][usdstring][stock] = forecasts
            best_residuals[criterion][usdstring][stock] = residuals

        save_as_pickle(
            data=best_lags[criterion][usdstring],
            contains_USD=contains_USD,
            criterion=criterion,
            type_save="lags",
        )
        save_as_pickle(
            data=best_forecasts[criterion][usdstring],
            contains_USD=contains_USD,
            criterion=criterion,
            type_save="forecasts",
        )
        save_as_pickle(
            data=best_residuals[criterion][usdstring],
            contains_USD=contains_USD,
            criterion=criterion,
            type_save="residuals",
        )

[2525-6:2525]


KeyError: -1