## Startup

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import arch

import os
import pickle
import warnings

In [22]:
np.random.seed(42)

In [23]:
from scripts.params import get_params

params = get_params()

In [24]:
dataroute = os.path.join("..", "data")
processedroute = os.path.join("...", "processed")
resultsroute = os.path.join("..", "results")

## Data Retrieval

In [25]:
name = f'finaldf_train_{params["tablename"]}.pickle'
filename = os.path.join(dataroute, name)
with open(filename, "rb") as handle:
    df = pickle.load(handle)

In [26]:
df.head()

Unnamed: 0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,YPFD.BA_rets,...,BBAR.BA_gk_vol,BBAR_rets,BBAR_log_rets,BBAR_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,^MERV_USD_rets,^MERV_USD_log_rets,^MERV_USD_gk_vol
2013-01-03,0.007552,0.007524,0.000129,0.010616,0.01056,0.000677,-0.012748,-0.01283,0.001228,-0.006863,...,0.000169,-0.005725,-0.005742,0.00096,0.008791,0.008753,1.3e-05,0.001209,0.001208,0.000129
2013-01-04,0.007092,0.007067,0.000158,-0.006303,-0.006323,0.000208,-0.010043,-0.010094,0.000554,0.004936,...,0.000406,-0.019194,-0.01938,0.000635,0.016787,0.016648,0.000113,-0.004947,-0.004959,0.000158
2013-01-07,-0.001035,-0.001035,2.2e-05,0.002114,0.002112,6.3e-05,-0.014493,-0.014599,0.000517,0.010805,...,0.000492,0.015656,0.015534,0.000511,-0.002796,-0.0028,4.8e-05,-0.009049,-0.00909,2.2e-05
2013-01-08,0.008285,0.008251,8.2e-05,-0.008439,-0.008475,0.000153,-0.016177,-0.016309,0.001085,0.049563,...,0.000438,-0.015414,-0.015534,0.000642,0.015757,0.015634,7.1e-05,-0.001409,-0.00141,8.2e-05
2013-01-09,0.017826,0.017669,0.000273,0.0,0.0,0.0,0.011958,0.011887,0.005238,0.0,...,0.0,-0.003914,-0.003922,0.000147,-0.008145,-0.008178,0.000984,0.017152,0.017006,0.000273


## GARCH Training
Warning: this section only uses log_rets as y variables. See:
https://github.com/alfsn/regime-switching-hmm/issues/35

In [27]:
# Define the range of p and q values
alpha_values = [1, 2, 3, 4]  # estos son los valores de los lags in mean del AR.
p_values = [1, 2, 3]  # Example: p values
q_values = [0, 1, 2, 3]  # Example: q values
# all models with q=0 are exclusively ARCH (non-GARCH)

In [28]:
models = {}
predict = {}

In [29]:
best_aic = {}
best_bic = {}

In [30]:
def check_best_aic(key, model, previous_best: float, p: int, q: int, dist: str):
    """
    AIC is better when lower.
    """
    if model == None:
        pass
    else:
        if model.aic < previous_best:
            best_aic[key] = {
                "model": model,
                "aic": model.aic,
                "p": p,
                "q": q,
                "dist": dist,
            }

In [31]:
def check_best_bic(key, model, previous_best: float, p: int, q: int, dist: str):
    """
    BIC is better when lower.
    """
    if model == None:
        pass
    else:
        if model.aic < previous_best:
            best_bic[key] = {
                "model": model,
                "bic": model.bic,
                "p": p,
                "q": q,
                "dist": dist,
            }

In [32]:
# Estimate ARMA-ARCH and ARMA-GARCH models for different p and q values
nonconverged_models = 0
ok_models = 0

for key in params["tickerlist"]:
    returns = df[f"{key}_log_rets"]

    models[key] = {}
    predict[key] = {}

    best_aic[key] = {"aic": np.inf}
    best_bic[key] = {"bic": np.inf}

    for p in p_values:
        for q in q_values:
            for dist in ["Normal", "StudentsT"]:
                model = arch.arch_model(
                    returns,
                    mean="AR",
                    lags=1,
                    vol="Garch",
                    p=p,
                    q=q,
                    dist=dist,
                    rescale=False,
                )
                results = model.fit(
                    options={"maxiter": 2000}, disp="off", show_warning=False
                )

                if results.convergence_flag != 0:
                    # 0 is converged successfully
                    # see https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_slsqp.html
                    results = None
                    nonconverged_models += 1
                else:
                    ok_models += 1

                check_best_aic(
                    key=key,
                    model=results,
                    previous_best=best_aic[key]["aic"],
                    p=p,
                    q=q,
                    dist=dist,
                )
                check_best_bic(
                    key=key,
                    model=results,
                    previous_best=best_bic[key]["bic"],
                    p=p,
                    q=q,
                    dist=dist,
                )

                models[key][(p, q, dist)] = results

print()
print(f"ok: {ok_models}")
print(f"nonconverged: {nonconverged_models}")


ok: 252
nonconverged: 12


# Residuals

In [33]:
name = f'finaldf_test_{params["tablename"]}.pickle'
filename = os.path.join(dataroute, name)
with open(filename, "rb") as handle:
    df_test = pickle.load(handle)

In [34]:
def generate_GARCH_samples_residuals(
    model_dict: dict, insample_data: pd.DataFrame, oos_data: pd.DataFrame
):
    """
    Esta función come archmodelresults (que vienen del diccionario best_aic y best_bic),
    y hace pronósticos rolling (con ventana de 1 año (252 días habiles)),
    lo que devuelve samples y residuos.
    El método de pronóstico es de simulación

    Args:
        model_dict (_type_): _description_
        pd (_type_): _description_

    Returns:
        _type_: _description_
    """
    split_date = insample_data.index[-1]
    dates_to_forecast = len(oos_data.index)

    full_data = pd.concat([insample_data, oos_data])
    del insample_data

    # vamos a implementar recursive window forecasting
    # https://arch.readthedocs.io/en/latest/univariate/forecasting.html
    # https://arch.readthedocs.io/en/latest/univariate/univariate_volatility_forecasting.html#Recursive-Forecast-Generation

    index = full_data.index
    end_loc = np.where(index >= split_date)[0].min()
    # esto es un int del iloc
    # preciso usar ints de iloc porque el timedelta se me va a romper con el fin de semana
    rolling_window = 252

    forecasts = {}

    model = arch.arch_model(
        y=full_data,
        mean="AR",
        lags=1,
        vol="Garch",
        p=model_dict["p"],
        q=model_dict["q"],
        dist=model_dict["dist"],
        rescale=False,
    )

    for i in range(0, dates_to_forecast):
        date_of_first_forecast = full_data.index[end_loc + i]

        res = model.fit(
            first_obs=end_loc - rolling_window + i, last_obs=end_loc + i, disp="off"
        )

        forecast = res.forecast(
            horizon=1, start=date_of_first_forecast, method="simulation"
        ).mean.iloc[0]

        forecasts[forecast.name] = forecast

    forecasts = pd.DataFrame(forecasts).T
    forecasts.columns = full_data.columns

    pct_nan = forecasts.iloc[:, 0].isna().sum() / len(forecasts.index) * 100

    if pct_nan > 5:
        warnings.warn(f"{full_data.columns[0]} % na: {pct_nan}")

    forecasts.fillna(method="ffill", inplace=True)

    residuals = oos_data - forecasts

    return forecasts, residuals

In [35]:
def save_as_pickle(data, criterion: str, type_save: str):    
    with open(
        os.path.join(
            resultsroute,
            f"""GARCH_{params["tablename"]}_{criterion}_best_{type_save}.pickle""",
        ),
        "wb",
    ) as output_file:
        pickle.dump(data, output_file)

In [36]:
forecasts_dict={"aic":{}, "bic":{}}
residuals_dict={"aic":{}, "bic":{}}

for criterion, dictionary in zip(["aic", "bic"], [best_aic, best_bic]):
    for stock in dictionary.keys():
        forecasts, residuals = generate_GARCH_samples_residuals(
            dictionary[stock],
            pd.DataFrame(df[f"{stock}_log_rets"]),
            pd.DataFrame(df_test[f"{stock}_log_rets"])
            )

        forecasts_dict[criterion][stock]=forecasts
        residuals_dict[criterion][stock]=residuals     


  forecasts.fillna(method="ffill", inplace=True)
Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

  forecasts.fillna(method="ffill", inplace=True)
  forecasts.fillna(method="ffill", inplace=True)
  forecasts.fillna(method="ffill", inplace=True)
  forecasts.fillna(method="ffill", inplace=True)
  forecasts.fillna(method="ffill", inplace=True)
  forecasts.fillna(method="ffill", inplace=True)
Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

  forecasts.fillna(method="ffill", inplace=True)
  forecasts.fillna(method="ffill", inplace=True)
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

  fore

In [37]:
for criterion, bestmodels in zip(["aic", "bic"],[best_aic, best_bic]):
    save_as_pickle(forecasts_dict[criterion], criterion=criterion, type_save="forecasts")
    save_as_pickle(residuals_dict[criterion], criterion=criterion, type_save="residuals")
    save_as_pickle(bestmodels, criterion=criterion, type_save="models")

# Plotting
## TODO: Esto aun está feo: tengo que armar que esto devuelva el plotteo de returns y los predicts uno encima del otro

In [38]:
def plot_close_rets(data, model, key, name):
    fig = plt.figure(figsize=(20, 20))
    plt.tight_layout()
    plt.title(f"{key} Log returns")

    plt.subplot(1, 1, 1)

    x = data[key]["log_rets"]
    y = data[key].index

    plt.plot(x, y, ".", c="red")
    # plt.plot(x, model.predict(x), '.', c="blue")

    plt.grid(True)
    plt.xlabel("datetime", fontsize=16)
    plt.ylabel("log rets", fontsize=16)

    plt.savefig(
        os.path.join(resultsroute, "graphs", f"GARCH", f"{key}_model_{name}.png")
    )

In [39]:
# for key in data.keys():
#    print(key)
#    plot_close_rets(data, key)
# plt.show()