## Startup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import arch

import os
import pickle

In [2]:
# Seed NumPy's global random generator so np.random draws are repeatable between runs.
np.random.seed(42)

In [3]:
from scripts.params import get_params

# Project settings; later cells read params["tablename"] and params["tickerlist"].
params = get_params()

In [4]:
# Relative locations of the input data, processed data and results folders.
dataroute = os.path.join("..", "data")
# Uses ".." like its siblings: "..." (three dots) is a literal directory name,
# not a reference to the parent directory.
processedroute = os.path.join("..", "processed")
resultsroute = os.path.join("..", "results")

## Data Retrieval

In [5]:
# Load the pickled training data (`df`) for the table selected by params["tablename"].
# NOTE(review): pickle.load can execute arbitrary code from the file -- only load trusted files.
name=f'finaldf_train_{params["tablename"]}.pickle'
filename=os.path.join(dataroute, name)
with open(filename, 'rb') as handle:
    df=pickle.load(handle)

## GARCH Training

In [6]:
# Grid of lag orders to try; each (p, q) pair is passed as p= and q= to arch.arch_model
# in the training loop.
p_values = [1, 2, 3]  # candidate p orders
q_values = [0, 1, 2, 3]  # candidate q orders
# all models with q=0 are exclusively ARCH (non-GARCH)

In [7]:
# models[key][(p, q, dist)] holds the fit result for that specification
# (None when the fit did not converge); filled by the training loop.
models = {}
# predict[key] is initialised to an empty dict per ticker by the training loop.
predict = {}

In [8]:
# Best model found so far per ticker, stored as a (fit result, criterion value) tuple.
# Lower AIC / BIC is better.
best_aic={}
best_bic={}

In [9]:
def check_best_aic(key, model, previous_best:float):
    """
    Record `model` as the best AIC model for `key` if it beats the previous best.

    AIC is better when lower.

    Args:
        key: Key under which the best model is stored in the `best_aic` dict.
        model: Fit result exposing an `aic` attribute, or None for a fit
            that must be ignored.
        previous_best (float): AIC value the candidate has to be strictly
            lower than in order to replace the current entry.

    Returns:
        None. On improvement, `best_aic[key]` is set to `(model, model.aic)`.
    """
    # Identity check: `== None` would go through any custom __eq__ on the result object.
    if model is None:
        return
    if model.aic < previous_best:
        best_aic[key] = (model, model.aic)

In [10]:
def check_best_bic(key, model, previous_best:float):
    """
    Record `model` as the best BIC model for `key` if it beats the previous best.

    BIC is better when lower.

    Args:
        key: Key under which the best model is stored in the `best_bic` dict.
        model: Fit result exposing a `bic` attribute, or None for a fit
            that must be ignored.
        previous_best (float): BIC value the candidate has to be strictly
            lower than in order to replace the current entry.

    Returns:
        None. On improvement, `best_bic[key]` is set to `(model, model.bic)`.
    """
    # Identity check: `== None` would go through any custom __eq__ on the result object.
    if model is None:
        return
    # Compare on BIC (not AIC): `previous_best` is the BIC stored in best_bic,
    # so comparing model.aic against it would mix the two criteria.
    if model.bic < previous_best:
        best_bic[key] = (model, model.bic)

In [11]:
# Grid search: for every ticker, fit an AR(1) mean model with ARCH/GARCH volatility
# for each (p, q, dist) combination and keep track of the best AIC / BIC fits.
nonconverged_models = 0
ok_models = 0

for key in params["tickerlist"]:
    returns = df[f'{key}_log_rets']

    # Fresh per-ticker containers.
    models[key], predict[key] = {}, {}

    # Start from +inf so that the first converged fit always becomes the best one.
    best_aic[key], best_bic[key] = (None, np.inf), (None, np.inf)

    for p in p_values:
        for q in q_values:
            for dist in ['Normal', 'StudentsT']:
                model = arch.arch_model(
                    returns,
                    mean="AR",
                    lags=1,
                    vol='Garch',
                    p=p,
                    q=q,
                    dist=dist,
                    rescale=False,
                )
                results = model.fit(
                    options={"maxiter":2000},
                    disp="off",
                    show_warning=False,
                )

                # A convergence flag of 0 means the optimiser converged successfully, see
                # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_slsqp.html
                if results.convergence_flag == 0:
                    ok_models += 1
                else:
                    # Discard the fit: None is ignored by the best-model checks below.
                    results = None
                    nonconverged_models += 1

                check_best_aic(key=key, model=results, previous_best=best_aic[key][1])
                check_best_bic(key=key, model=results, previous_best=best_bic[key][1])

                models[key][(p, q, dist)] = results

# Summary of how many fits converged.
print()
print(f"ok: {ok_models}")
print(f"nonconverged: {nonconverged_models}")


ok: 254
nonconverged: 10


# Residuals

In [13]:
# Load the pickled test data (`df_test`) for the table selected by params["tablename"].
# NOTE(review): pickle.load can execute arbitrary code from the file -- only load trusted files.
name=f'finaldf_test_{params["tablename"]}.pickle'
filename=os.path.join(dataroute, name)
with open(filename, 'rb') as handle:
    df_test=pickle.load(handle)

In [14]:
# One day after the last index entry of the test set.
df_test.index[-1]+pd.Timedelta(days=1)

datetime.date(2023, 12, 1)

In [22]:
# Inspect the `.model` attribute of the best-AIC fit stored for the "^MERV" ticker.
best_aic["^MERV"][0].model

In [16]:
def generate_GARCH_samples_residuals(model, insample_data:pd.DataFrame, oos_data:pd.DataFrame):
    """
    Locate the in-sample / out-of-sample split and refit the model up to it.

    Work in progress: this is the first step of a recursive-window forecast
    (see the arch documentation links in the body). Generation of samples and
    residuals is not implemented yet, so only the split position is returned.

    Args:
        model: A fit result (as stored in `best_aic` / `best_bic`) whose
            `.model` attribute is the underlying model object that can be
            fitted again, or None to skip the refit.
        insample_data (pd.DataFrame): Training data; its last index label is
            used as the split date.
        oos_data (pd.DataFrame): Out-of-sample data, concatenated after the
            training data.

    Returns:
        The smallest integer position, within the concatenated index, whose
        label is greater than or equal to the split date.
    """
    split_date = insample_data.index[-1]

    # Positions are measured on the concatenated (in-sample followed by out-of-sample) index.
    full_index = pd.concat([insample_data, oos_data]).index
    end_loc = np.where(full_index >= split_date)[0].min()  # integer (iloc-style) position

    # Recursive window forecasting references:
    # https://arch.readthedocs.io/en/latest/univariate/forecasting.html
    # https://arch.readthedocs.io/en/latest/univariate/univariate_volatility_forecasting.html#Recursive-Forecast-Generation
    if model is not None:
        # `model` is a fit result; its `.model` attribute is the model object we can fit again.
        model.model.fit(last_obs=split_date)

    return end_loc

In [17]:
# Exploratory call: passes None as the model together with the train and test frames.
generate_GARCH_samples_residuals(None, df, df_test)

2523

In [13]:
# Collect the `.resid` attribute of the best-AIC and best-BIC fit for every ticker.
aic_residuals={}
bic_residuals={}

# NOTE(review): best_aic[key][0] / best_bic[key][0] stay None when no fit converged
# for a ticker, in which case `.resid` raises AttributeError.
for key in best_aic.keys():
    aic_residuals[key]=best_aic[key][0].resid
    bic_residuals[key]=best_bic[key][0].resid

# Saving best models and residuals

In [14]:
# Persist the per-ticker best models chosen by AIC to the results folder.
with open(os.path.join(resultsroute, f"""GARCH_{params["tablename"]}_aic_bestmodels.pickle"""), "wb") as output_file:
    pickle.dump(best_aic, output_file)

# Same for the best models chosen by BIC.
with open(os.path.join(resultsroute, f"""GARCH_{params["tablename"]}_bic_bestmodels.pickle"""), "wb") as output_file:
    pickle.dump(best_bic, output_file)

Note: the saved models are usable, but the residuals are NOT (see the issue below).
https://github.com/alfsn/regime-switching-hmm/issues/27

In [15]:
# Persist the residuals of the best-AIC models to the results folder.
with open(os.path.join(resultsroute, f"""GARCH_{params["tablename"]}_aic_residuals.pickle"""), "wb") as output_file:
    pickle.dump(aic_residuals, output_file)

# Same for the residuals of the best-BIC models.
with open(os.path.join(resultsroute, f"""GARCH_{params["tablename"]}_bic_residuals.pickle"""), "wb") as output_file:
    pickle.dump(bic_residuals, output_file)

# Model prediction
# NB this is currently unused and will only be used in the OOS part 

Function documentation: https://arch.readthedocs.io/en/latest/univariate/generated/generated/arch.univariate.base.ARCHModelResult.forecast.html#arch.univariate.base.ARCHModelResult.forecast

In [16]:
# Forecast from every fitted specification of every ticker.
# `models` and `predict` are both keyed by ticker and then by (p, q, dist),
# so iterate over the same ticker list the training loop used.
for key in params["tickerlist"]:
    for p in p_values:
        for q in q_values:
            for dist in ['Normal', 'StudentsT']:
                fitted = models[key][(p, q, dist)]
                if fitted is None:
                    # Non-converged fits are stored as None; keep the slot so
                    # `predict` mirrors the layout of `models`.
                    predict[key][(p, q, dist)] = None
                    continue
                # Forecast from this specification's own fit and store the
                # forecast object (not the `predict` container itself).
                pred = fitted.forecast()
                predict[key][(p, q, dist)] = pred

The default for reindex is True. After September 2021 this will change to
False. Set reindex to True or False to silence this message. Alternatively,
you can use the import comment

from arch.__future__ import reindexing


The default for reindex is True. After September 2021 this will change to
False. Set reindex to True or False to silence this message. Alternatively,
you can use the import comment

from arch.__future__ import reindexing


The default for reindex is True. After September 2021 this will change to
False. Set reindex to True or False to silence this message. Alternatively,
you can use the import comment

from arch.__future__ import reindexing


The default for reindex is True. After September 2021 this will change to
False. Set reindex to True or False to silence this message. Alternatively,
you can use the import comment

from arch.__future__ import reindexing


The default for reindex is True. After September 2021 this will change to
False. Set reindex to True or Fals

# Plotting
## TODO: This is still rough: it should plot the returns and the predictions on top of each other

In [17]:
def plot_close_rets(data, model, key, name):
    """
    Plot the log returns of one ticker over time and save the figure as a PNG.

    Args:
        data: Mapping from ticker to a table with a "log_rets" column; the
            table's index is used as the time axis.
        model: Currently unused; kept for the planned overlay of model predictions.
        key: Ticker to plot; also used in the title and in the file name.
        name: Model label used in the output file name.

    Returns:
        None. The figure is written to
        <resultsroute>/graphs/GARCH/<key>_model_<name>.png.
    """
    fig, ax = plt.subplots(figsize=(20, 20))

    log_rets = data[key]["log_rets"]
    dates = data[key].index

    # Time goes on the x axis and returns on the y axis, matching the axis labels.
    ax.plot(dates, log_rets, '.', c="red")
    #ax.plot(dates, model.predict(dates), '.', c="blue")

    ax.set_title(f"{key} Log returns")
    ax.grid(True)
    ax.set_xlabel("datetime", fontsize=16)
    ax.set_ylabel("log rets", fontsize=16)

    # Lay out only after everything is drawn, so there is something to arrange.
    fig.tight_layout()

    out_dir = os.path.join(resultsroute, "graphs", "GARCH")
    os.makedirs(out_dir, exist_ok=True)  # savefig does not create missing folders
    fig.savefig(os.path.join(out_dir, f"{key}_model_{name}.png"))

In [19]:
#for key in data.keys():
#    print(key)
#    plot_close_rets(data, key)
#plt.show()