In [None]:
'''
Código adaptado de:
Modelos ARIMA y SARIMAX con Python por Joaquín Amat Rodrigo y Javier Escobar Ortiz,
disponible con licencia Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0 DEED),
en https://www.cienciadedatos.net/documentos/py51-modelos-arima-sarimax-python.html
'''

In [None]:
from parameters import get_parameters
from utils_sarimax import *

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from skforecast.sarimax import Sarimax
from skforecast.recursive import ForecasterSarimax
from skforecast.model_selection import TimeSeriesFold
from skforecast.model_selection import backtesting_sarimax
from skforecast.model_selection import grid_search_sarimax

# Load the shared experiment configuration and unpack the entries this notebook uses.
params = get_parameters()
(
    target,
    source_model,
    source_results,
    start_year,
    validation_year,
    test_year,
) = (
    params[key]
    for key in (
        "target",
        "source_model",
        "source_results",
        "start_year",
        "validation_year",
        "test_year",
    )
)

# Path() is the current working directory, so the project root is taken to be its parent.
PROJECT_ROOT = Path().resolve().parent
print(f"Project root: {PROJECT_ROOT}")

## Config

In [None]:
# Switch for exogenous regressors: it selects the results folder name and decides
# whether feature columns are passed as `exog` to the grid search and the backtests.
exog: bool = False

In [None]:
# Forwarded to format_df as its `use_log` option.
# NOTE(review): presumably enables a log transform of the series — confirm in utils_sarimax.
use_log: bool = False

In [None]:
# Results are stored in a folder named after the model variant being run.
folder = "SARIMAX_exog" if exog else "SARIMAX"
print(folder)

In [None]:
# Number of observations predicted per backtesting fold.
steps = 4

# Metric name handed to the grid search and the backtests for model selection/evaluation.
assessment_metric = "mean_squared_error"

# Candidate SARIMAX hyperparameters explored by the grid search.
param_grid = {
    "order": [
        (0, 1, 0),
        (1, 1, 0),
        (0, 1, 1),
        (1, 1, 1),
        (1, 0, 0),
        (1, 0, 1),
    ],
    # The last tuple element is the seasonal period; (0, 0, 0, 0) disables seasonality.
    "seasonal_order": [
        (0, 0, 0, 0),
        (1, 1, 0, 52),
        (1, 1, 0, 18),
        (1, 1, 1, 52),
        (1, 1, 1, 18),
    ],
    "trend": ["n"],
}

In [None]:
# Keyword options forwarded to format_df.
# NOTE(review): this rebinds `params`, replacing the dict returned by get_parameters().
params = dict(
    exog=exog,
    add_outlier=True,
    use_log=use_log,
    target_mode="absolute",
)

# Load the pickled dataset and discard the lag columns before formatting.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
df = pd.read_pickle(f"{PROJECT_ROOT}/{source_model}/data.pkl").drop(
    columns=["CASES_LAG1", "CASES_LAG18", "CASES_LAG16", "CASES_LAG3"]
)
df = format_df(df, **params)
print(df)

## Grid search - Validation

In [None]:
# Split the formatted frame using the configured validation and test years.
train_val_df, train_df, val_df, test_df = split_by_date(df, validation_year, test_year)

# Validation stage: the model is trained on train_df and assessed on val_df.
y_train, y_test = train_df[target], val_df[target]
X_train, X_test = train_df.drop(columns=["CASES"]), val_df.drop(columns=["CASES"])

# Learn the min/max scaling on the training features only, then apply it to both sets.
# NOTE(review): the scaled X_train/X_test are only handed to save_val_data further down;
# the exog passed to the forecaster is built from the unscaled train_val_df.
scaler_x = MinMaxScaler().fit(X_train)
X_train[:] = scaler_x.transform(X_train)
X_test[:] = scaler_x.transform(X_test)

In [None]:
# Exogenous regressors for the validation stage: every column except CASES, or nothing.
exog_data = train_val_df.drop(columns=["CASES"]) if exog else None

In [None]:
# Grid search forecaster
forecaster = ForecasterSarimax(
    regressor=Sarimax(
        order=(_, _, _),
        seasonal_order=(_, _, _, _),
        maxiter=200
    )
)

cv = TimeSeriesFold(
    steps              = steps,
    initial_train_size = len(y_train),
    refit              = False,
    fixed_train_size   = False,
)

resultados_grid = grid_search_sarimax(
    forecaster            = forecaster,
    y                     = train_val_df["CASES"],
    exog                  = exog_data,
    cv                    = cv,
    param_grid            = param_grid,
    metric                = assessment_metric,
    return_best           = False,
    n_jobs                = "auto",
    suppress_warnings_fit = True,
    verbose               = False,
    show_progress         = True
)

resultados_grid[f"RMSE"] = resultados_grid[assessment_metric] ** (1/2)
resultados_grid.head(5)

In [None]:
# Backtest forecaster
forecaster = ForecasterSarimax(
    regressor=Sarimax(
        **resultados_grid.at[0, "params"],
        maxiter=200
    )
)

cv = TimeSeriesFold(
    steps              = steps,
    initial_train_size = len(y_train),
    refit              = True,
    fixed_train_size   = False,
)

metric, y_pred = backtesting_sarimax(
    forecaster            = forecaster,
    y                     = pd.concat([y_train, y_test], ignore_index=False),
    exog                  = exog_data,
    cv                    = cv,
    metric                = assessment_metric,
    n_jobs                = "auto",
    suppress_warnings_fit = True,
    verbose               = False,
    show_progress         = True
)

print(metric)
print(y_pred.head(4))

In [None]:
# Observed validation values and the backtest predictions, both as Series named "CASES".
y_true = val_df["CASES"]
y_pred = y_pred["pred"].rename("CASES")

In [None]:
# Hand the validation-stage objects (scaled features, validation frame, observed and
# predicted series, grid-search table) to save_val_data, targeting
# <PROJECT_ROOT>/<source_results>/<folder>/validation.
# NOTE(review): what exactly gets written is defined in utils_sarimax — confirm there.
val_dir = f"{PROJECT_ROOT}/{source_results}/{folder}/validation"
save_val_data(val_dir, X_train, X_test, val_df, y_true, y_pred, resultados_grid)

## Test

In [None]:
# Test stage: train on train+validation, evaluate on the held-out test frame.
y_train, y_test = train_val_df[target], test_df[target]
X_train, X_test = train_val_df.drop(columns=["CASES"]), test_df.drop(columns=["CASES"])

In [None]:
# Re-learn the min/max scaling on the enlarged training set and apply it to both sets.
scaler_x = MinMaxScaler().fit(X_train)
X_train[:] = scaler_x.transform(X_train)
X_test[:] = scaler_x.transform(X_test)

In [None]:
# Exogenous regressors for the test stage: train+validation followed by test,
# each without the CASES column; None when exogenous variables are disabled.
exog_data = (
    pd.concat([part.drop(columns=["CASES"]) for part in (train_val_df, test_df)])
    if exog
    else None
)

In [None]:
# Backtest forecaster
forecaster = ForecasterSarimax(
    regressor=Sarimax(
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 12),
        maxiter=200
    )
)

cv = TimeSeriesFold(
    steps              = steps,
    initial_train_size = len(y_train),
    refit              = True,
    fixed_train_size   = False,
)

metric, y_pred = backtesting_sarimax(
    forecaster            = forecaster,
    y                     = pd.concat([train_val_df["CASES"], test_df["CASES"]]),
    exog                  = exog_data,
    cv                    = cv,
    metric                = assessment_metric,
    n_jobs                = "auto",
    suppress_warnings_fit = True,
    verbose               = False,
    show_progress         = True
)

print(metric)
print(y_pred.head(4))

In [None]:
# Observed test values and the backtest predictions, both as Series named "CASES".
y_true = test_df["CASES"]
y_pred = y_pred["pred"].rename("CASES")

In [None]:
# Hand the test-stage objects (scaled features, test frame, observed and predicted
# series) to save_test_data, targeting <PROJECT_ROOT>/<source_results>/<folder>/test.
# NOTE(review): what exactly gets written is defined in utils_sarimax — confirm there.
test_dir = f"{PROJECT_ROOT}/{source_results}/{folder}/test"
save_test_data(test_dir, X_train, X_test, test_df, y_true, y_pred)