In [None]:
from typing import Dict

from src.base.forecasting.models import TimeSeriesModel
from src.projects.fagradalsfjall.common.project_settings import CV_HORIZON_SAMPLES, TS_CV_SPLITTER, TS_PRIMARY_METRIC
from src.base.forecasting.models.time_series import TimeSeriesModelAutoRegressiveOLS, TimeSeriesModelAutoRegressivePLS
from src.projects.fagradalsfjall.common.dataset import load_train_data_numpy
from src.projects.fagradalsfjall.common.model_eval import ModelEvalResult
from src.projects.fagradalsfjall.common.model_repo import ModelRepo

# 1. Training - Cross-validation - Evaluation

In [None]:
# -------------------------------------------------------------------------
#  Load TRAINING set
# -------------------------------------------------------------------------
# Loads the training data once into `x_train`; the grid-search call further
# down in this notebook receives it as its `x` argument.
# NOTE(review): the name suggests a numpy array is returned - confirm against
# load_train_data_numpy().

# end="" keeps the output on the same line, so "Done." is appended to the
# progress message once loading has finished.
print("Loading training dataset...    ", end="")
x_train = load_train_data_numpy()
print("Done.")

In [None]:
# -------------------------------------------------------------------------
#  OLS - Define & tune   (DISABLED: every line of this cell is commented out)
# -------------------------------------------------------------------------
#
# # --- grid settings -----------------------------------
# p_range = [1, 2, 4, 8, 16, 32, 48, 64, 4 * 24, 4 * 36, 4 * 48]
# n_range = [1, 2, 4, 8, 16, 32, 48, 64, 4 * 24, 4 * 36, 4 * 48]
# alpha_range = [0.0, 0.1, 1, 10]
#
# # --- define model ------------------------------------
# model_ols = TimeSeriesModelAutoRegressiveOLS(p=1, n=1)
#
# ols_models = {
#     "linear-ols": model_ols,
# }  # type: Dict[str, TimeSeriesModel]
#
# # --- run cross-validation ----------------------------
# param_grid = {"p": p_range, "n": n_range, "alpha": alpha_range}
#
# model_ols.cv.grid_search(
#     x=x_train,
#     param_grid=param_grid,
#     metric=TS_PRIMARY_METRIC,
#     ts_cv_splitter=TS_CV_SPLITTER,
#     hor=CV_HORIZON_SAMPLES,
#     retrain=True,
# )
#
# # --- model validation ------------------------------------
# ols_model_eval_results = ModelEvalResult.eval_many(ols_models)
#
# # --- save results ----------------------------------------
# ModelRepo.save_models(ols_model_eval_results)

In [None]:
# -------------------------------------------------------------------------
#  PLS - Define & tune
# -------------------------------------------------------------------------

# --- grid settings -----------------------------------
# Candidate values per hyper-parameter. "p" and "n" share the same candidates;
# "n_components" additionally starts at 1 and is sampled more densely.
param_grid = dict(
    p=[2, 4, 8, 16, 32, 64, 96, 144, 192],
    n=[2, 4, 8, 16, 32, 64, 96, 144, 192],
    n_components=[1, 2, 3, 4, 6, 8, 10, 12, 16, 24, 32, 48, 64, 96, 144, 192],
)


def param_validator(param_set: dict) -> bool:
    """Decide whether one parameter combination of the PLS grid is admissible.

    n_components is limited to min(n_samples, n_features)   (not n_targets!!!)
    See: https://scikit-learn.org/stable/modules/cross_decomposition.html#plsregression

    The check applied here is ``n_components <= p``.
    NOTE(review): this presumes that ``p`` bounds the number of features seen by
    the PLS regressor - confirm against TimeSeriesModelAutoRegressivePLS.

    Args:
        param_set: mapping that holds at least the keys ``"p"`` and
            ``"n_components"``; any other keys (e.g. ``"n"``) are ignored.

    Returns:
        True when ``n_components`` does not exceed ``p``, False otherwise.

    Raises:
        KeyError: if ``"p"`` or ``"n_components"`` is missing from ``param_set``.
    """
    # Only these two values take part in the constraint; the previously
    # unpacked-but-unused "n" entry is no longer read.
    p, n_components = param_set["p"], param_set["n_components"]
    return n_components <= p


# --- define model ------------------------------------
# Constructed with p=1, n=1, n_components=1; presumably these are only starting
# values that the grid search below replaces (TODO confirm).
model_pls = TimeSeriesModelAutoRegressivePLS(p=1, n=1, n_components=1)
pls_models: Dict[str, TimeSeriesModel] = {"linear-pls": model_pls}

# --- run cross-validation ----------------------------
# Grid search over `param_grid` on the training data; `param_validator` is
# handed over so that inadmissible combinations can be rejected.
model_pls.cv.grid_search(
    # data & evaluation protocol
    x=x_train,
    hor=CV_HORIZON_SAMPLES,
    metric=TS_PRIMARY_METRIC,
    ts_cv_splitter=TS_CV_SPLITTER,
    # search space
    param_grid=param_grid,
    param_validator=param_validator,
    # execution options
    retrain=True,
    n_jobs=6,
)

# --- model validation ------------------------------------
# Evaluate every model registered in `pls_models`.
pls_model_eval_results = ModelEvalResult.eval_many(pls_models)

# --- save results ----------------------------------------
# Hand the evaluation results to the model repository.
ModelRepo.save_models(pls_model_eval_results)