In [None]:
import os
import torch
from typing import List
import pandas as pd
import gluonts
import numpy as np
import nnts
import nnts.data
from nnts import utils
import nnts.torch.preprocessing
import nnts.torch.models
import trainers as project_trainers
import nnts.metrics
import nnts.torch.datasets
import nnts.loggers
import nnts.datasets
import nnts
from features import LagScenario
import nnts.torch.preprocessing
import nnts.torch.utils
import nnts.trainers
import deepar


# Print tensors with 8 decimal places and never in scientific notation,
# so small metric/loss differences are easy to compare by eye.
torch.set_printoptions(precision=8, sci_mode=False)
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the local project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# --- Experiment configuration -------------------------------------------------
# These names are read by the cells below (PATH, params, metadata, df_orig).
data_path = "data"
# NOTE(review): "bettter" looks like a typo, but this string is part of the
# results directory (PATH) and of the logger project name, so renaming it would
# orphan any results already written under the current spelling.
model_name = "bettter-deepar"
dataset_name = "tourism_monthly"
results_path = "ablation-results"

# Load the raw frame plus its metadata object; later cells read
# metadata.dataset, .freq, .context_length, .prediction_length and .seasonality.
df_orig, metadata = nnts.datasets.load_dataset(dataset_name)
# All artefacts for this run go to <results_path>/<model_name>/<dataset>.
PATH = os.path.join(results_path, model_name, metadata.dataset)

# Training hyperparameters shared by every scenario.
# Note that the optimizer and loss are passed as classes, not instances.
params = utils.Hyperparams(
    optimizer=torch.optim.Adam,
    loss_fn=torch.nn.L1Loss,
    batches_per_epoch=50,
    batch_size=32,
    training_method=utils.TrainingMethod.FREE_RUNNING,
)
# Presumably creates PATH when it does not exist yet (inferred from the helper's
# name) - confirm against utils.
utils.makedirs_if_not_exists(PATH)

In [None]:
def create_time_features(df_orig: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df_orig`` with time-derived covariate columns added.

    The caller's frame is not modified: all work happens on a copy, so the
    cell that calls this function can be re-run safely and any other
    reference to the raw frame stays intact.

    Columns added, all derived from the datetime column ``ds``:

    * ``day_of_week`` - ``ds.dt.day_of_week``.
    * ``hour`` - ``ds.dt.hour``.
    * ``week`` - ISO calendar week number, cast to ``float32``.
    * ``month`` - cosine encoding ``cos(2 * pi * month / 12)``.
    * ``unix_timestamp`` - whole seconds since 1970-01-01, then rescaled by
      ``nnts.torch.preprocessing.MaxMinScaler`` fitted on the whole frame.

    Args:
        df_orig: Frame containing a datetime column named ``ds``.

    Returns:
        The frame returned by ``MaxMinScaler.transform`` with the columns
        listed above.
    """
    # Work on a copy so the input frame is never mutated in place.
    df = df_orig.copy()
    ds = df["ds"]

    df["day_of_week"] = ds.dt.day_of_week
    df["hour"] = ds.dt.hour
    # isocalendar() yields an unsigned-integer week; cast to float32.
    df["week"] = ds.dt.isocalendar().week.astype(np.float32)

    # Cosine encoding of the calendar month (period of 12 months).
    df["month"] = np.cos(ds.dt.month * 2 * np.pi / 12)

    # Seconds elapsed since the Unix epoch, as an integer.
    df["unix_timestamp"] = (ds - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

    # Rescale the raw timestamp covariate. The scaler is fitted on the whole
    # dataset in one go rather than per series.
    # NOTE(review): the previous comment attributed this step to GluonTS's
    # scaling of the *month* covariate into [-0.5, 0.5]; the code actually
    # scales `unix_timestamp`, and the output range of MaxMinScaler is not
    # visible here - confirm the intended covariate and range.
    max_min_scaler = nnts.torch.preprocessing.MaxMinScaler()
    max_min_scaler.fit(df, ["unix_timestamp"])
    df = max_min_scaler.transform(df, ["unix_timestamp"])

    return df

# Rebind df_orig to the feature-augmented frame; every later cell reads the
# covariate columns ("month", "unix_timestamp", ...) from this frame.
df_orig = create_time_features(df_orig)

In [None]:
# Visual sanity check of the encoded "month" covariate: take series T1,
# index it by date and plot its last 36 rows.
t1_rows = df_orig[df_orig['unique_id'] == 'T1']
t1_month_tail = t1_rows.set_index('ds').tail(36)['month']
ax = t1_month_tail.plot(figsize=(20, 5))
fig = ax.get_figure()

In [None]:
# Start from GluonTS's default lag set for this dataset's frequency.
gluonts_lags = gluonts.time_feature.lag.get_lags_for_frequency(metadata.freq)
# Drop lags <= 1 and shift the remaining ones down by one.
# NOTE(review): presumably this converts to the zero-based lag indexing used
# by the lag dataset/model - confirm.
lag_seq = [lag - 1 for lag in gluonts_lags if lag > 1]

In [None]:
# Covariates that the ablation switches on and off.
scaled_covariates = ["month", "unix_timestamp", nnts.torch.models.deepar.FEAT_SCALE]

# One row per non-empty subset of `scaled_covariates`; a 1 in column i selects
# scaled_covariates[i].
scaled_covariate_selection_matrix = [
    [0,0,1],
    [0,1,0],
    [0,1,1],
    [1,0,0],
    [1,0,1],
    [1,1,0],
    [1,1,1],
]

# Every configuration is repeated once per seed.
seeds = [42, 43, 44, 45, 46]

# ABLATION: every covariate subset x every seed.
ablation_scenario_list: List[LagScenario] = []
for seed in seeds:
    for row in scaled_covariate_selection_matrix:
        selected_combination = [
            covariate
            for covariate, select in zip(scaled_covariates, row)
            if select == 1
        ]
        ablation_scenario_list.append(
            LagScenario(
                metadata.prediction_length,
                # FEAT_SCALE is excluded from `conts`; it is only listed as a
                # scaled covariate.
                conts=[
                    cov
                    for cov in selected_combination
                    if cov != nnts.torch.models.deepar.FEAT_SCALE
                ],
                scaled_covariates=selected_combination,
                lag_seq=lag_seq,
                seed=seed,
                dataset=metadata.dataset,
            )
        )

# BASELINE: the "month" covariate only, once per seed.
baseline_scenario_list: List[LagScenario] = [
    LagScenario(
        metadata.prediction_length,
        conts=["month"],
        scaled_covariates=["month"],
        lag_seq=lag_seq,
        seed=seed,
        dataset=metadata.dataset,
    )
    for seed in seeds
]

# Select which set of scenarios the training cell will run. Previously the
# ablation list was built and then silently overwritten by the baseline list;
# the baseline is still what runs, but the choice is now explicit. Switch to
# `ablation_scenario_list` to run the full ablation.
scenario_list: List[LagScenario] = baseline_scenario_list

In [None]:
# Train and evaluate one model per scenario, logging the test metrics of each
# run under PATH.
for scenario in scenario_list:
    # Seed the run from the scenario's seed so repeated runs are comparable.
    nnts.torch.utils.seed_everything(scenario.seed)
    # Fresh copy per scenario so one run cannot affect the next through the frame.
    df = df_orig.copy()
    # The data window is extended by the largest lag on top of the model's
    # context length (presumably so every lag value is available for the first
    # context step - confirm against TimeseriesLagsDataset).
    context_length = metadata.context_length + max(scenario.lag_seq)
    dataset_options = {
        "context_length": metadata.context_length,
        "prediction_length": metadata.prediction_length,
        "conts": scenario.conts,
        "lag_seq": scenario.lag_seq,
    }

    trn_dl, test_dl = nnts.torch.utils.create_dataloaders(
        df,
        nnts.datasets.split_test_train_last_horizon,
        context_length,
        metadata.prediction_length,
        Dataset=nnts.torch.datasets.TimeseriesLagsDataset,
        dataset_options=dataset_options,
        Sampler=nnts.torch.datasets.TimeSeriesSampler,
    )
    # The run config merges hyperparameters, dataset metadata and scenario
    # settings; on key collisions the later dict wins.
    logger = nnts.loggers.LocalFileRun(
        project=f"{model_name}-{metadata.dataset}",
        name=scenario.name,
        config={
            **params.__dict__,
            **metadata.__dict__,
            **scenario.__dict__,
        },
        path=PATH,
    )
    net = deepar.DeepARPoint(
        nnts.torch.models.LinearModel,
        params,
        nnts.torch.preprocessing.masked_mean_abs_scaling,
        1,  # NOTE(review): presumably the output dimension - confirm
        # Use the scenario's own lags so the model always matches the lags
        # the dataset was built with (previously the notebook-level `lag_seq`
        # was passed here, which would diverge if a scenario carried
        # different lags).
        lag_seq=scenario.lag_seq,
        scaled_features=scenario.scaled_covariates,
    )
    trner = project_trainers.TorchEpochTrainer(
        nnts.trainers.TrainerState(),
        net,
        params,
        metadata,
        os.path.join(PATH, f"{scenario.name}.pt"),
    )
    # Hook the logger up to the trainer's events before training starts.
    logger.configure(trner.events)

    # train() returns the evaluator used for the held-out predictions below.
    evaluator = trner.train(trn_dl)
    y_hat, y = evaluator.evaluate(
        test_dl, scenario.prediction_length, metadata.context_length
    )
    # The seasonal error is computed from the training loader and passed to
    # calc_metrics (presumably as the MASE denominator - confirm).
    test_metrics = nnts.metrics.calc_metrics(
        y, y_hat, nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
    )
    logger.log(test_metrics)
    print(test_metrics)
    logger.finish()

In [None]:
# Build an aggregator over the run directory PATH and invoke it.
# NOTE(review): presumably this merges the per-run outputs into
# PATH/results.csv, which the next cell reads - confirm against
# utils.CSVFileAggregator.
csv_aggregator = utils.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [None]:
# Reload the results from disk (this rebinds `results`) and display the table.
results = pd.read_csv(f"{PATH}/results.csv")
results

In [None]:
# Mean sMAPE and MASE over all rows of the results table.
results[['smape', 'mase']].mean()