In [None]:
import os
import torch
from typing import List
import pandas as pd
import gluonts
import nnts
import nnts.data
import nnts.torch.preprocessing
import nnts.torch.models
import trainers as project_trainers
import nnts.metrics
import nnts.torch.datasets
import nnts.loggers
import nnts.datasets
import nnts
import nnts.torch.utils
from nnts import utils, trainers
import features
import deepar
import baseline

# Show tensors with 8 decimal places and never in scientific notation.
torch.set_printoptions(precision=8, sci_mode=False)
# IPython autoreload, mode 2: imported modules are re-imported before each
# cell runs, so edits to the local project modules take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# --- Experiment configuration ------------------------------------------------
# NOTE(review): data_path is not referenced anywhere else in the visible
# notebook - confirm whether it is still needed.
data_path = "data"
model_name = "distr-deepar"
dataset_name = "tourism_monthly"
results_path = "ablation-results"

# Load the raw frame together with its metadata object; later cells read
# metadata.dataset, .freq, .context_length, .prediction_length and .seasonality.
df_orig, metadata = nnts.datasets.load_dataset(dataset_name)

# All artefacts of this notebook (checkpoints, run logs, aggregated CSV) live
# under <results_path>/<model_name>/<dataset>; make sure the folder exists.
PATH = os.path.join(results_path, model_name, metadata.dataset)
utils.makedirs_if_not_exists(PATH)

# Hyperparameters shared by every scenario.
# NOTE(review): loss_fn here is nnts.torch.models.deepar.distr_nll, whereas the
# training cell passes baseline.distr_nll to the trainer explicitly. Which one
# is actually used for optimisation is not visible from this notebook - confirm
# and align the two.
params = utils.Hyperparams(
    optimizer=torch.optim.Adam,
    loss_fn=nnts.torch.models.deepar.distr_nll,
    batches_per_epoch=50,
    batch_size=32,
    training_method=utils.TrainingMethod.TEACHER_FORCING,
)

In [None]:
# Enrich the raw frame in two steps: time-derived features first, then the
# dummy unique-id features.
# Presumably these add the "month", "unix_timestamp" and "unique_id_0" columns
# that later cells refer to - confirm in features.py.
# NOTE: df_orig is rebound to the enriched frame, so re-running this cell
# applies both helpers to already-enriched data.
df_orig = features.create_dummy_unique_ids(
    features.create_time_features(df_orig)
)

In [None]:
# Visual sanity check of the "month" feature: last 36 rows of series 'T1',
# indexed by the 'ds' column.
is_t1 = df_orig['unique_id'] == 'T1'
t1_recent = df_orig[is_t1].set_index('ds').tail(36)
ax = t1_recent['month'].plot(figsize=(20, 5))
fig = ax.get_figure()

In [None]:
# Import the submodule explicitly. A bare `import gluonts` is not guaranteed to
# load `gluonts.time_feature`, so without this line the attribute access below
# only works when another imported module happens to have loaded it already.
import gluonts.time_feature.lag

# Lag indices GluonTS proposes for the dataset's frequency.
lag_seq = gluonts.time_feature.lag.get_lags_for_frequency(metadata.freq)
# Discard every lag that is not greater than 1 and shift the remaining ones
# down by one.
# NOTE(review): presumably this converts to the 0-based lag offsets expected by
# the lag dataset/model - confirm against TimeseriesLagsDataset.
lag_seq = [lag - 1 for lag in lag_seq if lag > 1]

In [None]:

# Candidate inputs for the ablation, in the column order used by the selection
# matrix: two time features plus the model's scale feature.
scaled_covariates = [
    "month",
    "unix_timestamp",
    nnts.torch.models.deepar.FEAT_SCALE,
]

# Every non-empty on/off combination of three covariates, one row per
# combination. Row k (k = 1..7) is the 3-bit binary expansion of k, most
# significant bit first, i.e. [0,0,1], [0,1,0], ..., [1,1,1].
scaled_covariate_selection_matrix = [
    [(mask >> shift) & 1 for shift in (2, 1, 0)]
    for mask in range(1, 8)
]

In [None]:
# ABLATION: one scenario per (seed, covariate combination) pair - five seeds
# times the seven rows of the selection matrix, seeds in the outer loop.
# NOTE: the BASELINE cell below rebinds scenario_list, so when the notebook is
# run top to bottom these scenarios are replaced before training starts. Run
# only one of the two scenario cells.
scenario_list: List[features.LagScenario] = []
feat_scale = nnts.torch.models.deepar.FEAT_SCALE

for seed in range(42, 47):
    for flags in scaled_covariate_selection_matrix:
        # Covariates whose flag is switched on in this row.
        chosen = [
            name for name, flag in zip(scaled_covariates, flags) if flag == 1
        ]
        # The scale feature is listed among the scaled covariates only; it is
        # left out of the dataset's continuous columns.
        dataset_conts = [name for name in chosen if name != feat_scale]
        scenario_list.append(
            features.LagScenario(
                metadata.prediction_length,
                conts=dataset_conts,
                scaled_covariates=chosen,
                lag_seq=lag_seq,
                seed=seed,
                dataset=metadata.dataset,
            )
        )

In [None]:
# BASELINE
# Fixed covariate set, repeated for five seeds.
# NOTE: this rebinds scenario_list and therefore discards whatever the
# ablation cell above put into it.
conts = ["month", "unix_timestamp", "unique_id_0"]
scenario_list = [
    features.LagScenario(
        metadata.prediction_length,
        conts=conts,
        # Same columns as the dataset covariates, plus the scale feature.
        scaled_covariates=[*conts, nnts.torch.models.deepar.FEAT_SCALE],
        lag_seq=lag_seq,
        seed=seed,
        dataset=metadata.dataset,
    )
    for seed in (42, 43, 44, 45, 46)
]

In [None]:
# Train and evaluate one model per scenario. Each iteration seeds the RNGs,
# builds dataloaders, logs a run under PATH, trains, evaluates on the test
# split and records the resulting metrics.
for scenario in scenario_list:
    # Seed first so that everything built below is reproducible per scenario.
    nnts.torch.utils.seed_everything(scenario.seed)
    # Work on a copy so df_orig stays untouched across scenarios.
    df = df_orig.copy()
    # The window handed to the dataloaders is the model context extended by
    # the largest lag.
    # NOTE(review): presumably so lagged values exist for every context step -
    # confirm against TimeseriesLagsDataset.
    context_length = metadata.context_length + max(scenario.lag_seq)
    dataset_options = {
        "context_length": metadata.context_length,
        "prediction_length": metadata.prediction_length,
        "conts": scenario.conts,
        "lag_seq": scenario.lag_seq,
    }

    trn_dl, test_dl = nnts.torch.utils.create_dataloaders(
        df,
        nnts.datasets.split_test_train_last_horizon,
        context_length,
        metadata.prediction_length,
        Dataset=nnts.torch.datasets.TimeseriesLagsDataset,
        dataset_options=dataset_options,
        Sampler=nnts.torch.datasets.TimeSeriesSampler,
    )
    # Run logger; the recorded config is the union of the hyperparameter,
    # metadata and scenario attributes (later dicts win on key clashes).
    logger = nnts.loggers.LocalFileRun(
        project=f"{model_name}-{metadata.dataset}",
        name=scenario.name,
        config={
            **params.__dict__,
            **metadata.__dict__,
            **scenario.__dict__,
        },
        path=PATH,
    )
    net = deepar.DistrDeepAR(
        baseline.StudentTHead,
        params,
        baseline.masked_mean_abs_scaling,
        # NOTE(review): positional 1 - presumably the output dimension;
        # confirm against the DistrDeepAR signature.
        1,
        # Take the lags from the scenario (not the notebook-level lag_seq) so
        # the model always agrees with the dataset options built above.
        lag_seq=scenario.lag_seq,
        scaled_features=scenario.scaled_covariates,
        context_length=metadata.context_length,
    )
    print(nnts.torch.utils.count_of_params_in(net))
    # The checkpoint path is derived from the scenario name.
    # NOTE(review): the trainer is given baseline.distr_nll while
    # params.loss_fn (which is what gets logged in the run config) is
    # nnts.torch.models.deepar.distr_nll - confirm which one is intended.
    trner = project_trainers.TorchEpochTrainer(
        trainers.TrainerState(),
        net,
        params,
        metadata,
        os.path.join(PATH, f"{scenario.name}.pt"),
        loss_fn=baseline.distr_nll,
    )
    # Hook the logger up to the trainer's events before training starts.
    logger.configure(trner.events)

    # train() returns the evaluator used for the test-set forecast.
    evaluator = trner.train(trn_dl)
    y_hat, y = evaluator.evaluate(
        test_dl, scenario.prediction_length, metadata.context_length
    )
    # The seasonal error is computed from the training dataloader and passed
    # to the metric calculation.
    test_metrics = nnts.metrics.calc_metrics(
        y, y_hat, nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
    )
    logger.log(test_metrics)
    print(test_metrics)
    logger.finish()

In [None]:
# Build an aggregator for the run outputs under PATH and invoke it.
# NOTE(review): presumably this merges the per-run files into
# PATH/results.csv (the next cell reads that file) - confirm in
# utils.CSVFileAggregator.
csv_aggregator = utils.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [None]:
# Read results.csv from PATH; this rebinds `results`, replacing the value
# returned by the aggregator call in the previous cell.
results = pd.read_csv(f"{PATH}/results.csv")
# Bare expression so the notebook renders the frame.
results

In [None]:
# Column-wise mean of the 'smape' and 'mase' columns over all rows in results.
results[['smape', 'mase']].mean()