In [2]:
import matplotlib.pyplot as plt
import os
import torch
from typing import List
import pandas as pd
import gluonts
import numpy as np
import nnts
import nnts.data
import nnts.experiments
from nnts import utils, datasets
import nnts.torch.preprocessing
import nnts.torch.models
import trainers
import nnts.metrics
import nnts.torch.datasets
import nnts.loggers
import nnts.datasets
import nnts
import nnts.experiments.plotting
import projects.deepar.runner as runner
from features import LagScenario

# Print tensors with 8 decimal places and without scientific notation.
torch.set_printoptions(precision=8, sci_mode=False)
# Enable autoreload so edits to imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

ModuleNotFoundError: No module named 'projects'

In [4]:
# --- Locations and names -----------------------------------------------------
data_path = "data"
results_path = "ablation-results"
dataset_name = "electricity_daily"
base_model_name = "base-lstm"
# NOTE(review): "bettter" (three t's) looks like a typo, but the value is part of
# the results directory (see PATH below), so renaming it would orphan existing runs.
model_name = "bettter-deepar"

# --- Dataset metadata and derived paths --------------------------------------
metadata_path = os.path.join(data_path, f"{base_model_name}-monash.json")
metadata = datasets.load_metadata(dataset_name, path=metadata_path)
datafile_path = os.path.join(data_path, metadata.filename)
# All artefacts of this notebook are written below PATH.
PATH = os.path.join(results_path, model_name, metadata.dataset)

# --- Raw data and hyperparameters --------------------------------------------
# read_tsf returns several values; only the first one (the data frame) is kept.
df_orig, *_ = nnts.datasets.read_tsf(datafile_path)
params = utils.Hyperparams(
    optimizer=torch.optim.Adam,
    loss_fn=torch.nn.L1Loss,
    batch_size=32,
    batches_per_epoch=50,
)

utils.makedirs_if_not_exists(PATH)

In [18]:
def create_time_features(df_orig: pd.DataFrame):
    df_orig["day_of_week"] = df_orig["ds"].dt.day_of_week
    df_orig["hour"] = df_orig["ds"].dt.hour
    df_orig["week"] = df_orig["ds"].dt.isocalendar().week
    df_orig["week"] = df_orig["week"].astype(np.float32)

    df_orig["month"] = df_orig["ds"].dt.month
    df_orig["month"] = np.cos(df_orig["month"] * 2 * np.pi / 12)

    df_orig["unix_timestamp"] = (
        df_orig["ds"] - pd.Timestamp("1970-01-01")
    ) // pd.Timedelta("1s")

    return df_orig

In [19]:
# Add the calendar/time covariates; df_orig is rebound to the returned frame.
df_orig = create_time_features(df_orig)

In [20]:
# NOTE(review): this assigns the column to itself, so it has no effect. It looks
# like the leftover of a removed transformation and can probably be deleted.
df_orig["unix_timestamp"] = df_orig["unix_timestamp"]

In [22]:
# Inspect the natural log of the timestamp column (display only; the result is
# not assigned back to df_orig).
np.log(df_orig["unix_timestamp"])

0      19.473559
1      19.481965
2      19.491190
3      19.500037
4      19.509097
         ...    
235    20.690412
236    20.693087
237    20.695844
238    20.698504
239    20.701246
Name: unix_timestamp, Length: 109280, dtype: float64

In [None]:
# Fit a MaxMinScaler on the "unix_timestamp" column of the whole dataset in one
# go, then replace df_orig with the transformed frame.
# NOTE(review): the previous comment here described the GluonTS month covariate
# (month scaled to [-0.5, 0.5]); this cell does not touch "month", which is
# cosine-encoded in create_time_features. The output range of MaxMinScaler is
# not visible from this notebook -- confirm in nnts.
# NOTE(review): the import cell imports `nnts.torch.preprocessing`, whereas this
# cell references `nnts.torch.data.preprocessing` -- confirm which path the
# installed nnts version exposes.
max_min_scaler = nnts.torch.data.preprocessing.MaxMinScaler()
max_min_scaler.fit(df_orig, ["unix_timestamp"])
df_orig = max_min_scaler.transform(df_orig, ["unix_timestamp"])

In [None]:
# Plot the "month" feature over the last 36 rows of series T1, indexed by "ds".
ax = (
    df_orig.loc[df_orig["unique_id"] == "T1"]
    .set_index("ds")
    .tail(36)["month"]
    .plot(figsize=(20, 5))
)
fig = ax.get_figure()

In [None]:
from dataclasses import dataclass, field

# Start from the lags GluonTS proposes for the dataset frequency, drop every lag
# that is not greater than 1 and shift the remaining ones down by one.
lag_seq = [
    lag - 1
    for lag in gluonts.time_feature.lag.get_lags_for_frequency(metadata.freq)
    if lag > 1
]

In [None]:

# Candidate covariates for the ablation; the order defines the matrix columns.
scaled_covariates = ["month", "unix_timestamp", nnts.torch.models.deepar.FEAT_SCALE]

# One row per non-empty on/off combination of the three covariates above.
# Counting from 1 to 7 and reading each number as three bits (most significant
# bit first) yields [0,0,1], [0,1,0], [0,1,1], [1,0,0], [1,0,1], [1,1,0], [1,1,1].
scaled_covariate_selection_matrix = [
    [(combination >> bit) & 1 for bit in (2, 1, 0)]
    for combination in range(1, 8)
]

In [None]:
def _ablation_scenario(seed: int, row: List[int]):
    """Build the LagScenario for one seed and one row of the selection matrix."""
    # Keep the covariates whose flag in this row is switched on.
    chosen = [name for name, flag in zip(scaled_covariates, row) if flag == 1]
    return LagScenario(
        metadata.prediction_length,
        # FEAT_SCALE is passed as a scaled covariate only, never via `conts`.
        conts=[name for name in chosen if name != nnts.torch.models.deepar.FEAT_SCALE],
        scaled_covariates=chosen,
        lag_seq=lag_seq,
        seed=seed,
        dataset=metadata.dataset,
    )


# Five seeds x every covariate combination, seed-major order.
scenario_list: List[nnts.experiments.Scenario] = [
    _ablation_scenario(seed, row)
    for seed in [42, 43, 44, 45, 46]
    for row in scaled_covariate_selection_matrix
]

In [None]:
# BASELINE
# Rebuilds scenario_list from scratch (anything built earlier is discarded):
# one month-only scenario per seed.
scenario_list = [
    LagScenario(
        metadata.prediction_length,
        conts=["month"],
        scaled_covariates=["month"],
        lag_seq=lag_seq,
        seed=seed,
        dataset=metadata.dataset,
    )
    for seed in [42, 43, 44, 45, 46]
]

In [None]:
# DeepAR uses Teacher Forcing but we can use Free Running.
# The attribute is set on the shared `params` object, which is later handed to
# both the model and the trainer in the training loop.
params.training_method = utils.TrainingMethod.FREE_RUNNING

In [None]:
# Train and evaluate one model per scenario and log its test metrics.
# NOTE(review): `df`, `y_hat` and `scenario` from the final iteration remain in
# the notebook namespace and are read by the plotting cell at the end.
# NOTE(review): the import cell imports `nnts.torch.datasets` and
# `nnts.torch.preprocessing`, whereas this loop references
# `nnts.torch.data.datasets` and `nnts.torch.data.preprocessing`; it also uses
# `nnts.trainers` and `nnts.torch.utils`, which are not imported explicitly --
# confirm these resolve with the installed nnts version.
for scenario in scenario_list:
    # Seed first, before the data loaders and the network are created.
    nnts.torch.utils.seed_everything(scenario.seed)
    # Work on a copy so df_orig stays untouched across scenarios.
    df = df_orig.copy()
    # Widen the context window by the largest lag -- presumably so the earliest
    # context step still has all of its lagged values available (TODO confirm).
    context_length = metadata.context_length + max(scenario.lag_seq)
    split_data = nnts.datasets.split_test_train_last_horizon(
        df, context_length, metadata.prediction_length
    )
    trn_dl, test_dl = nnts.data.create_trn_test_dataloaders(
        split_data,
        metadata,
        scenario,
        params,
        nnts.torch.data.datasets.TorchTimeseriesLagsDataLoaderFactory(),
    )
    # The run config merges the three attribute dicts; on duplicate keys the
    # later dict wins (scenario over metadata over params).
    logger = nnts.loggers.LocalFileRun(
        project=f"{model_name}-{metadata.dataset}",
        name=scenario.name,
        config={
            **params.__dict__,
            **metadata.__dict__,
            **scenario.__dict__,
        },
        path=PATH,
    )
    # NOTE(review): `lag_seq` is the notebook-level list rather than
    # `scenario.lag_seq`; they match as long as the scenarios were built from the
    # same `lag_seq`. The meaning of the positional `1` is not visible here --
    # presumably an output dimension, TODO confirm.
    net = nnts.torch.models.DeepAR(
        nnts.torch.models.LinearModel,
        params,
        nnts.torch.data.preprocessing.masked_mean_abs_scaling,
        1,
        lag_seq=lag_seq,
        scaled_features=scenario.scaled_covariates,
    )
    # The last argument is a per-scenario .pt path under PATH -- presumably where
    # the trainer stores the model weights (TODO confirm).
    trner = trainers.TorchEpochTrainer(
        nnts.trainers.TrainerState(), 
        net, 
        params, 
        metadata, 
        os.path.join(PATH, f"{scenario.name}.pt"),
    )
    # Hook the logger up to the trainer's events before training starts.
    logger.configure(trner.events)

    # train() returns the evaluator that is then used on the test loader.
    evaluator = trner.train(trn_dl)
    y_hat, y = evaluator.evaluate(
        test_dl, scenario.prediction_length, metadata.context_length
    )
    # The seasonal error is computed from the training loader and passed to
    # calc_metrics as its third argument.
    test_metrics = nnts.metrics.calc_metrics(
        y, y_hat, nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
    )
    logger.log(test_metrics)
    print(test_metrics)
    # NOTE(review): finish() is only reached when the iteration succeeds; an
    # exception earlier in the body leaves this run unfinished.
    logger.finish()

In [None]:
# Aggregate the per-run outputs found under PATH into a single "results" table.
# Presumably this also writes PATH/results.csv, which the next cell reads --
# confirm in nnts.datasets.CSVFileAggregator.
csv_aggregator = nnts.datasets.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [41]:
# Load the aggregated results from disk (this rebinds `results`) and display them.
# NOTE(review): the recorded FileNotFoundError shows results.csv did not exist
# when this cell last ran -- presumably because the aggregation cell had not
# been executed in that session (it has no execution count).
results = pd.read_csv(f"{PATH}/results.csv")
results

FileNotFoundError: [Errno 2] No such file or directory: 'ablation-results/bettter-deepar/electricity/results.csv'

In [None]:
# Mean of the 'smape' and 'mase' columns over every row of the results table.
results[['smape', 'mase']].mean()

In [None]:
# Attach the predictions to the data and plot them.
# NOTE(review): `df`, `y_hat` and `scenario` are whatever the last iteration of
# the training loop left behind, so this only shows the final scenario and fails
# on a kernel where the loop has not run. `runner` comes from
# `projects.deepar.runner`, whose import failed in the recorded first cell.
df_list = runner.add_y_hat(df, y_hat, scenario.prediction_length)
sample_preds = nnts.experiments.plotting.plot(df_list, scenario.prediction_length)