In [5]:
import os
import torch
from typing import List
import pandas as pd
import gluonts
import nnts
import nnts.data
import nnts.experiments
from nnts import utils, datasets
import nnts.torch.preprocessing
import nnts.torch.models
import trainers
import nnts.metrics
import nnts.torch.datasets
import nnts.loggers
import nnts
import nnts.experiments.plotting
import deepar
#from deepar import LagScenario

import torch.nn.functional as F

# Print tensors with 8 decimal places and without scientific notation.
torch.set_printoptions(precision=8, sci_mode=False)
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Experiment configuration: input location, model/dataset identifiers and the
# directory that results are written to.
data_path = "data"
model_name = "deepar"
base_model_name = "base-lstm"
dataset_name = "tourism_monthly"
results_path = "ablation-results"
# Dataset metadata is looked up by dataset name in
# "<data_path>/<base_model_name>-monash.json".
metadata_path = os.path.join(data_path, f"{base_model_name}-monash.json")
metadata = datasets.load_metadata(dataset_name, path=metadata_path)
datafile_path = os.path.join(data_path, metadata.filename)
# Output directory for this experiment: <results_path>/<model_name>/<dataset>.
PATH = os.path.join(results_path, model_name, metadata.dataset)

# read_tsf returns several values; only the first one is kept (it is used as a
# pandas DataFrame by the cells below).
df_orig, *_ = datasets.read_tsf(datafile_path)
# Hyperparameters: Adam optimiser with a smooth L1 loss.
params = utils.Hyperparams(optimizer = torch.optim.Adam, loss_fn=F.smooth_l1_loss)

# Presumably creates PATH when it is missing (going by the helper's name), so
# that later cells can write into it.
utils.makedirs_if_not_exists(PATH)

In [None]:
# GluonTS defaults for the batch size and the number of batches per epoch
# (per the original author's note; not verified in this notebook).
params.batch_size = 32
params.batches_per_epoch = 50

In [None]:
# Derive the time feature columns from the raw frame. The result has to be
# bound to `df_orig`, the name every later cell reads; it was previously
# assigned to a misspelled `df_org` that nothing else used.
df_orig = deepar.create_time_features(df_orig)

In [None]:
# GluonTS generates the month covariate used for the tourism dataset by
# extracting the month from the date column and scaling it to a value between
# -0.5 and 0.5. Here we apply that scaling to the whole dataset in one go.
# NOTE(review): the scaler is fitted on and applied to four columns ("month",
# "week", "day_of_week", "hour"), so all of them must already be present in
# df_orig when this cell runs.
max_min_scaler = nnts.torch.data.preprocessing.MaxMinScaler()
max_min_scaler.fit(df_orig, ["month", "week", "day_of_week", "hour"])
df_orig = max_min_scaler.transform(df_orig, ["month", "week", "day_of_week", "hour"])

In [None]:
# Sanity-check plot: the "month" column over the last 36 rows of series "T1",
# indexed by the "ds" column.
t1_rows = df_orig[df_orig['unique_id'] == 'T1']
t1_recent = t1_rows.set_index('ds').tail(36)
ax = t1_recent['month'].plot(figsize=(20, 5))
fig = ax.get_figure()

In [None]:
from dataclasses import dataclass, field
# Start from the lags GluonTS proposes for the dataset frequency, drop every
# lag that is not greater than 1, and shift the remaining ones down by one.
# NOTE(review): the shift presumably converts to the lag indexing the model
# expects - confirm against the DeepAR implementation.
gluonts_lags = gluonts.time_feature.lag.get_lags_for_frequency(metadata.freq)
lag_seq = [candidate - 1 for candidate in gluonts_lags if candidate > 1]

In [None]:
# Candidate covariates for the ablation study.
scaled_covariates = ["month", "unix_timestamp", nnts.torch.models.deepar.FEAT_SCALE]
# Each row is a 0/1 mask over `scaled_covariates` (same order): 1 keeps the
# covariate, 0 drops it. All seven non-empty combinations are listed; the
# all-zero row (no covariates) is not part of this matrix.
scaled_covariate_selection_matrix = [
    [0,0,1],
    [0,1,0],
    [0,1,1],
    [1,0,0],
    [1,0,1],
    [1,1,0],
    [1,1,1],
]

In [None]:
# Build one scenario per (seed, covariate combination) pair.
scenario_list: List[nnts.experiments.Scenario] = []

for seed in [42, 43, 44, 45, 46]:
    for row in scaled_covariate_selection_matrix:
        # Keep the covariates whose mask entry in this row is 1.
        selected_combination = [
            covariate
            for covariate, select in zip(scaled_covariates, row)
            if select == 1
        ]
        scenario_list.append(
            # LagScenario is reached through the imported `deepar` module: the
            # bare name is never imported in this notebook (its import is
            # commented out), so using it unqualified raises NameError on a
            # fresh kernel.
            deepar.LagScenario(
                metadata.prediction_length,
                # FEAT_SCALE is passed as a scaled covariate only; it is
                # excluded from `conts`.
                conts=[cov for cov in selected_combination if cov != nnts.torch.models.deepar.FEAT_SCALE],
                scaled_covariates=selected_combination,
                lag_seq=lag_seq,
                seed=seed,
                dataset=metadata.dataset,
            )
        )

In [None]:
# Add the baseline scenarios (no covariates at all), one per seed.
# The scenarios are appended to the existing `scenario_list`; the list is
# deliberately not re-created here, because doing so would throw away every
# scenario that was already in it. Note that re-running this cell appends the
# baselines again.
for seed in [42, 43, 44, 45, 46]:
    # LagScenario is reached through the imported `deepar` module because the
    # bare name is never imported in this notebook.
    scenario = deepar.LagScenario(
        metadata.prediction_length,
        conts=[],
        scaled_covariates=[],
        lag_seq=lag_seq,
        seed=seed,
        dataset=metadata.dataset,
    )
    scenario_list.append(scenario)

In [None]:
# DeepAR uses Teacher Forcing, so select that training method in the shared
# hyperparameters before any model is trained.
params.training_method = utils.Hyperparams.TrainingMethod.TEACHER_FORCING

In [None]:
# Train and evaluate one DeepAR model per scenario, logging the test metrics
# of each run under PATH.
for scenario in scenario_list:
    # Seed first, so that everything below runs under the scenario's seed.
    nnts.torch.utils.seed_everything(scenario.seed)
    # Work on a copy so the source frame is left untouched between scenarios.
    df = df_orig.copy()
    # The context window is extended by the largest lag of the scenario.
    context_length = metadata.context_length + max(scenario.lag_seq)
    split_data = nnts.pandas.split_test_train_last_horizon(
        df, context_length, metadata.prediction_length
    )
    trn_dl, test_dl = nnts.data.create_trn_test_dataloaders(
        split_data,
        metadata,
        scenario,
        params,
        nnts.torch.data.datasets.TorchTimeseriesLagsDataLoaderFactory(),
    )
    # The run configuration merges hyperparameters, dataset metadata and
    # scenario attributes; on a key clash the later dict wins.
    logger = nnts.loggers.LocalFileRun(
        project=f"{model_name}-{metadata.dataset}",
        name=scenario.name,
        config={
            **params.__dict__,
            **metadata.__dict__,
            **scenario.__dict__,
        },
        path=PATH,
    )
    net = nnts.torch.models.DeepAR(
        nnts.torch.models.LinearModel,
        params,
        nnts.torch.data.preprocessing.masked_mean_abs_scaling,
        1,
        # Use the scenario's own lags, i.e. the same ones the context length
        # above was derived from, rather than the notebook-level `lag_seq`.
        lag_seq=scenario.lag_seq,
        scaled_features=scenario.scaled_covariates,
    )
    # The last argument is the per-scenario "<scenario.name>.pt" path under PATH.
    trner = trainers.TorchEpochTrainer(
        nnts.trainers.TrainerState(),
        net,
        params,
        metadata,
        os.path.join(PATH, f"{scenario.name}.pt")
    )
    logger.configure(trner.events)

    # train() returns the evaluator that is used for the test-set forecast.
    evaluator = trner.train(trn_dl)
    y_hat, y = evaluator.evaluate(
        test_dl, scenario.prediction_length, metadata.context_length
    )
    # Metrics are computed from the targets, the forecasts and a seasonal
    # error derived from the training loader.
    test_metrics = nnts.metrics.calc_metrics(
        y, y_hat, nnts.metrics.calculate_seasonal_error(trn_dl, metadata)
    )
    logger.log(test_metrics)
    print(test_metrics)
    logger.finish()

In [None]:
# Aggregate the run outputs found under PATH.
# NOTE(review): presumably this writes "<PATH>/results.csv", which the next
# cell reads back - confirm against CSVFileAggregator.
csv_aggregator = nnts.utils.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [None]:
# Load "<PATH>/results.csv" and display it. This rebinds `results`, replacing
# the value returned by the aggregator call.
results = pd.read_csv(f"{PATH}/results.csv")
results

In [None]:
# Plot sample forecasts. `df`, `y_hat` and `scenario` are the values left over
# from the final iteration of the training loop, so this cell only shows the
# last scenario that was run.
df_list = deepar.add_y_hat(df, y_hat, scenario.prediction_length)
sample_preds = nnts.experiments.plotting.plot(df_list, scenario.prediction_length)

In [2]:
def generate_one_hot_matrix(n):
    """Return every 0/1 combination of length ``n`` in binary counting order.

    Despite the name, the rows are not one-hot vectors: row ``i`` holds the
    ``n``-bit binary representation of ``i``, most significant bit first, so
    the result enumerates all ``2**n`` on/off selections of ``n`` items.

    Args:
        n: Number of columns (bits) per row; a non-negative integer.

    Returns:
        A list of ``2**n`` rows, each a list of ``n`` ints that are 0 or 1.
        For ``n == 0`` this is ``[[]]``: exactly one, empty, combination.

    Raises:
        TypeError: If ``n`` is negative or not an integer, because ``2**n`` is
            then not a valid ``range`` argument.
    """
    # Total number of rows in the matrix
    num_rows = 2**n

    # Bit positions from most to least significant. The bits are read with
    # shifts rather than by formatting the number as a string, because
    # format(0, "00b") yields "0" and would give a one-column row for n == 0.
    shifts = range(n - 1, -1, -1)

    return [[(i >> shift) & 1 for shift in shifts] for i in range(num_rows)]


# Example usage for n=5
n = 5
one_hot_matrix = generate_one_hot_matrix(n)

# Print the whole matrix (all 2**n rows) to verify
print(one_hot_matrix)

[[0, 0, 0, 0, 0], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0], [0, 0, 0, 1, 1], [0, 0, 1, 0, 0], [0, 0, 1, 0, 1], [0, 0, 1, 1, 0], [0, 0, 1, 1, 1], [0, 1, 0, 0, 0], [0, 1, 0, 0, 1], [0, 1, 0, 1, 0], [0, 1, 0, 1, 1], [0, 1, 1, 0, 0], [0, 1, 1, 0, 1], [0, 1, 1, 1, 0], [0, 1, 1, 1, 1], [1, 0, 0, 0, 0], [1, 0, 0, 0, 1], [1, 0, 0, 1, 0], [1, 0, 0, 1, 1], [1, 0, 1, 0, 0], [1, 0, 1, 0, 1], [1, 0, 1, 1, 0], [1, 0, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 0, 0, 1], [1, 1, 0, 1, 0], [1, 1, 0, 1, 1], [1, 1, 1, 0, 0], [1, 1, 1, 0, 1], [1, 1, 1, 1, 0], [1, 1, 1, 1, 1]]
