In [None]:
import os
from typing import List

import numpy as np
import pandas as pd
import torch.nn.functional as F
import torch.optim

import nnts
import nnts.data
import nnts.datasets
import nnts.experiments.plotting
import nnts.loggers
import nnts.metrics
import nnts.torch.datasets
import nnts.torch.models
import nnts.torch.utils
from nnts import utils

import covs 
import trainers

%load_ext autoreload
%autoreload 2

In [None]:
# Context length used by the seg-lstm model, keyed by dataset name.
seg_lstm_context_lengths = dict(
    us_births=91,
    tourism_monthly=73,
    hospital=37,
    electricity_hourly=505,
    traffic_weekly=65,
    traffic_hourly=505,
)

In [None]:
# Experiment identity: model, dataset, and the root directory for artefacts.
model_name, dataset_name, results_path = "seg-lstm", "hospital", "nb-results"

# Load the dataset, then set the context length from the seg-lstm lookup table.
df_orig, metadata = nnts.datasets.load_dataset(dataset_name)
metadata.context_length = seg_lstm_context_lengths[metadata.dataset]

# Artefacts for this run live under <results_path>/<model_name>/<dataset>.
PATH = os.path.join(results_path, model_name, metadata.dataset)
utils.makedirs_if_not_exists(PATH)

params = utils.Hyperparams(loss_fn=F.smooth_l1_loss, optimizer=torch.optim.AdamW)
params

In [None]:
# Baseline scenarios: no covariates and zero error, one per random seed.
scenario_list: List[covs.CovariateScenario] = [
    covs.CovariateScenario(metadata.prediction_length, error=0.0, covariates=0, seed=seed)
    for seed in [42, 43, 44, 45, 46]
]

# Full-forecast-horizon scenarios: each covariate count (1-3) crossed with
# every error level listed for this dataset in covs.errors.
scenario_list.extend(
    covs.CovariateScenario(metadata.prediction_length, error, covariates=covariates)
    for covariates in [1, 2, 3]
    for error in covs.errors[metadata.dataset]
)

# One extra scenario: three covariates, zero error, skip=1.
scenario_list.append(
    covs.CovariateScenario(metadata.prediction_length, 0, covariates=3, skip=1)
)

In [None]:
# Train, evaluate and log each selected scenario.
#
# For every scenario the loop seeds the RNGs, prepares the data frame, builds
# the train/validation/test dataloaders, trains the model, evaluates it on the
# test loader (capturing decoder activations through a forward hook) and logs
# the resulting test metrics.
#
# After the loop, `df`, `scenario`, `y_hat` and `y` hold the values from the
# last iteration.

# Same value for every scenario, so it is set once before the loop.
params.batches_per_epoch = 500

# NOTE(review): the [:1] slice means only the first scenario is run.
for scenario in scenario_list[:1]:
    nnts.torch.utils.seed_everything(scenario.seed)
    # Work on a copy so df_orig is never handed to prepare() directly.
    df, scenario = covs.prepare(df_orig.copy(), scenario)
    trn_dl, val_dl, test_dl = nnts.torch.utils.create_dataloaders(
        df,
        nnts.datasets.split_test_val_train_last_horizon,
        metadata.context_length,
        metadata.prediction_length,
        Dataset=nnts.torch.datasets.TimeseriesDataset,
        dataset_options={
            "context_length": metadata.context_length,
            "prediction_length": metadata.prediction_length,
            "conts": scenario.conts,
        },
        batch_size=params.batch_size,
    )
    # The logged config merges hyperparameters, dataset metadata and scenario
    # attributes; on a key clash the later dict wins.
    logger = nnts.loggers.WandbRun(
        project=f"{model_name}-{metadata.dataset}",
        name=scenario.name,
        config={
            **params.__dict__,
            **metadata.__dict__,
            **scenario.__dict__,
        },
        path=PATH,
    )
    net = covs.model_factory(model_name, params, scenario, metadata)

    trner = trainers.ValidationTorchEpochTrainer(
        trainers.TrainerState(), 
        net, 
        params, 
        metadata, 
        os.path.join(PATH, f"{scenario.name}.pt"),
        loss_fn=F.smooth_l1_loss
    )
    logger.configure(trner.events)

    evaluator = trner.train(trn_dl, val_dl)

    # Register the activation-logging hook for the evaluation pass only.
    # try/finally ensures the hook is removed even if evaluate() raises, so a
    # failed run does not leave it attached to the decoder.
    handle = net.decoder.register_forward_hook(logger.log_activations)
    try:
        y_hat, y = evaluator.evaluate(
            test_dl, scenario.prediction_length, metadata.context_length, hooks=handle
        )
    finally:
        handle.remove()

    test_metrics = nnts.metrics.calc_metrics(
        y, y_hat, nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
    )
    logger.log(test_metrics)
    logger.finish()

In [None]:
# Build the aggregator for PATH / "results", then call it to produce `results`.
# `utils` is the same module as `nnts.utils` (imported via `from nnts import utils`).
csv_aggregator = utils.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [None]:
# Load results.csv from the run directory into `results`.
# The path is built with os.path.join, matching how PATH and the checkpoint
# path are built elsewhere in this notebook, instead of a hard-coded "/".
results = pd.read_csv(os.path.join(PATH, "results.csv"))
results

In [None]:
# Add the predictions to the prepared frame and plot them.
# Uses `df`, `y_hat` and `scenario` as left by the last iteration of the
# training loop, so that cell must have been run first.
# `nnts.experiments.plotting` is imported in the notebook's top import cell.
df_list = covs.add_y_hat(df, y_hat, scenario.prediction_length)
sample_preds = nnts.experiments.plotting.plot(df_list, scenario.prediction_length)

In [None]:
# Metric columns for the rows with no covariates at the dataset's full
# prediction length.
metric_cols = ["smape", "mape", "rmse", "mae"]
is_univariate = results["covariates"] == 0
is_full_horizon = results["prediction_length"] == metadata.prediction_length
univariate_results = results.loc[is_univariate & is_full_horizon, metric_cols]

# Per-metric mean, standard deviation and row count across those rows.
univariate_results.mean(), univariate_results.std(), univariate_results.count()

In [None]:
# Columns displayed when inspecting the covariate results.
cols = [
    "dataset",
    "error",
    "pearson",
    "covariates",
    "prediction_length",
    "smape",
]

In [None]:
# Show the covariate runs at a few selected error levels.
# `results` was read back from CSV, so the error column is matched with a
# tolerance (np.isclose defaults) rather than exact float equality, which can
# silently drop rows when a value differs in its last digits.
selected_errors = [0.000000, 0.4714285714285714, 1.65]
error_matches = np.isclose(
    results["error"].to_numpy()[:, None], selected_errors
).any(axis=1)
results.loc[
    (results["covariates"] > 0) & error_matches,
    cols,
].sort_values(by=['covariates', 'error'])

In [None]:
# Distinct error levels present in the results, in ascending order.
sorted(results["error"].drop_duplicates().tolist())

In [None]:
# Eight evenly spaced values from 0 to 0.6 inclusive, as a plain Python list.
# numpy is imported in the notebook's top import cell rather than here.
np.linspace(0, 0.6, 8).tolist()