In [3]:
import torch
from typing import List
import pandas as pd
import seaborn as sns
from functools import partial
import os

import nnts
import nnts.data
import nnts.experiments
from nnts import utils, datasets
import nnts.torch.models
import nnts.metrics
import nnts.torch.datasets
import nnts.loggers
import nnts.torch.trainers
import nnts.datasets
import covs 
import nnts.experiments.plotting
import metric_generator
import torch.optim
import torch.nn.functional as F

sns.set()

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Locations used by the cells below, all relative to the working directory.
results_path, data_path = "results", "data"  # stored results / input datasets
ARTICLE_PATH = "nb-results/figures"  # handed to the plotting calls as `path`

# Ensure the figure directory is present before any plotting call receives it.
utils.makedirs_if_not_exists(ARTICLE_PATH)
def load_metrics_from_path(path, name):
    """Return the object stored in ``<path>/<name>_metrics.pt``, read with ``torch.load``."""
    metrics_file = os.path.join(path, f"{name}_metrics.pt")
    return torch.load(metrics_file)
# Loader with the results directory pre-bound; it only needs the metrics name.
# Passed as `loader_fn` to the plotting calls in the cells below.
load_metrics = partial(load_metrics_from_path, results_path)

In [4]:
def create_scenarios(
    df_orig: pd.DataFrame,
    metadata: datasets.Metadata,
    covariates: int,
    selected_indices: tuple = (0, 1, 3, 8),
) -> List[nnts.experiments.CovariateScenario]:
    """Build the covariate scenarios for one dataset and return a selected subset.

    The candidate list is built in this order, every entry using the full
    forecast horizon ``metadata.prediction_length``:

    * position 0 - the baseline scenario (error 0, ``covariates=0``);
    * positions 1..n - one scenario per entry of
      ``covs.errors[metadata.dataset]``, each with ``covariates`` covariates.

    The list is then passed through ``covs.prepare_scenarios`` and the entries
    at ``selected_indices`` are returned.

    Args:
        df_orig: Original dataset frame, forwarded to ``covs.prepare_scenarios``.
        metadata: Dataset metadata; ``prediction_length`` and ``dataset`` are read.
        covariates: Number of covariates used by every non-baseline scenario.
        selected_indices: Positions in the prepared scenario list to return.
            The default ``(0, 1, 3, 8)`` reproduces the previously hard-coded
            selection. (Assumes ``covs.prepare_scenarios`` keeps the order
            described above - TODO confirm.)

    Returns:
        The prepared scenarios at ``selected_indices``, in that order.

    Raises:
        IndexError: If any requested position does not exist in the prepared
            list (e.g. the dataset has too few error levels).
    """
    horizon = metadata.prediction_length

    # Position 0: baseline without covariates.
    scenario_list: List[nnts.experiments.CovariateScenario] = [
        nnts.experiments.CovariateScenario(horizon, 0, covariates=0)
    ]
    # Positions 1..n: full forecast horizon with covariates, one per error level.
    scenario_list.extend(
        nnts.experiments.CovariateScenario(horizon, error, covariates=covariates)
        for error in covs.errors[metadata.dataset]
    )

    scenario_list = covs.prepare_scenarios(df_orig, scenario_list)

    try:
        return [scenario_list[i] for i in selected_indices]
    except IndexError as exc:
        # Same exception type as before, but say which selection/dataset failed.
        raise IndexError(
            f"selected_indices {tuple(selected_indices)} do not all exist in the "
            f"prepared scenarios for dataset {metadata.dataset!r}"
        ) from exc

In [5]:
# Generate data for analysis - required once only after model training
for model_name in ['base-lstm', 'seg-lstm']:
    for dataset in ["hospital", "tourism", "traffic", "electricity"]:
        metadata_file = f"{model_name}-monash.json"
        df_orig, metadata = nnts.datasets.load(dataset, data_path, metadata_file)
        params = utils.Hyperparams(
            optimizer=torch.optim.AdamW, loss_fn=F.smooth_l1_loss
        )

        # Start with the baseline scenario (no covariates, zero error).
        scenario_list: List[nnts.experiments.CovariateScenario] = [
            nnts.experiments.CovariateScenario(
                metadata.prediction_length, error=0.0, covariates=0
            )
        ]

        # Full forecast horizon with covariates: one scenario for every
        # (covariate count, error level) combination, covariate count outermost.
        for covariates in [1, 2, 3]:
            for error in covs.errors[metadata.dataset]:
                scenario = nnts.experiments.CovariateScenario(
                    metadata.prediction_length, error, covariates=covariates
                )
                scenario_list.append(scenario)

        # Last: the zero-error, three-covariate scenario with skip=1.
        skip_scenario = nnts.experiments.CovariateScenario(
            metadata.prediction_length, 0, covariates=3, skip=1
        )
        scenario_list.append(skip_scenario)

        # Results are written under results/<model>/<dataset>.
        output_dir = os.path.join(results_path, model_name, dataset)
        metric_generator.generate(
            scenario_list, df_orig, metadata, params, model_name, output_dir
        )

AttributeError: module 'nnts.datasets' has no attribute 'LastHorizonSplitter'

In [None]:
# 5.1.2 Comparing PCC over longer forecast horizons
# One figure per (dataset, covariate count, model): smape as a function of
# forecast horizon for the selected PCC scenarios, with covariates fixed.
for dataset_name in ["hospital", "tourism", "traffic", "electricity"]:
    tsf_file = os.path.join(data_path, covs.file_map[dataset_name])
    df_orig, *_ = nnts.datasets.read_tsf(tsf_file)
    for covariates in [1, 2, 3]:
        for model_name in ["base-lstm", "seg-lstm"]:
            # Metadata is model-specific, so it is read per model.
            metadata_file = os.path.join(data_path, f'{model_name}-monash.json')
            metadata = datasets.load_metadata(dataset_name, path=metadata_file)
            selected_scenarios = create_scenarios(df_orig, metadata, covariates)
            nnts.experiments.plotting.plot_forecast_horizon_trajectories(
                selected_scenarios,
                metadata.dataset,
                covariates,
                [model_name],
                loader_fn=load_metrics,
                path=ARTICLE_PATH,
            )

In [None]:
# 5.1.3 Comparing base-lstm and seg-lstm performance
# Both models are drawn on the same figure; scenarios are built from the
# base-lstm metadata file.
for dataset_name in ["hospital", "tourism", "traffic", "electricity"]:
    df_orig, metadata = nnts.datasets.load(
        dataset_name,
        data_path,
        metadata_filename='base-lstm-monash.json',
    )
    for covariates in [1, 2, 3]:
        selected_scenarios = create_scenarios(df_orig, metadata, covariates)
        nnts.experiments.plotting.plot_forecast_horizon_trajectories(
            selected_scenarios, metadata.dataset, covariates,
            ["base-lstm", "seg-lstm"],
            loader_fn=load_metrics, path=ARTICLE_PATH,
        )

In [None]:
# 5.1.4 Comparing covariates across Forecast Horizon trajectories
# plot model vs model comparison of smape as a function forecast horizon by covariates with PCC fixed
for dataset_name in ["hospital", "tourism", "traffic", "electricity"]:
    df_orig, metadata = nnts.datasets.load(
        dataset_name, data_path, "base-lstm-monash.json"
    )

    covariate_list = [0, 1, 2, 3]
    scenario_list: List[nnts.experiments.CovariateScenario] = []
    # One scenario per covariate count, all at the dataset's first error level.
    for covariates in covariate_list:
        scenario = nnts.experiments.CovariateScenario(
            metadata.prediction_length,
            covs.errors[dataset_name][0],
            covariates=covariates,
        )
        scenario_list.append(scenario)

    # Plus the zero-error, three-covariate scenario with skip=1.
    skip_scenario = nnts.experiments.CovariateScenario(
        metadata.prediction_length, 0, covariates=3, skip=1
    )
    scenario_list.append(skip_scenario)

    scenario_list = covs.prepare_scenarios(df_orig, scenario_list)
    # NOTE(review): the third argument is a list here but a single int in the
    # 5.1.2 / 5.1.3 cells - confirm the plotting helper accepts both.
    nnts.experiments.plotting.plot_forecast_horizon_trajectories(
        scenario_list,
        dataset_name,
        covariate_list,
        ["base-lstm", "seg-lstm"],
        loader_fn=load_metrics,
        path=ARTICLE_PATH,
    )

In [None]:
dataset_list = ["hospital", "tourism", "traffic", "electricity"]

# Keyword arguments shared by every chart call (unpacked afresh on each call).
chart_kwargs = dict(path=ARTICLE_PATH, results_path=results_path, data_path=data_path)

# One PCC chart per (model, covariate count), covering all datasets.
for model_name in ["base-lstm", "seg-lstm"]:
    for scenario_covariate in [1, 2, 3]:
        fig = covs.plot_pcc_charts(
            model_name, scenario_covariate, dataset_list, **chart_kwargs
        )

In [None]:
# Total run time per (model, dataset): the sum of the "run_time" column of the
# aggregated results for that pair.
run_times = []
for model_name in ["base-lstm", "seg-lstm"]:
    for dataset in ["hospital", "tourism", "traffic", "electricity"]:
        csv_aggregator = nnts.datasets.CSVFileAggregator(f"{results_path}/{model_name}/{dataset}", "results")
        results = csv_aggregator()
        # Only the aggregated results are needed here; the dataset itself is not
        # loaded (the previous per-iteration load was never used in this cell).
        run_times.append((model_name, dataset, results["run_time"].sum()))

# Long records -> dataset x model table of summed run times.
run_times_df = pd.DataFrame(
    run_times, columns=["model", "dataset", "run_time"]
).pivot(index="dataset", columns="model", values="run_time")

# Display the table as the cell output.
run_times_df

In [None]:
# Model/dataset pair inspected by the cells below.
model_name = "seg-lstm"
dataset = "electricity"

# Aggregated results for that pair.
results_dir = f'{results_path}/{model_name}/{dataset}'
csv_aggregator = nnts.datasets.CSVFileAggregator(results_dir, "results")
results = csv_aggregator()

# Matching dataset and metadata (metadata.prediction_length is used below).
metadata_file = f"{model_name}-monash.json"
df_orig, metadata = nnts.datasets.load(
    dataset, data_path, metadata_filename=metadata_file
)

In [None]:
# Mean and standard deviation of smape / mae / rmse over the rows with no
# covariates whose prediction length equals metadata.prediction_length.
is_no_covariate = results["covariates"] == 0
is_full_horizon = results["prediction_length"] == metadata.prediction_length
metric_columns = ["smape", "mae", "rmse"]

results.loc[is_no_covariate & is_full_horizon, metric_columns].agg(["mean", "std"])

In [None]:
cols = ["dataset", "error", "pearson", "covariates", "prediction_length", "rmse"]

# Rows with covariates, one of three chosen error values (exact float match),
# and skip == 0.
chosen_errors = [0.000000, 0.17142857142857143, 0.6]
row_mask = (
    (results["covariates"] > 0)
    & (results["error"].isin(chosen_errors))
    & (results["skip"] == 0)
)
selected_rows = results.loc[row_mask, cols].sort_values(by=["covariates", "error"])

# rmse per (dataset, covariates) row and pearson column; pivot_table's default
# aggregation (mean) combines duplicate entries.
selected_rows.pivot_table(
    index=["dataset", "covariates"], columns=["pearson"], values="rmse"
)

In [None]:
# Distinct values of the "error" column in the loaded results, ascending.
error_values = results['error'].unique().tolist()
error_values.sort()
error_values