In [1]:
import matplotlib.pyplot as plt
import torch
from typing import List
import seaborn as sns

import nnts
import nnts.data
import nnts.experiments
import nnts.models
import nnts.torch.data.preprocessing as preprocessing
import nnts.torch.models
import nnts.torch.models.trainers as trainers
import nnts.metrics
import nnts.torch.data
import nnts.torch.data.datasets
import nnts.loggers
import nnts.pandas
import covs 

# Apply seaborn's default theme so that the matplotlib figures drawn later in
# the notebook pick up its styling.
sns.set()



In [2]:
import nnts.pandas

# Read the weekly traffic dataset. The result of read_tsf is unpacked: its
# first element is kept as `df`, everything after it is collected into `_`.
# NOTE(review): the second argument looks like the location the archive is
# fetched from when the file is not available locally - confirm in read_tsf.
traffic_tsf = nnts.pandas.read_tsf(
    "traffic_weekly_dataset.tsf",
    "https://zenodo.org/records/4656135/files/traffic_weekly_dataset.zip",
)
df, *_ = traffic_tsf

In [3]:
# Display the frame loaded above as this cell's output.
df

Unnamed: 0,y,ds,unique_id
0,6.4875,2015-01-04,T1
1,7.8092,2015-01-11,T1
2,6.8881,2015-01-18,T1
3,7.1613,2015-01-25,T1
4,8.3065,2015-02-01,T1
...,...,...,...
99,5.6545,2016-11-27,T862
100,6.4925,2016-12-04,T862
101,6.4907,2016-12-11,T862
102,6.8985,2016-12-18,T862


In [4]:
import nnts.loggers
import nnts.pandas

# --- Notebook configuration ---
# NOTE(review): the same values are assigned again in the following cell;
# keep the two in sync or consolidate them.
results_path = "nb-results"
metadata_path = "monash.json"

# Directory for figures. The commented-out line is an alternative absolute
# location on the author's machine and is not portable.
ARTICLE_PATH = "nb-results/figures"
# ARTICLE_PATH = "/Users/garethdavies/Development/workspaces/garethmd.github.io/articles/figures"

# Dataset key -> relative path of its .tsf file.
data_paths = {
    "traffic": "data/traffic_weekly_dataset.tsf",
    "electricity": "data/electricity_hourly_dataset.tsf",
    "tourism": "data/tourism_monthly_dataset.tsf",
    "hospital": "data/hospital_dataset.tsf",
}

# Read the traffic file; being the last expression, the result is what the
# cell displays.
nnts.pandas.read_tsf(path=data_paths["traffic"])

Unnamed: 0,y,ds,unique_id
0,6.4875,2015-01-04,T1
1,7.8092,2015-01-11,T1
2,6.8881,2015-01-18,T1
3,7.1613,2015-01-25,T1
4,8.3065,2015-02-01,T1
...,...,...,...
99,5.6545,2016-11-27,T862
100,6.4925,2016-12-04,T862
101,6.4907,2016-12-11,T862
102,6.8985,2016-12-18,T862


In [5]:
# --- Notebook configuration ---
# NOTE(review): these assignments repeat the values already set in the
# preceding cell; keep the two in sync or consolidate them.
results_path = "nb-results"
metadata_path = "monash.json"

# Dataset key -> relative path of its .tsf file.
data_paths = {
    "traffic": "data/traffic_weekly_dataset.tsf",
    "electricity": "data/electricity_hourly_dataset.tsf",
    "tourism": "data/tourism_monthly_dataset.tsf",
    "hospital": "data/hospital_dataset.tsf",
}

# Directory for figures. The commented-out line is an alternative absolute
# location on the author's machine and is not portable.
ARTICLE_PATH = "nb-results/figures"
# ARTICLE_PATH = "/Users/garethdavies/Development/workspaces/garethmd.github.io/articles/figures"

# Make sure the figure directory is there before anything is written to it.
nnts.loggers.makedirs_if_not_exists(ARTICLE_PATH)

In [6]:
# Which dataset and which model this run of the notebook is about.
dataset_name = "traffic"
model_name = "unrolled-lstm"

# Load the series and the dataset's metadata (looked up via metadata_path).
df_orig, metadata = nnts.pandas.load(dataset_name, metadata_path=metadata_path)

# Start from the default hyperparameters and select free-running training.
params = nnts.models.Hyperparams()
params.training_method = nnts.models.hyperparams.TrainingMethod.FREE_RUNNING

# Splits the prepared frame into the pieces handed to the dataloaders.
splitter = nnts.data.PandasSplitter()

# Run logs and model checkpoints for this model/dataset pair go under here.
PATH = f"results/{model_name}/{metadata.dataset}"


In [7]:
scenario_list: List[nnts.experiments.CovariateScenario] = []

# Baseline: no covariates and no error, repeated once per random seed.
for seed in (42, 43, 44, 45, 46):
    baseline_scenario = nnts.experiments.CovariateScenario(
        metadata.prediction_length, error=0.0, covariates=0, seed=seed
    )
    scenario_list.append(baseline_scenario)

In [8]:

## Models for full forecast horizon with covariates
# NOTE: this cell appends to the scenario_list created in an earlier cell, so
# running it more than once adds the same scenarios again.

for covariates in (1, 2, 3):
    # One scenario per error level configured for this dataset.
    for error in covs.errors[metadata.dataset]:
        covariate_scenario = nnts.experiments.CovariateScenario(
            metadata.prediction_length, error, covariates=covariates
        )
        scenario_list.append(covariate_scenario)

# One additional scenario: three covariates, zero error, and skip=1.
skip_scenario = nnts.experiments.CovariateScenario(
    metadata.prediction_length, 0, covariates=3, skip=1
)
scenario_list.append(skip_scenario)

In [9]:
# Train and evaluate one model per scenario. When the loop ends, `df`,
# `scenario`, `y_hat` and `y` hold the values of the last scenario processed.
for scenario in scenario_list:
    # Seed before any data preparation or model construction happens.
    nnts.torch.data.datasets.seed_everything(scenario.seed)

    # prepare() is given a copy, so df_orig is left untouched for the next
    # scenario; it hands back the frame and the scenario used from here on.
    df, scenario = covs.prepare(df_orig.copy(), scenario)
    split_data = splitter.split(df, metadata)
    loader_factory = nnts.torch.data.TorchTimeseriesDataLoaderFactory()
    trn_dl, val_dl, test_dl = nnts.data.map_to_dataloaders(
        split_data, metadata, scenario, params, loader_factory
    )

    # Run log: a JSON file under PATH named after the scenario. The recorded
    # config merges hyperparameters, dataset metadata and scenario attributes
    # (later entries win on key clashes).
    json_handler = nnts.loggers.JsonFileHandler(
        path=PATH, filename=f"{scenario.name}.json"
    )
    run_config = {
        **params.__dict__,
        **metadata.__dict__,
        **scenario.__dict__,
    }
    logger = nnts.loggers.ProjectRun(
        json_handler,
        # nnts.loggers.PrintHandler(),
        project=f"{model_name}-{metadata.dataset}",
        run=scenario.name,
        config=run_config,
    )

    # NOTE(review): presumably the number of model inputs - the target plus
    # the scenario's covariates; confirm against UnrolledLSTM.
    n_inputs = scenario.covariates + 1
    net = nnts.torch.models.UnrolledLSTM(
        nnts.torch.models.LinearModel,
        params,
        preprocessing.masked_mean_abs_scaling,
        n_inputs,
    )

    # The trainer is given a .pt path under PATH, named after the scenario.
    checkpoint_path = f"{PATH}/{scenario.name}.pt"
    trner = trainers.TorchEpochTrainer(
        nnts.models.TrainerState(),
        net,
        params,
        metadata,
        checkpoint_path,
        logger=logger,
    )

    # train() returns the evaluator that is then run on the test dataloader.
    evaluator = trner.train(trn_dl, val_dl)
    y_hat, y = evaluator.evaluate(
        test_dl, scenario.prediction_length, metadata.context_length
    )

    # Log the test metrics for this scenario and close the run.
    test_metrics = nnts.metrics.calc_metrics(
        y, y_hat, metadata.freq, metadata.seasonality
    )
    logger.log(test_metrics)
    logger.finish()

UnrolledLSTM(
  (decoder): UnrolledLSTMDecoder(
    (rnn): LSTM(2, 40, num_layers=2, batch_first=True, dropout=0.1)
  )
  (distribution): LinearModel(
    (main): Sequential(
      (0): Linear(in_features=40, out_features=40, bias=True)
      (1): ReLU()
      (2): Linear(in_features=40, out_features=1, bias=True)
    )
  )
)
Artifact saved to results/unrolled-lstm/tourism/cov-0-pearsn-0-pl-24-seed-42.pt
{'train_loss': 1629.0181884765625, 'valid_loss': 5415.83935546875, 'elapsed_time': 12.651964166667312}
Artifact saved to results/unrolled-lstm/tourism/cov-0-pearsn-0-pl-24-seed-42.pt
{'train_loss': 856.6263427734375, 'valid_loss': 5385.62548828125, 'elapsed_time': 11.181081290822476}
Artifact saved to results/unrolled-lstm/tourism/cov-0-pearsn-0-pl-24-seed-42.pt
{'train_loss': 809.2552490234375, 'valid_loss': 5358.765625, 'elapsed_time': 11.286123166792095}
{'train_loss': 807.2147827148438, 'valid_loss': 5400.69287109375, 'elapsed_time': 11.20332658290863}
{'train_loss': 786.5203247070

KeyboardInterrupt: 

In [2]:
# Build an aggregator over the run directory (PATH) and call it; the value it
# returns is kept as `results` and used by the analysis cells below.
# NOTE(review): presumably this gathers the per-scenario run logs into a
# single table named "results" - confirm against covs.CSVFileAggregator.
csv_aggregator = covs.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

NameError: name 'covs' is not defined

In [3]:
# Alternative to the aggregation cell: reload previously saved results.
# (Would need pandas imported as `pd`.)
#results = pd.read_csv(f"{PATH}/results.csv")

# One figure per metric with one panel per covariate count; the panels share
# the y-axis so they can be compared directly. Each figure is saved as
# <metric>.png under PATH.
covariate_counts = (1, 2, 3)
for metric in ["smape", "mape", "rmse", "mae"]:
    fig, axes = plt.subplots(
        nrows=1, ncols=len(covariate_counts), figsize=(20, 5), sharey=True
    )
    # Pair each panel with its covariate count instead of repeating the same
    # plotting call once per panel.
    for ax, n_covariates in zip(axes, covariate_counts):
        chart_data = covs.get_chart_data(
            results, metadata.prediction_length, n_covariates, metric
        )
        chart_data.plot(
            kind="line",
            ax=ax,
            title=(
                f"{metadata.dataset} {metric} covariates = {n_covariates}, "
                f"forecast horizon = {metadata.prediction_length}"
            ),
        )
    fig.tight_layout()
    fig.savefig(f"{PATH}/{metric}.png")

NameError: name 'plt' is not defined

In [None]:
# Uses `df`, `y_hat` and `scenario` as left behind by the last iteration of
# the training loop, so what is plotted reflects only the final scenario.
# NOTE(review): presumably add_y_hat attaches the forecasts to the series and
# plot draws a sample of them - confirm against the covs module.
df_list = covs.add_y_hat(df, y_hat, scenario.prediction_length)
sample_preds = covs.plot(df_list, scenario.prediction_length)

In [None]:
# Metric columns of the runs without covariates at the dataset's own
# forecast horizon.
metric_columns = ["smape", "mape", "rmse", "mae"]
without_covariates = results["covariates"] == 0
at_dataset_horizon = results["prediction_length"] == metadata.prediction_length
univariate_results = results.loc[
    without_covariates & at_dataset_horizon, metric_columns
]

# Per-metric mean, standard deviation and number of runs.
univariate_results.mean(), univariate_results.std(), univariate_results.count()

In [None]:
# Columns selected when tabulating results in the cell that follows.
cols = [
    "dataset",
    "error",
    "pearson",
    "covariates",
    "prediction_length",
    "smape",
]

In [None]:
# Runs that use at least one covariate, restricted to three error levels and
# ordered by covariate count, then error.
# NOTE(review): the levels are matched by exact float equality, so they must
# be the very values stored in results["error"] - verify when the error grid
# or the dataset changes.
selected_errors = [0.000000, 0.4714285714285714, 1.65]
with_covariates = results["covariates"] > 0
at_selected_error = results["error"].isin(selected_errors)
results.loc[with_covariates & at_selected_error, cols].sort_values(
    by=["covariates", "error"]
)

In [None]:
# List the distinct error levels present in the results, in ascending order.
sorted(results["error"].unique().tolist())

Unnamed: 0,y,ds,unique_id
0,1149.8700,1979-01-31,T1
1,1053.8002,1979-02-28,T1
2,1388.8798,1979-03-31,T1
3,1783.3702,1979-04-30,T1
4,1921.0252,1979-05-31,T1
...,...,...,...
235,7778.0000,2000-08-31,T366
236,7859.0000,2000-09-30,T366
237,4802.0000,2000-10-31,T366
238,2426.0000,2000-11-30,T366
