In [48]:
import matplotlib.pyplot as plt
import torch
import torch.optim
import pandas as pd
import os

import nnts
import nnts.data
from nnts import utils
import nnts.torch.models
import nnts.torch.trainers
import nnts.metrics
import nnts.torch.datasets
import nnts.torch.utils
import nnts.loggers
from nnts import datasets
torch.set_printoptions(precision=8, sci_mode=False)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [49]:
# Experiment configuration: input/output locations and dataset identity.
data_path, results_path = "data", "nb-results"
dataset_name, model_name = "weather", ""
metadata_path = "informer.json"

# Look up the dataset's metadata entry, then derive the CSV location and the
# results directory from it.  model_name is the empty string here, so it adds
# no extra directory level between results_path and metadata.dataset.
metadata = datasets.load_metadata(dataset_name, path=metadata_path)
datafile_path = os.path.join(data_path, metadata.filename)
PATH = os.path.join(results_path, model_name, metadata.dataset)

# Read the raw CSV, then hand PATH to the directory helper
# (presumably creates the directory when it is missing -- confirm in nnts.utils).
df = pd.read_csv(datafile_path)
utils.makedirs_if_not_exists(PATH)

In [50]:
# Training hyper-parameters, grouped by concern.
params = utils.Hyperparams(
    # optimisation
    optimizer=torch.optim.Adam,
    scheduler=utils.Scheduler.REDUCE_LR_ON_PLATEAU,
    loss_fn=torch.nn.L1Loss(),
    # training schedule
    epochs=100,
    batches_per_epoch=50,
    batch_size=32,
    training_method=utils.TrainingMethod.DMS,
    # where model checkpoints are written
    model_file_path="logs",
)

In [51]:
# Rename the WetBulbCelsius and date columns to "y" and "ds", and tag every
# row with the single series identifier "T1".
df = df.rename(columns={"WetBulbCelsius": "y", "date": "ds"}).assign(unique_id="T1")

In [52]:
# split lengths as per informer
# 24 * 365.25 rows per year, i.e. the arithmetic assumes hourly rows.
hours_per_year = 24 * 365.25
trn_length = int(hours_per_year * 2)  # two years
val_test_length = int(hours_per_year * (10 / 12))  # ten months; passed for both remaining splits
split_data = datasets.split_test_val_train(
    df,
    trn_length,
    val_test_length,
    val_test_length,
)

In [53]:
# Seed first so the loaders and the model below are created after seeding.
nnts.torch.utils.seed_everything(42)

# Window sizes are taken from the dataset metadata; "conts" is left empty.
dataset_options = dict(
    context_length=metadata.context_length,
    prediction_length=metadata.prediction_length,
    conts=[],
)

# Three loaders are unpacked from the split data; StandardScaler is the only
# transform passed in.
trn_dl, val_dl, test_dl = nnts.torch.utils.create_dataloaders_from_split_data(
    split_data,
    batch_size=params.batch_size,
    Dataset=nnts.torch.datasets.TimeseriesDataset,
    Sampler=nnts.torch.datasets.TimeSeriesSampler,
    dataset_options=dataset_options,
    transforms=[nnts.torch.preprocessing.StandardScaler()],
)

# The model is configured from the dataset metadata alone.
net = nnts.torch.models.DLinear(metadata)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[numeric_cols] = (numeric_data - self.mean) / self.std


In [56]:
# Train with the validation loader, then run the returned evaluator on the
# test loader to obtain forecasts and matching targets.
trner = nnts.torch.trainers.ValidationTorchEpochTrainer(net, params, metadata)
evaluator = trner.train(trn_dl, val_dl)
y_hat, y = evaluator.evaluate(
    test_dl,
    metadata.prediction_length,
    metadata.context_length,
)

# The seasonal error computed from the training loader is the third input to
# calc_metrics.
seasonal_error = nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
test_metrics = nnts.metrics.calc_metrics(y_hat, y, seasonal_error)
test_metrics

DLinear(
  (decompsition): series_decomp(
    (moving_avg): moving_avg(
      (avg): AvgPool1d(kernel_size=(25,), stride=(1,), padding=(0,))
    )
  )
  (Linear_Seasonal): ModuleList(
    (0): Linear(in_features=505, out_features=168, bias=True)
  )
  (Linear_Trend): ModuleList(
    (0): Linear(in_features=505, out_features=168, bias=True)
  )
  (Linear_Decoder): ModuleList(
    (0): Linear(in_features=505, out_features=168, bias=True)
  )
)
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt


{'mse': 0.21180547773838043,
 'abs_error': 368254.8125,
 'abs_target_sum': 897140.6875,
 'abs_target_mean': 0.8050841093063354,
 'seasonal_error': 0.30613401532173157,
 'mean_mase': 1.0794873237609863,
 'mean_mape': 1.5616284608840942,
 'mean_smape': 0.7036300897598267,
 'mean_msmape': 0.5347234010696411,
 'mean_mae': 0.3304677903652191,
 'mean_rmse': 0.4061824083328247,
 'median_mase': 0.9741610884666443,
 'median_smape': 0.655884861946106,
 'median_msmape': 0.4708513617515564,
 'median_mae': 0.2982238531112671,
 'median_rmse': 0.37352558970451355}

In [None]:
# Train, evaluate and log one run per scenario; the [:1] slice restricts the
# loop to the first scenario only.
# NOTE(review): scenario_list, prepare, df_orig, splitter and trainers are not
# defined or imported in the cells visible here, and nnts.torch.data /
# nnts.trainers are not among the explicit imports of the first cell --
# confirm this cell still runs on a fresh kernel.
# NOTE(review): this cell builds a SegLSTM, whereas the executed cell above
# trains a DLinear; it also rebinds df, split_data, the loaders, net, trner,
# evaluator, y_hat, y and test_metrics.
for scenario in scenario_list[:1]:
    # Re-seed per scenario, then derive the scenario's frame from a copy of
    # df_orig so the original frame is not modified by prepare().
    nnts.torch.utils.seed_everything(scenario.seed)
    df, scenario = prepare(df_orig.copy(), scenario)
    split_data = splitter.split(
        df, trn_length, val_test_length, val_test_length
    )
    trn_dl, val_dl, test_dl = nnts.data.create_trn_val_test_dataloaders(
        split_data,
        metadata,
        scenario,
        params,
        nnts.torch.data.TorchTimeseriesDataLoaderFactory(),
        [nnts.torch.data.preprocessing.StandardScaler()],
    )
    # model_name is "" in the config cell, so the project string starts with "-".
    logger = nnts.loggers.WandbRun(
        project=f"{model_name}-{metadata.dataset}",
        name=scenario.name,
        # Merged attribute dicts; on duplicate keys the later mapping wins.
        config={
            **params.__dict__,
            **metadata.__dict__,
            **scenario.__dict__,
        },
        path=PATH,
    )

    # NOTE(review): the meaning of the trailing literal 24 is not visible
    # here -- confirm against the SegLSTM constructor.
    net = nnts.torch.models.SegLSTM(
        nnts.torch.models.LinearModel,
        params,
        nnts.torch.data.preprocessing.masked_mean_abs_scaling,
        scenario.covariates + 1,
        24
    )
    # The last argument is a per-scenario .pt path inside PATH.
    trner = trainers.TorchEpochTrainer(
        nnts.trainers.TrainerState(), 
        net, 
        params, 
        metadata, 
        os.path.join(PATH, f"{scenario.name}.pt"),
    )
    logger.configure(trner.events)

    evaluator = trner.train(trn_dl, val_dl)
    # The forward hook on net.decoder is registered only after training and
    # removed right after evaluation, so logger.log_activations is attached
    # for the evaluate() call alone.
    handle = net.decoder.register_forward_hook(logger.log_activations)
    y_hat, y = evaluator.evaluate(
        test_dl, scenario.prediction_length, metadata.context_length, hooks=handle
    )
    handle.remove()
    test_metrics = nnts.metrics.calc_metrics(
        y_hat, y, nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
    )
    logger.log(test_metrics)
    logger.finish()

In [None]:
# Display the shapes of the forecasts and of the matching targets.
y_hat.shape, y.shape

In [None]:
# NOTE(review): this call passes (y_hat, y, trn_dl, metadata), while the
# executed evaluation cell passes the result of
# nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality) as the
# third and last argument -- confirm which signature calc_metrics accepts.
nnts.metrics.calc_metrics(y_hat, y, trn_dl, metadata)

In [None]:
# Metrics restricted to index 0 along axis 1 of the forecasts and targets.
# NOTE(review): calculate_seasonal_error receives `metadata` here, whereas the
# other calls in this notebook pass `metadata.seasonality` -- confirm which
# one the function expects.
nnts.metrics.calc_metrics(y_hat[:, :1, :], y[:, :1, :], nnts.metrics.calculate_seasonal_error(trn_dl, metadata))

In [None]:
def save_results(y_hat, y, name):
    """Persist a forecast/target pair inside the results directory.

    Two files are written into the module-level ``PATH`` directory:
    ``<name>_y_hat.pt`` and ``<name>_y.pt``.

    Args:
        y_hat: Forecasts to store; handed to ``torch.save`` unchanged.
        y: Targets to store; handed to ``torch.save`` unchanged.
        name: File-name prefix shared by both files.
    """
    # os.path.join mirrors how PATH itself is assembled in this notebook and
    # avoids hard-coding the "/" separator.
    torch.save(y_hat, os.path.join(PATH, f"{name}_y_hat.pt"))
    torch.save(y, os.path.join(PATH, f"{name}_y.pt"))
# Store the current forecasts and targets under the scenario's name
# (`scenario` is the loop variable left over from the scenario loop).
save_results(y_hat, y, scenario.name)

In [None]:
# Reload forecasts and targets stored under the covariate run's name, using
# the "<name>_y_hat.pt" / "<name>_y.pt" file naming.
# NOTE(review): torch.load deserialises via pickle -- only load files that
# this project wrote itself.
covariate_name = f"cov-1-pearsn-0.68-pl-{str(scenario.prediction_length)}-seed-{scenario.seed}"
covariate_y_hat = torch.load(f"{PATH}/{covariate_name}_y_hat.pt")
covariate_y = torch.load(f"{PATH}/{covariate_name}_y.pt")

In [None]:
def calculate_forecast_horizon_metrics(y_hat, y, metadata, metric="mae"):
    """Collect one metric for every cumulative forecast horizon.

    For each horizon ``h`` from 1 up to and including
    ``metadata.prediction_length``, the first ``h`` entries along axis 1 of
    ``y`` and ``y_hat`` are passed to ``nnts.metrics.calc_metrics`` and the
    entry named ``metric`` is read from the returned mapping.

    Args:
        y_hat: Forecasts, indexable as ``y_hat[:, :h, :]``.
        y: Targets, indexable as ``y[:, :h, :]``.
        metadata: Object exposing ``prediction_length``, ``freq`` and
            ``seasonality``.
        metric: Key looked up in the result of ``calc_metrics``.

    Returns:
        list: ``metadata.prediction_length`` values; element ``h - 1`` covers
        the first ``h`` entries along axis 1.
    """
    # The upper bound is prediction_length + 1 so the full horizon is scored
    # as well; range(1, prediction_length) stopped one horizon short.
    # NOTE(review): the executed evaluation cell calls
    # calc_metrics(y_hat, y, seasonal_error) and its output lists "mean_mae" /
    # "median_mae" but no "mae" key -- confirm the signature and key used here.
    return [
        nnts.metrics.calc_metrics(
            y[:, :horizon, :], y_hat[:, :horizon, :], metadata.freq, metadata.seasonality
        )[metric]
        for horizon in range(1, metadata.prediction_length + 1)
    ]

# Horizon-wise "mae" curves: one for (y_hat, y) and one for the reloaded
# covariate tensors; both are plotted against each other further down.
forecast_horizon_metrics = calculate_forecast_horizon_metrics(y_hat, y, metadata, "mae")
covariate_forecast_horizon_metrics = calculate_forecast_horizon_metrics(covariate_y_hat, covariate_y, metadata, "mae")

In [None]:
# NOTE(review): this import sits mid-notebook, while the other imports are
# collected in the first cell.
import seaborn as sns
sns.set()  # switch on seaborn's default theme for the figures drawn below

In [None]:
# Draw both horizon-wise error curves on one set of axes for comparison.
fig, ax = plt.subplots()
ax.plot(forecast_horizon_metrics, label='univariate')
ax.plot(covariate_forecast_horizon_metrics, label='covariate (0.68)')
ax.set_xlabel("Forecast Horizon")
ax.set_ylabel("Error (MAE)")
ax.legend()
plt.show()

In [None]:
# Build an aggregator from the results directory and the name "results", then
# call it and keep what it returns.
# NOTE(review): presumably gathers per-run CSV output found under PATH --
# verify against nnts.datasets.CSVFileAggregator.
csv_aggregator = nnts.datasets.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [None]:
# Plot the "y" column over the last 50 * prediction_length rows of df.
recent_rows = 50 * metadata.prediction_length
df["y"].tail(recent_rows).plot()

In [None]:
# Despite the *_last names, [:, :1, ...] keeps index 0 along axis 1
# (assumes axis 1 is the forecast-step axis -- TODO confirm).
y_hat_last = y_hat[:, :1, ...]
y_last = y[:, :1, ...]

# Align with the last rows of df, one row per kept forecast.  .copy() makes
# df_test an independent frame, so the column assignments below do not write
# into a slice of df (the pattern behind pandas' SettingWithCopyWarning).
df_test = df.tail(y_hat_last.shape[0]).copy()
df_test["y_check"] = y_last.squeeze()
df_test["y_hat"] = y_hat_last.squeeze()
# Evaluated but not displayed: only a cell's final expression is rendered.
df_test[["y", "y_check", "y_hat"]]
# Plot a 336-row window starting at row 4500, indexed by the "ds" column.
df_test.set_index("ds")[["y_check", "y_hat"]].iloc[4500:4500+336].plot(figsize=(20, 10))

In [None]:
# Metrics for the single kept forecast step; this rebinds test_metrics from
# the evaluation cell.
# NOTE(review): targets are passed first and (freq, seasonality) follow, while
# the executed evaluation cell calls calc_metrics(y_hat, y, seasonal_error) --
# confirm which signature calc_metrics accepts.
test_metrics = nnts.metrics.calc_metrics(y_last, y_hat_last, metadata.freq, metadata.seasonality)
test_metrics