In [67]:
import torch
import torch.optim
import pandas as pd
import os

import nnts
import nnts.data
from nnts import utils
import nnts.torch.models
import nnts.torch.trainers
import nnts.metrics
import nnts.torch.datasets
import nnts.torch.utils
import nnts.loggers
from nnts import datasets
# Print tensors with 8 decimal places and without scientific notation so
# metric values are easy to compare by eye.
torch.set_printoptions(precision=8, sci_mode=False)
# Load IPython's autoreload extension and switch it to mode 2.
# NOTE(review): re-running this cell in a live kernel prints the
# "already loaded" notice recorded below; it is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [68]:
# --- Experiment selection -------------------------------------------------
dataset_name = "ETTh1"
model_name = ""  # empty string: contributes no extra segment to PATH below

# --- Locations (all relative to the notebook's working directory) ---------
data_path = "data"
results_path = "nb-results"
metadata_path = "informer.json"

# Dataset description for `dataset_name`, read from `metadata_path`.
metadata = datasets.load_metadata(dataset_name, path=metadata_path)

# Derived paths: the raw CSV to read and the directory results are written to.
datafile_path = os.path.join(data_path, metadata.filename)
PATH = os.path.join(results_path, model_name, metadata.dataset)

# Read the raw table first, then make sure the output directory exists.
df = pd.read_csv(datafile_path)
utils.makedirs_if_not_exists(PATH)

In [69]:
# Training configuration handed to the trainer further down.
params = utils.Hyperparams(
    # Objective and optimisation
    loss_fn=torch.nn.L1Loss(),  # mean absolute error
    optimizer=torch.optim.Adam,
    scheduler=utils.Scheduler.REDUCE_LR_ON_PLATEAU,
    # Forecasting strategy
    training_method=utils.TrainingMethod.DMS,
    # Training budget
    epochs=100,
    batches_per_epoch=50,
    batch_size=32,
    # Checkpoint location (the training log below reports "logs/best_model.pt")
    model_file_path="logs",
)

In [70]:
# Reshape the wide table (one column per variable) into long format with one
# row per (timestamp, series): columns become ds / unique_id / y.
# NOTE: this rebinds `df`, so the cell cannot be re-run without first
# re-reading the CSV (the "date" column no longer exists afterwards).
df = df.melt(
    id_vars=["date"],
    value_vars=["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"],
    var_name="unique_id",
    value_name="y",
).rename(columns={"date": "ds"})
df.head()

Unnamed: 0,ds,unique_id,y
0,2016-07-01 00:00:00,HUFL,5.827
1,2016-07-01 01:00:00,HUFL,5.693
2,2016-07-01 02:00:00,HUFL,5.157
3,2016-07-01 03:00:00,HUFL,5.09
4,2016-07-01 04:00:00,HUFL,5.358


In [71]:
# Sanity check of the long-format frame: the recorded output (121940, 3) is
# 7 series x 17420 rows each, with columns ds / unique_id / y.
df.shape

(121940, 3)

In [5]:
# Univariate variant: treat only the "OT" column as the target and label it as
# a single series "T1".
#
# This must only run on the WIDE frame straight from the CSV. After the melt
# cell above, `df` already carries a `unique_id` column with seven series;
# unconditionally overwriting it with "T1" would merge all seven into one
# series with duplicated timestamps. The guard makes a top-to-bottom run safe
# while leaving the univariate path unchanged.
if "unique_id" not in df.columns:
    df = df.rename({"OT": "y", "date": "ds"}, axis="columns")
    df["unique_id"] = "T1"

In [72]:
# Chronological split sizes, counted in hourly steps: roughly 12 months for
# training and 4 months each for the two held-out segments.
# NOTE(review): the original comment says these follow Informer; the Informer
# reference code is believed to use 30-day months (12*30*24 and 4*30*24) —
# confirm that calendar-year lengths are intended here.
trn_length = int(365.25 * 24)  # 8766 steps
val_test_length = trn_length * 4 // 12  # 2922 steps (4 of 12 months)
split_data = datasets.split_test_val_train(df, trn_length, val_test_length, val_test_length)

In [73]:
# Fix the random seed before anything stochastic is constructed.
nnts.torch.utils.seed_everything(42)

# Window sizes come from the dataset metadata; `conts` is left empty.
dataset_options = dict(
    context_length=metadata.context_length,
    prediction_length=metadata.prediction_length,
    conts=[],
)

# One loader per split; a StandardScaler is passed as the only transform.
# (The SettingWithCopyWarning recorded below is emitted from inside the
# scaler, not from this cell.)
trn_dl, val_dl, test_dl = nnts.torch.utils.create_dataloaders_from_split_data(
    split_data,
    Dataset=nnts.torch.datasets.TimeseriesDataset,
    dataset_options=dataset_options,
    Sampler=nnts.torch.datasets.TimeSeriesSampler,
    batch_size=params.batch_size,
    transforms=[nnts.torch.preprocessing.StandardScaler()],
)

# The model is configured from the same metadata object.
net = nnts.torch.models.DLinear(metadata)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[numeric_cols] = (numeric_data - self.mean) / self.std


In [74]:
# Shape of the validation tensor; the recorded output is [7, 2922, 1].
# Presumably (series, time steps, features): 7 matches the melted variables
# and 2922 matches val_test_length — TODO confirm the axis meaning.
val_dl.dataset.X.shape

torch.Size([7, 2922, 1])

In [64]:
# Fit the network on the training loader, using the validation loader during
# training; the log below shows repeated saves to logs/best_model.pt.
trner = nnts.torch.trainers.ValidationTorchEpochTrainer(net, params, metadata)
evaluator = trner.train(trn_dl, val_dl)

# Forecasts and matching actuals over the test loader.
y_hat, y = evaluator.evaluate(test_dl, metadata.prediction_length, metadata.context_length)


DLinear(
  (decompsition): series_decomp(
    (moving_avg): moving_avg(
      (avg): AvgPool1d(kernel_size=(25,), stride=(1,), padding=(0,))
    )
  )
  (Linear_Seasonal): ModuleList(
    (0): Linear(in_features=336, out_features=336, bias=True)
  )
  (Linear_Trend): ModuleList(
    (0): Linear(in_features=336, out_features=336, bias=True)
  )
  (Linear_Decoder): ModuleList(
    (0): Linear(in_features=336, out_features=336, bias=True)
  )
)
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt
saving model to logs/best_model.pt


In [76]:
# Scratch arithmetic: squares a hand-copied value.
# NOTE(review): the origin of this number is not recorded in the notebook —
# presumably a metric being converted to its squared form. Confirm the source
# or remove the cell.
0.29441186785697937 **2

0.08667834793503548

In [65]:
# Forecasts and actuals must have identical shapes. The recorded output is
# [15757, 336, 1] for both; 15757 = 7 x 2251, so dim 0 presumably stacks the
# test windows of all seven series — TODO confirm the ordering.
y_hat.shape, y.shape

(torch.Size([15757, 336, 1]), torch.Size([15757, 336, 1]))

In [53]:
# Seasonal error computed from the training loader at the metadata's
# seasonality. The recorded output is a [7, 1] tensor, i.e. one value per
# series rather than one per forecast window.
nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)

tensor([[0.32819790],
        [0.11649176],
        [0.28753734],
        [0.09909239],
        [0.10163941],
        [0.03509720],
        [0.35242835]])

In [52]:

# Aggregate test metrics; the third argument is the seasonal error computed on
# the training loader.
# NOTE(review): the recorded run of this cell failed with
#   RuntimeError: The size of tensor a (15757) must match the size of tensor b (7)
#   at non-singleton dimension 0
# 15757 is dim 0 of y_hat / y and 7 is dim 0 of the seasonal error, so the two
# do not broadcast. Presumably the per-series error has to be expanded to one
# row per window (or the forecasts regrouped per series) before this call —
# TODO confirm against how the evaluator orders its windows.
test_metrics = nnts.metrics.calc_metrics(
    y_hat, y, nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
)
test_metrics

RuntimeError: The size of tensor a (15757) must match the size of tensor b (7) at non-singleton dimension 0

In [21]:
# Scratch arithmetic: squares a hand-copied value.
# NOTE(review): the origin of this number is not recorded in the notebook —
# confirm the source or remove the cell.
0.3049468696117401**2

0.09299259328599963

In [None]:
# Metrics restricted to the first forecast step of every window.
# The seasonal error is computed from `metadata.seasonality`, matching the
# other calculate_seasonal_error calls in this notebook (this cell previously
# passed the whole `metadata` object).
# NOTE(review): dim 0 of the sliced tensors is unchanged, so the size mismatch
# recorded for the full-horizon metrics cell may occur here as well.
nnts.metrics.calc_metrics(
    y_hat[:, :1, :],
    y[:, :1, :],
    nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality),
)

In [None]:
def save_results(y_hat, y, name):
    """Persist a forecast/actual pair under the notebook's results directory.

    Writes two files with ``torch.save``: ``{PATH}/{name}_y_hat.pt`` and
    ``{PATH}/{name}_y.pt``, where ``PATH`` is the notebook-level output
    directory.

    Args:
        y_hat: object written to the ``_y_hat.pt`` file.
        y: object written to the ``_y.pt`` file.
        name: file-name prefix shared by both files.
    """
    for payload, suffix in ((y_hat, "y_hat"), (y, "y")):
        torch.save(payload, f"{PATH}/{name}_{suffix}.pt")


# NOTE(review): `scenario` is not defined in any cell of this notebook, so this
# call raises NameError on a fresh kernel — define it or pass an explicit name.
save_results(y_hat, y, scenario.name)

In [None]:
# Load a previously saved forecast/actual pair for the covariate run so it can
# be compared with the run above.
# NOTE(review): `scenario` is not defined in any cell of this notebook; this
# cell needs it (prediction_length and seed) to build the file prefix.
covariate_name = "cov-1-pearsn-0.68-pl-{}-seed-{}".format(
    str(scenario.prediction_length), scenario.seed
)
covariate_y_hat = torch.load("{}/{}_y_hat.pt".format(PATH, covariate_name))
covariate_y = torch.load("{}/{}_y.pt".format(PATH, covariate_name))

In [None]:
def calculate_forecast_horizon_metrics(y_hat, y, metadata, metric="mae"):
    """Compute one metric over growing prefixes of the forecast horizon.

    For every horizon ``i`` from 1 to ``metadata.prediction_length`` inclusive,
    the first ``i`` steps of each window (``[:, :i, :]``) are scored and the
    requested metric is collected. Each entry is therefore cumulative over
    steps ``1..i``, not the error of step ``i`` alone.

    Args:
        y_hat: forecasts, indexed as ``[window, step, feature]``.
        y: actuals with the same layout as ``y_hat``.
        metadata: object providing ``prediction_length``, ``freq`` and
            ``seasonality``.
        metric: key to read from the result of ``nnts.metrics.calc_metrics``.

    Returns:
        list with ``metadata.prediction_length`` entries, one per horizon.
    """
    # NOTE(review): this calls calc_metrics(actuals, forecasts, freq,
    # seasonality), while the executed evaluation cell in this notebook calls
    # calc_metrics(y_hat, y, seasonal_error) — confirm which signature the
    # installed nnts version expects.
    return [
        nnts.metrics.calc_metrics(
            y[:, :i, :], y_hat[:, :i, :], metadata.freq, metadata.seasonality
        )[metric]
        # "+ 1" so the full horizon is included; the previous upper bound
        # stopped one step short of prediction_length.
        for i in range(1, metadata.prediction_length + 1)
    ]

# MAE per forecast horizon for the run trained above ...
forecast_horizon_metrics = calculate_forecast_horizon_metrics(
    y_hat, y, metadata, metric="mae"
)
# ... and for the covariate run loaded from disk.
covariate_forecast_horizon_metrics = calculate_forecast_horizon_metrics(
    covariate_y_hat, covariate_y, metadata, metric="mae"
)

In [None]:
# Plotting setup. `plt` is used by the plotting cell that follows but was never
# imported anywhere in the notebook, so that cell raised NameError on a fresh
# kernel; importing it here keeps the plotting imports together.
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

In [None]:
# Error as a function of forecast horizon, one line per run.
for horizon_errors, series_label in (
    (forecast_horizon_metrics, "univariate"),
    (covariate_forecast_horizon_metrics, "covariate (0.68)"),
):
    plt.plot(horizon_errors, label=series_label)

plt.xlabel("Forecast Horizon")
plt.ylabel("Error (MAE)")
plt.legend()
plt.show()

In [None]:
# Build an aggregator over the results directory and invoke it.
# `datasets` is the same module as `nnts.datasets` (imported at the top via
# `from nnts import datasets`).
csv_aggregator = datasets.CSVFileAggregator(PATH, "results")
results = csv_aggregator()

In [None]:
# Quick look at the target over the last 50 prediction windows' worth of rows.
# In the long-format frame these trailing rows all belong to the last melted
# series.
df["y"].tail(metadata.prediction_length * 50).plot()

In [None]:
# Compare one-step forecasts with the actuals, lined up against the raw frame.
# Despite the "_last" suffix, [:, :1, ...] keeps the FIRST step of every
# forecast window.
y_hat_last = y_hat[:, :1, ...]
y_last = y[:, :1, ...]

# .copy() turns the tail slice into an independent frame, so the column
# assignments below no longer raise pandas' SettingWithCopyWarning.
df_test = df.tail(y_hat_last.shape[0]).copy()
df_test["y_check"] = y_last.squeeze()
df_test["y_hat"] = y_hat_last.squeeze()

# NOTE(review): pairing tail rows with forecast windows assumes a single
# series. With the seven-series long frame the tail rows all come from the
# last series while y_hat presumably stacks windows from every series —
# TODO confirm the alignment before trusting this plot.
# Plot one 336-step stretch starting at row 4500.
df_test.set_index("ds")[["y_check", "y_hat"]].iloc[4500:4500+336].plot(figsize=(20, 10))

In [None]:
# Metrics for the first forecast step only, using the y_last / y_hat_last
# slices.
# NOTE(review): this passes (actuals, forecasts, freq, seasonality), whereas
# the executed evaluation cell in this notebook calls
# calc_metrics(y_hat, y, seasonal_error) — confirm which signature the
# installed nnts version expects before relying on this cell.
test_metrics = nnts.metrics.calc_metrics(y_last, y_hat_last, metadata.freq, metadata.seasonality)
test_metrics