In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch import Tensor
from torch.nn.utils.rnn import pad_sequence

import pandas as pd
from datetime import datetime

import plotly.graph_objects as go
import numpy as np

from gas import GASModel
from utils import generate_dataset_multivariate

# Load dataset

We experiment with the traffic_hourly dataset, which contains data from 862 sensors (i.e. features) placed on California's highways (more info at https://forecastingdata.org/). The same dataset is also used in "Modeling Long- and Short-Term Temporal Patterns with Deep Neural Networks".

The "standard" number of timesteps to predict is 48; there is no standard for the length of the time-series windows fed to the NN.

In [2]:
from datasets import load_dataset

# Load the "traffic_hourly" configuration of the "monash_tsf" dataset.
# The result is indexed by split name below ("train" / "test").
dataset = load_dataset("monash_tsf", "traffic_hourly")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Pull the two splits used in the rest of the notebook out of the loaded dataset.
train_dataset, test_dataset = dataset["train"], dataset["test"]

# Forecast horizon, in timesteps.
prediction_length = 48
# pandas frequency alias used when converting the `start` field to a Period.
freq = "1H"

from functools import lru_cache
from functools import partial
@lru_cache(maxsize=10_000)
def convert_to_pandas_period(date, freq):
    """Convert a start date into a ``pd.Period`` with the given frequency.

    Results are memoised (up to 10,000 distinct ``(date, freq)`` pairs), so
    both arguments must be hashable.

    Args:
        date: value accepted by ``pd.Period`` (e.g. a timestamp or date string).
        freq: pandas frequency alias for the resulting period.

    Returns:
        The ``pd.Period`` built from ``date`` and ``freq``.
    """
    period = pd.Period(value=date, freq=freq)
    return period
def transform_start_field(batch, freq):
    """Replace every entry of ``batch["start"]`` with its pandas Period.

    The batch is modified in place and also returned, so the function can be
    registered as a dataset transform.

    Args:
        batch: mapping with a ``"start"`` entry holding an iterable of dates.
        freq: pandas frequency alias forwarded to ``convert_to_pandas_period``.

    Returns:
        The same ``batch`` object, with ``batch["start"]`` now a list of periods.
    """
    periods = []
    for start in batch["start"]:
        periods.append(convert_to_pandas_period(start, freq))
    batch["start"] = periods
    return batch

# Register the `start` -> pd.Period conversion on both splits; `freq` is bound
# up front with functools.partial so the transform only receives the batch.
# NOTE(review): per the `datasets` docs, set_transform formats rows when they are
# accessed rather than rewriting the stored data — confirm for the installed version.
train_dataset.set_transform(partial(transform_start_field, freq=freq))
test_dataset.set_transform(partial(transform_start_field, freq=freq))


In [4]:
from gluonts.dataset.multivariate_grouper import MultivariateGrouper

# One univariate series per record, so the number of records is the number of variates.
num_of_variates = len(train_dataset)

# --- train split: stack all univariate series into one multivariate series ---
train_grouper = MultivariateGrouper(max_target_dim=num_of_variates)
multi_variate_train_dataset = train_grouper(train_dataset)

# --- test split: same grouping, one multivariate series per rolling test window ---
test_grouper = MultivariateGrouper(
    num_test_dates=len(test_dataset) // num_of_variates,  # number of rolling test windows
    max_target_dim=num_of_variates,
)
multi_variate_test_dataset = test_grouper(test_dataset)



In [5]:
# Sanity check: (train target shape, #train series, test target shape, #test series)
(
    multi_variate_train_dataset[0]['target'].shape,
    len(multi_variate_train_dataset),
    multi_variate_test_dataset[0]['target'].shape,
    len(multi_variate_test_dataset),
)

((862, 17448), 1, (862, 17544), 1)

In [6]:
# Number of timesteps in each input window fed to the network.
ts_length = 100
# Build (input, target) pairs from the single multivariate training series.
# NOTE(review): the windowing itself lives in utils.generate_dataset_multivariate,
# which is not visible here — presumably it slices windows of ts_length inputs
# followed by prediction_length targets; confirm against the helper.
x_train, y_train = generate_dataset_multivariate(multi_variate_train_dataset[0]['target'], ts_length, prediction_length)
# Recorded output: x_train is (117, 862, 100) and y_train is (117, 862, 48).
print(x_train.shape, y_train.shape)

torch.Size([117, 862, 100]) torch.Size([117, 862, 48])


# Define Model and Train

In [7]:
n_features = x_train.shape[1]

# Seed torch's RNG before any layer is created, so that the weight
# initialisation (and therefore the loss reported by the training cell)
# is the same on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# initialize gas params
eta_mu = 0.999
eta_sigma2 = 0.999

# each input record is expected as (batch(=1!), n_features, ts_length)
# the encoder of the time series just flattens every dimension after the batch one
ts_encoder = nn.Flatten()
# the output model is a feedforward network whose input is the concatenation of:
#   - the output of ts_encoder: (batch, n_features * ts_length)
#   - the additional info:      (batch, ts_length * 2 * n_features)  ## A LOT!!
# so the input_dim of the output net is:
output_net_input_dim = n_features * ts_length + ts_length * 2 * n_features

HID_SIZE_1 = 100
HID_SIZE_2 = 100
# the last layer emits one value per (feature, forecast step); the training
# loop reshapes it back to (batch, n_features, prediction_length)
output_model = nn.Sequential(
    nn.Linear(output_net_input_dim, HID_SIZE_1),
    nn.ReLU(),
    nn.Linear(HID_SIZE_1, HID_SIZE_2),
    nn.ReLU(),
    nn.Linear(HID_SIZE_2, n_features * prediction_length),
)

model = GASModel(ts_encoder, eta_mu, eta_sigma2, output_model)

In [8]:
criterion = nn.MSELoss()
LR = 1e-4
optimizer = optim.Adam(model.parameters(), lr=LR)

# zip() stops silently at the shorter of its arguments, while the average below
# divides by the number of targets: make a mismatch fail loudly instead of
# reporting a wrong mean loss.
assert len(x_train) == len(y_train), "x_train and y_train must contain the same number of windows"

running_loss = 0.0

# Single pass over the training windows, one window per optimisation step.
for inputs, labels in zip(x_train, y_train):
    # the first dimension must be batch_size (i.e. 1)
    inputs = inputs.unsqueeze(0)
    labels = labels.unsqueeze(0)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = model(inputs.float())
    # this output is (batch, n_features * prediction_length); reshape it to the
    # shape of the labels so the loss compares matching (feature, step) entries
    outputs = outputs.reshape(labels.shape)
    loss = criterion(outputs, labels.float())
    loss.backward()
    optimizer.step()

    # accumulate the per-window loss for the epoch average
    running_loss += loss.item()

# print statistics: mean loss per training window for this (single) epoch
print('[%d] loss: %.10f' % (1, running_loss / y_train.shape[0]))


[1] loss: 0.0103528826
