In [4]:
import numpy as np
import plotly.graph_objects as go
import torch
import torch.nn as nn
import torch.optim as optim

from gas import GASModel
from utils import generate_timeseries, generate_dataset

In [1]:
# Load a serialized object from 'traffic_hourly_train.pt' (path is relative to
# the notebook's working directory).
# NOTE(review): the recorded output of this cell is
# "RuntimeError: Invalid magic number; corrupt file?", i.e. the load failed on
# the last recorded run, and a failing cell stops "Run All" at this point.
# NOTE(review): `x` is not referenced by any later cell visible in this
# notebook -- this looks like a leftover experiment; confirm before relying on it.
# NOTE(review): torch.load unpickles the file; only use it on trusted files.
import torch
x = torch.load('traffic_hourly_train.pt')

RuntimeError: Invalid magic number; corrupt file?

Generate time series and dataset

In [5]:
# Flag forwarded to generate_timeseries.
# NOTE(review): presumably selects a growing rather than a dampened series --
# confirm against utils.generate_timeseries.
GROWING = True
# t is used later as the plot x-axis, y as the series values that get split
# into train / test and windowed.
t, y = generate_timeseries(GROWING)

In [6]:
# Train / test boundaries, expressed as sample indices into the series `y`:
# samples [0, MAX_INDEX_TRAIN) are used for training and
# samples [MAX_INDEX_TRAIN, MAX_INDEX_TEST) for testing.
MAX_INDEX_TRAIN, MAX_INDEX_TEST = 1500, 2500
y_train, y_test = y[:MAX_INDEX_TRAIN], y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]

In [7]:
# generate the dataset as tensor of shape
# (n_timeseries, ts_length, n_features) for inputs
# (n_timeseries, el_to_predict) for labels
TS_LENGTH = 200
EL_TO_PREDICT = 50
# Window slices of the raw series `y` instead of the current values of
# y_train / y_test: those two names are overwritten right here with the
# windowed tensors, so feeding them back in would window already-windowed
# data if this cell were executed a second time. Slicing `y` with the same
# boundaries gives the same result on a fresh run and makes the cell
# safe to re-run.
y_train, lab_train = generate_dataset(
    y[:MAX_INDEX_TRAIN], TS_LENGTH, EL_TO_PREDICT)
y_test, lab_test = generate_dataset(
    y[MAX_INDEX_TRAIN:MAX_INDEX_TEST], TS_LENGTH, EL_TO_PREDICT)

Initialize the model

In [8]:
# Hyper-parameters handed to GASModel (one for mu, one for sigma2).
eta_mu = 0.999
eta_sigma2 = 0.999

# Time-series encoder: a plain Flatten layer.
ts_encoder = nn.Flatten()

# Input width of the output network. Per the original author's note the
# encoder contributes ts_length features and the additional information
# contributes ts_length * 2 * n_features, i.e. 3 * TS_LENGTH in total here.
ts_embedding_dim = 3 * TS_LENGTH

# Output network: feed-forward MLP with two hidden ReLU layers that maps the
# embedding to EL_TO_PREDICT values.
HID_SIZE_1 = HID_SIZE_2 = 100
output_model = nn.Sequential(
    nn.Linear(ts_embedding_dim, HID_SIZE_1),
    nn.ReLU(),
    nn.Linear(HID_SIZE_1, HID_SIZE_2),
    nn.ReLU(),
    nn.Linear(HID_SIZE_2, EL_TO_PREDICT),
)

model = GASModel(ts_encoder, eta_mu, eta_sigma2, output_model)

Define train, evaluate and plot result functions

In [9]:
def train_model(model, criterion, optimizer, epochs, y_train, lab_train):
    """Train `model` one sample at a time and print the mean loss per epoch.

    Args:
        model: module called as ``model(inputs)``; it is put in training mode.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer over the model parameters; ``zero_grad`` and
            ``step`` are called once per sample.
        epochs: number of full passes over the data.
        y_train: iterable of input windows without a batch dimension.
        lab_train: iterable of label vectors, paired with `y_train` by position.

    Returns:
        The same `model` object, trained in place.
    """
    # Make sure layers with train/eval-dependent behaviour are in training
    # mode, e.g. when eval_model() (which calls model.eval()) ran earlier.
    model.train()

    for epoch in range(epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        n_samples = 0

        for inputs, labels in zip(y_train, lab_train):
            # the first dimension must be batch_size (i.e. 1)
            inputs = inputs.unsqueeze(0)
            labels = labels.unsqueeze(0)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs.float())
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_samples += 1

        # Average over the samples actually processed: identical to dividing
        # by len(y_train) when inputs and labels have equal length, and it
        # avoids a ZeroDivisionError on an empty dataset (reported as nan).
        mean_loss = running_loss / n_samples if n_samples else float('nan')
        print('[%d] loss: %.10f' % (epoch + 1, mean_loss))

    print('Finished Training')

    return model


def eval_model(model, y_all):
    """Run `model` over every window in `y_all` and stack the predictions.

    Args:
        model: module called as ``model(inputs)``. It is switched to
            evaluation mode with ``model.eval()`` and left in that mode.
        y_all: iterable of input windows without a batch dimension; each one
            gets a leading batch dimension of 1 and is cast to float.

    Returns:
        numpy array with one entry per window, holding the model output for
        that window with the batch dimension removed.
    """
    model.eval()
    y_pred = []
    # Inference only: no_grad avoids building an autograd graph per window.
    with torch.no_grad():
        for inputs in y_all:
            inputs = inputs.unsqueeze(0)    # first dimension must be batch_size
            outputs = model(inputs.float())
            # Drop only the batch axis. A bare squeeze() would also collapse
            # the prediction axis when the model predicts a single element.
            outputs = outputs.squeeze(0)
            # .cpu() keeps the conversion working if the model runs on
            # another device.
            y_pred.append(outputs.cpu().numpy())

    return np.array(y_pred)


def plot_results(t, y, y_pred, max_index_test, max_index_train, ts_length,
                 title='Dampened Sinusoid', split_line_range=(-20, 150)):
    """Plot the actual series, the predictions and a train/test split marker.

    Args:
        t: time axis; indexed and sliced with the integer arguments below.
        y: actual values, drawn against ``t[0:max_index_test]``.
        y_pred: predicted values, drawn against ``t[ts_length:max_index_test]``
            (predictions start after the first input window).
        max_index_test: index where the plotted span ends.
        max_index_train: index of the sample where the training set ends.
        ts_length: length of one input window.
        title: figure title; defaults to the previously hard-coded text.
        split_line_range: (low, high) y-extent of the vertical marker at the
            end of the training set; defaults to the previously hard-coded
            values.
    """
    # plot with plotly with a line where the training set ends
    split_t = t[max_index_train]
    marker_low, marker_high = split_line_range

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t[0:max_index_test], y=y, mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=t[ts_length:max_index_test], y=y_pred, mode='lines', name='Predicted'))
    fig.add_trace(go.Scatter(x=[split_t, split_t], y=[marker_low, marker_high], mode='lines', name='Training Set End'))
    fig.update_layout(title=title, xaxis_title='Time (s)', yaxis_title='Amplitude')
    fig.show()

Run the experiment

In [10]:
# Training configuration: learning rate and number of passes over the data.
LR = 1e-4
epochs = 400

# Mean-squared-error loss, optimised with Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=LR)

# Fit the model on the windowed training set; the per-epoch mean loss is
# printed by train_model.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    y_train=y_train,
    lab_train=lab_train,
)

[1] loss: 2240.5108971229
[2] loss: 1683.2636131873
[3] loss: 969.0713618352
[4] loss: 453.9730130709
[5] loss: 206.8542333750
[6] loss: 96.2332986685
[7] loss: 56.6985925528
[8] loss: 44.8392334718
[9] loss: 41.2561446887
[10] loss: 39.6769336370
[11] loss: 38.6195575641
[12] loss: 37.6771610517
[13] loss: 36.7620688769
[14] loss: 35.8557845079
[15] loss: 34.9555754845
[16] loss: 34.0633565829
[17] loss: 33.1894164636
[18] loss: 32.3129578004
[19] loss: 31.4503328984
[20] loss: 30.5991178109
[21] loss: 29.7587265235
[22] loss: 28.9316369570
[23] loss: 28.1228256134
[24] loss: 27.3163325603
[25] loss: 26.5268651797
[26] loss: 25.7500879214
[27] loss: 24.9983527018
[28] loss: 24.2484442546
[29] loss: 23.5165962348
[30] loss: 22.7994652620
[31] loss: 22.1010294969
[32] loss: 21.4345033352
[33] loss: 20.7506437852
[34] loss: 20.0992734157
[35] loss: 19.4669488027
[36] loss: 18.8557694600
[37] loss: 18.2535909323
[38] loss: 17.6725446169
[39] loss: 17.1087814478
[40] loss: 16.5696251667
[4

In [11]:
# Window the complete series (train and test spans together) and predict
# every window with the trained model.
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)
y_pred = eval_model(model=model, y_all=y_all)

# Display the shapes of the inputs, labels and predictions as a sanity check.
(y_all.shape, lab_all.shape, y_pred.shape)

(torch.Size([46, 200, 1]), torch.Size([46, 50]), (46, 50))

In [12]:
# Flatten the per-window predictions into one continuous 1-D series so they
# can be drawn against the time axis.
y_pred = np.reshape(y_pred, -1)
plot_results(
    t=t,
    y=y,
    y_pred=y_pred,
    max_index_test=MAX_INDEX_TEST,
    max_index_train=MAX_INDEX_TRAIN,
    ts_length=TS_LENGTH,
)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [None]:
# Mean squared error over the test span [MAX_INDEX_TRAIN, MAX_INDEX_TEST).
# Element j of the flattened y_pred is compared with y[j + TS_LENGTH], so the
# prediction slice is shifted back by TS_LENGTH.
y_test_actual = np.asarray(y[MAX_INDEX_TRAIN:MAX_INDEX_TEST], dtype=np.float64)
y_test_pred = np.asarray(
    y_pred[MAX_INDEX_TRAIN - TS_LENGTH:MAX_INDEX_TEST - TS_LENGTH],
    dtype=np.float64,
)
# Fail loudly if either series is shorter than the requested span instead of
# silently averaging over misaligned or truncated data.
assert y_test_actual.shape == y_test_pred.shape, (
    "actual and predicted test spans differ in length: "
    f"{y_test_actual.shape} vs {y_test_pred.shape}"
)
# Vectorised equivalent of summing the squared differences sample by sample
# and dividing by the number of test samples.
error = float(np.mean((y_test_actual - y_test_pred) ** 2))
print(error)

434.03922102573944
