In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

device = 'cuda' if torch.cuda.is_available() else 'cpu'


import numpy as np
import plotly.graph_objects as go


from dain import DAINLayer
from utils import generate_timeseries, generate_dataset

Try the dataset with just one feature

In [2]:
GROWING = True
t, y = generate_timeseries(GROWING)

type(t), type(y), t.shape, y.shape

(numpy.ndarray, numpy.ndarray, (2500,), (2500,))

In [3]:
# split the timeseries
MAX_INDEX_TRAIN = 1500
MAX_INDEX_TEST = 2500

y_train = y[:MAX_INDEX_TRAIN]
y_test = y[MAX_INDEX_TRAIN: MAX_INDEX_TEST]

In [4]:
# generate the dataset as tensor of shape 
# (n_timeseries, ts_length, n_features) for inputs
# (n_timeseries, el_to_predict) for labels

TS_LENGTH = 200
EL_TO_PREDICT = 50
y_train, lab_train = generate_dataset(y_train, TS_LENGTH, EL_TO_PREDICT)
y_test, lab_test = generate_dataset(y_test, TS_LENGTH, EL_TO_PREDICT)

type(y_train), y_train.shape, lab_train.shape, y_test.shape, lab_test.shape

(torch.Tensor,
 torch.Size([26, 200, 1]),
 torch.Size([26, 50]),
 torch.Size([16, 200, 1]),
 torch.Size([16, 50]))

Define training, evaluate and plotting functions

In [5]:
def train_model(model, criterion, optimizer, epochs, y_train, lab_train):
    for epoch in range(epochs):

        running_loss = 0.0

        for inputs, labels in zip(y_train, lab_train):
            # it would be better to use dataloader
            # we must unsqueeze dim 0 'cause model expect (batch, ts_length, n_feat)
            inputs = inputs.unsqueeze(0)
            labels = labels.unsqueeze(0)

            # zero the parameter gradients
            optimizer.zero_grad()
            
            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        print(f'[{epoch + 1}] loss: {running_loss / y_train.shape[0]}')

In [6]:
def evaluate_model(model, y_all):

    model.eval()
    y_pred = []
    for inputs in y_all:
        # it would be better to use dataloader
        # we must unsqueeze dim 0 'cause model expect (batch, ts_length, n_feat)
        inputs = inputs.unsqueeze(0)    
        
        outputs = model(inputs.float())
        outputs = outputs.squeeze()   # no need for batch_size here
        y_pred.append(outputs.detach().numpy())

    y_pred = np.array(y_pred)
    y_pred = y_pred.reshape(-1)     # the model returns (batch, el_to_pred) and we have 50 el to predict

    return y_pred

In [7]:
def plot_results(t, y, y_pred, max_index_test, max_index_train, ts_length):
    #plot with plotly with a line where the training set ends
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t[0:max_index_test], y=y, mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=t[ts_length:max_index_test], y=y_pred, mode='lines', name='Predicted'))
    fig.add_trace(go.Scatter(x=[t[max_index_train], t[max_index_train]], y=[-20, 150], mode='lines', name='Training Set End'))
    fig.update_layout(title='Dampened Sinusoid', xaxis_title='Time (s)', yaxis_title='Amplitude')
    fig.show()

Try the model, with means first for various values of mode

We must create a wrapper class of DAIN to handle the tuple returned if return_means is True

In [8]:
MEAN_LR = 1e-06
STD_LR = 0.001
GATE_LR = 10

n_features = y_train.shape[-1]      # shape is (batch, ts_length, n_features)

# let's say we want two hidden layers
HIDDEN_DIM_1 = 100
HIDDEN_DIM_2 = 100     

output_dim = lab_train.shape[-1]    # shape is (batch, el_to_predict)

return_means = True

# we will simply flatten the time series and concatenate the mean vector
input_dim = y_train.shape[1] * n_features + n_features

In [9]:
class DAINWrapper(nn.Module):

    def __init__(self, n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means):
        super(DAINWrapper, self).__init__()

        self.dain = DAINLayer(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

    def forward(self, x):
        x, mu = self.dain(x)
        # flatten the output to get (batch, ts_length)
        x = x.reshape((x.shape[0], -1))
        # concatenate with mu, rememer mu's shape is (batch, n_features)
        x = torch.cat((x, mu), dim=1)
        return x

MODE = None does not make sense

MODE = adaptive average

In [10]:
mode = 'adaptive_avg'

dain = DAINWrapper(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

model = nn.Sequential(dain,
                      nn.Flatten(),
                      nn.Linear(input_dim, HIDDEN_DIM_1),
                      nn.ReLU(),
                      nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
                      nn.ReLU(),
                      nn.Linear(HIDDEN_DIM_2, output_dim))
model

Sequential(
  (0): DAINWrapper(
    (dain): DAINLayer(
      (mean_layer): Linear(in_features=1, out_features=1, bias=False)
      (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
      (gating_layer): Linear(in_features=1, out_features=1, bias=True)
    )
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=201, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [11]:
criterion = nn.MSELoss()
LR = 1e-4
optimizer = optim.Adam(model.parameters(), lr=LR)
EPOCHS = 1000

train_model(model, criterion, optimizer, EPOCHS, y_train, lab_train)

[1] loss: 2514.4158700796274
[2] loss: 2477.393498347356
[3] loss: 2428.0356022761416
[4] loss: 2349.9149874173677
[5] loss: 2224.752716064453
[6] loss: 2031.982668363131
[7] loss: 1758.0152752216045


[8] loss: 1415.0620005680964
[9] loss: 1056.158194908729
[10] loss: 756.355353135329
[11] loss: 548.3216940072866
[12] loss: 405.4958886366624
[13] loss: 298.9814378298246
[14] loss: 219.3319925895104
[15] loss: 161.2905605022724
[16] loss: 119.36644290043758
[17] loss: 89.66467065077562
[18] loss: 69.02766986993643
[19] loss: 53.99210166931152
[20] loss: 43.38541658108051
[21] loss: 35.48699989685645
[22] loss: 29.44214052420396
[23] loss: 24.736334305543167
[24] loss: 20.974705916184647
[25] loss: 17.9933068385491
[26] loss: 15.612552074285654
[27] loss: 13.729736016346859
[28] loss: 12.225011568803053
[29] loss: 11.018382925253649
[30] loss: 10.034436473479637
[31] loss: 9.221868955172026
[32] loss: 8.54979867201585
[33] loss: 7.997275948524475
[34] loss: 7.540443778038025
[35] loss: 7.160910294606135
[36] loss: 6.847146960405203
[37] loss: 6.5757177242865925
[38] loss: 6.331663681910588
[39] loss: 6.120530898754414
[40] loss: 5.934256627009465
[41] loss: 5.771698557413542
[42] loss

In [12]:
# evaluate the model on the entire dataset
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)

y_pred = evaluate_model(model, y_all)
y_pred.shape

(2300,)

In [13]:
plot_results(t, y, y_pred, MAX_INDEX_TEST, MAX_INDEX_TRAIN, TS_LENGTH)

In [14]:
#Compute the total error on the test set
error = 0
for i in range(MAX_INDEX_TRAIN, MAX_INDEX_TEST):
    error = error + (y[i] - y_pred[i-TS_LENGTH])**2
error = error/(MAX_INDEX_TEST - MAX_INDEX_TRAIN)
print(error)

6464.368985552614


MODE = adaptive scale

In [15]:
mode = 'adaptive_scale'

dain = DAINWrapper(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

model = nn.Sequential(dain,
                      nn.Flatten(),
                      nn.Linear(input_dim, HIDDEN_DIM_1),
                      nn.ReLU(),
                      nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
                      nn.ReLU(),
                      nn.Linear(HIDDEN_DIM_2, output_dim))
model

Sequential(
  (0): DAINWrapper(
    (dain): DAINLayer(
      (mean_layer): Linear(in_features=1, out_features=1, bias=False)
      (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
      (gating_layer): Linear(in_features=1, out_features=1, bias=True)
    )
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=201, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [16]:
criterion = nn.MSELoss()
LR = 1e-4
optimizer = optim.Adam(model.parameters(), lr=LR)
EPOCHS = 1000

train_model(model, criterion, optimizer, EPOCHS, y_train, lab_train)

[1] loss: 2510.8404587965747
[2] loss: 2489.431847205529
[3] loss: 2457.8409705528848
[4] loss: 2407.761545034555
[5] loss: 2328.992206280048
[6] loss: 2209.6020085261416
[7] loss: 2033.3149789663462
[8] loss: 1784.2975510817307
[9] loss: 1454.7078904371995
[10] loss: 1055.9742267315205
[11] loss: 640.6165067232572
[12] loss: 304.45174114520734
[13] loss: 131.8454359494723
[14] loss: 90.527635207543
[15] loss: 83.08202075958252
[16] loss: 75.5163627404433
[17] loss: 68.58693229235135
[18] loss: 63.05877214211684
[19] loss: 58.53240236869225
[20] loss: 54.69608029952416
[21] loss: 51.340536502691414
[22] loss: 48.336318694628204
[23] loss: 45.61179894667406
[24] loss: 43.10651219808138
[25] loss: 40.7791903935946
[26] loss: 38.60982940747188
[27] loss: 36.5933262018057
[28] loss: 34.71665580456074
[29] loss: 32.9936687029325
[30] loss: 31.416741847991943
[31] loss: 29.955119866591232
[32] loss: 28.608297311342678
[33] loss: 27.360846060972946
[34] loss: 26.193042094890888
[35] loss: 25.

In [17]:
# evaluate the model on the entire dataset
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)

y_pred = evaluate_model(model, y_all)
y_pred.shape

(2300,)

In [18]:
plot_results(t, y, y_pred, MAX_INDEX_TEST, MAX_INDEX_TRAIN, TS_LENGTH)

In [19]:
#Compute the total error on the test set
error = 0
for i in range(MAX_INDEX_TRAIN, MAX_INDEX_TEST):
    error = error + (y[i] - y_pred[i-TS_LENGTH])**2
error = error/(MAX_INDEX_TEST - MAX_INDEX_TRAIN)
print(error)

1870.3542168491385


MODE = full

In [20]:
mode = 'full'

dain = DAINWrapper(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

model = nn.Sequential(dain,
                      nn.Flatten(),
                      nn.Linear(input_dim, HIDDEN_DIM_1),
                      nn.ReLU(),
                      nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
                      nn.ReLU(),
                      nn.Linear(HIDDEN_DIM_2, output_dim))
model

Sequential(
  (0): DAINWrapper(
    (dain): DAINLayer(
      (mean_layer): Linear(in_features=1, out_features=1, bias=False)
      (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
      (gating_layer): Linear(in_features=1, out_features=1, bias=True)
    )
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=201, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [21]:
criterion = nn.MSELoss()
LR = 1e-4
optimizer = optim.Adam(model.parameters(), lr=LR)
EPOCHS = 1000

train_model(model, criterion, optimizer, EPOCHS, y_train, lab_train)

[1] loss: 2511.745314378005
[2] loss: 2497.414320725661


[3] loss: 2474.889394906851
[4] loss: 2437.663841834435
[5] loss: 2379.050213153546
[6] loss: 2290.87839449369
[7] loss: 2162.3478017953726
[8] loss: 1981.6420100285457
[9] loss: 1738.7740666316106
[10] loss: 1429.2223275991587
[11] loss: 1066.891585129958
[12] loss: 692.9271193284255
[13] loss: 379.3525202824519
[14] loss: 198.94570438678448
[15] loss: 144.43735738900992
[16] loss: 133.60459074607263
[17] loss: 124.01038896120511
[18] loss: 114.09548550385695
[19] loss: 105.44138976243826
[20] loss: 97.98644012671251
[21] loss: 91.55896560962384
[22] loss: 85.82712255991422
[23] loss: 80.68342918616075
[24] loss: 76.01972814706656
[25] loss: 71.73506140708923
[26] loss: 67.80031338104835
[27] loss: 64.16455327547513
[28] loss: 60.80682815038241
[29] loss: 57.67905134421129
[30] loss: 54.78136963110704
[31] loss: 52.09641517125643
[32] loss: 49.58976037685688
[33] loss: 47.25898986596327
[34] loss: 45.098325564311104
[35] loss: 43.0836789057805
[36] loss: 41.221611756544846
[37] loss: 

In [22]:
# evaluate the model on the entire dataset
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)

y_pred = evaluate_model(model, y_all)
y_pred.shape

(2300,)

In [23]:
plot_results(t, y, y_pred, MAX_INDEX_TEST, MAX_INDEX_TRAIN, TS_LENGTH)

In [24]:
#Compute the total error on the test set
error = 0
for i in range(MAX_INDEX_TRAIN, MAX_INDEX_TEST):
    error = error + (y[i] - y_pred[i-TS_LENGTH])**2
error = error/(MAX_INDEX_TEST - MAX_INDEX_TRAIN)
print(error)

2000.2611473212548
