In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

device = 'cuda' if torch.cuda.is_available() else 'cpu'


import numpy as np
import plotly.graph_objects as go


from dain import DAINLayer
from utils import generate_timeseries, generate_dataset

Try the dataset with just one feature

In [2]:
# Flag forwarded to generate_timeseries (see utils.py for its exact effect).
GROWING = True

t, y = generate_timeseries(GROWING)

# Sanity check: container types and lengths of the time axis and the signal.
(type(t), type(y), t.shape, y.shape)

(numpy.ndarray, numpy.ndarray, (2500,), (2500,))

In [3]:
# Split the time series: samples [0, MAX_INDEX_TRAIN) are used for training,
# samples [MAX_INDEX_TRAIN, MAX_INDEX_TEST) for testing.
MAX_INDEX_TRAIN = 1500
MAX_INDEX_TEST = 2500

y_train, y_test = y[:MAX_INDEX_TRAIN], y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]

In [4]:
# Build the datasets as tensors of shape
# (n_timeseries, ts_length, n_features) for inputs
# (n_timeseries, el_to_predict) for labels

TS_LENGTH = 200
EL_TO_PREDICT = 50

# Slice the raw series `y` here instead of feeding y_train / y_test back into
# generate_dataset: the cell then no longer consumes its own output, so it can
# be re-run without handing an already-windowed tensor to generate_dataset.
y_train, lab_train = generate_dataset(y[:MAX_INDEX_TRAIN], TS_LENGTH, EL_TO_PREDICT)
y_test, lab_test = generate_dataset(y[MAX_INDEX_TRAIN:MAX_INDEX_TEST], TS_LENGTH, EL_TO_PREDICT)

type(y_train), y_train.shape, lab_train.shape, y_test.shape, lab_test.shape

(torch.Tensor,
 torch.Size([26, 200, 1]),
 torch.Size([26, 50]),
 torch.Size([16, 200, 1]),
 torch.Size([16, 50]))

Define the training, evaluation and plotting functions

In [5]:
def train_model(model, criterion, optimizer, epochs, y_train, lab_train):
    """Fit ``model`` one sample at a time and print the mean loss of every epoch.

    Args:
        model: module called on inputs of shape (1, ts_length, n_features).
        criterion: loss callable, applied as ``criterion(outputs, labels)``.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates.
        epochs: number of full passes over ``y_train``.
        y_train: tensor of input windows, iterated along dim 0.
        lab_train: tensor of targets, paired with ``y_train`` along dim 0.

    Returns:
        None; progress is reported through ``print`` only.
    """
    # evaluate_model() leaves the model in eval mode, so always switch back to
    # training mode before updating the weights.
    model.train()

    for epoch in range(epochs):

        running_loss = 0.0

        for inputs, labels in zip(y_train, lab_train):
            # it would be better to use a DataLoader
            # the model expects (batch, ts_length, n_feat), so add a batch dim of 1
            inputs = inputs.unsqueeze(0)
            labels = labels.unsqueeze(0)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate the per-sample loss for the epoch average
            running_loss += loss.item()

        print(f'[{epoch + 1}] loss: {running_loss / y_train.shape[0]}')

In [6]:
def evaluate_model(model, y_all):
    """Run ``model`` on every window of ``y_all`` and return the flat predictions.

    Args:
        model: module called on inputs of shape (1, ts_length, n_features).
        y_all: tensor of input windows, iterated along dim 0.

    Returns:
        1-D ``numpy.ndarray`` with the outputs of all windows concatenated in
        iteration order.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()
    y_pred = []

    # Inference only: skip building the autograd graph for every window.
    with torch.no_grad():
        for inputs in y_all:
            # it would be better to use a DataLoader
            # the model expects (batch, ts_length, n_feat), so add a batch dim of 1
            inputs = inputs.unsqueeze(0)

            outputs = model(inputs.float())
            outputs = outputs.squeeze()   # no need for batch_size here
            # .cpu() keeps the conversion valid when the model lives on a GPU
            y_pred.append(outputs.detach().cpu().numpy())

    # the model returns (batch, el_to_pred) per window; flatten to one series
    return np.array(y_pred).reshape(-1)

In [7]:
def plot_results(t, y, y_pred, max_index_test, max_index_train, ts_length):
    """Plot the actual series, the predictions and a marker at the train/test split.

    The predictions are drawn against ``t[ts_length:max_index_test]`` and the
    split marker is a vertical segment at ``t[max_index_train]``.
    """
    split_time = t[max_index_train]

    traces = [
        go.Scatter(x=t[0:max_index_test], y=y, mode='lines', name='Actual'),
        go.Scatter(x=t[ts_length:max_index_test], y=y_pred, mode='lines', name='Predicted'),
        # vertical segment showing where the training set ends
        go.Scatter(x=[split_time, split_time], y=[-20, 150], mode='lines', name='Training Set End'),
    ]

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(title='Dampened Sinusoid', xaxis_title='Time (s)', yaxis_title='Amplitude')
    fig.show()

Try the model for various values of `mode`, first without returning the means (`return_means = False`)

In [8]:
# Learning rates handed to DAINLayer for its mean, scaling and gating parts.
MEAN_LR = 1e-6
STD_LR = 1e-3
GATE_LR = 10

# Two hidden layers of equal width.
HIDDEN_DIM_1 = 100
HIDDEN_DIM_2 = 100

# y_train is (batch, ts_length, n_features); lab_train is (batch, el_to_predict).
n_features = y_train.size(-1)
output_dim = lab_train.size(-1)

# The time series is simply flattened and no means are appended, so the first
# linear layer sees ts_length * n_features inputs.
return_means = False
input_dim = y_train.size(1) * n_features

MODE = None

In [9]:
# The mode value is passed straight to DAINLayer (see dain.py for its meaning).
mode = None

dain = DAINLayer(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

# DAIN layer followed by a two-hidden-layer MLP over the flattened window.
layers = [
    dain,
    nn.Flatten(),
    nn.Linear(input_dim, HIDDEN_DIM_1),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_2, output_dim),
]
model = nn.Sequential(*layers)
model

Sequential(
  (0): DAINLayer(
    (mean_layer): Linear(in_features=1, out_features=1, bias=False)
    (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
    (gating_layer): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=200, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [10]:
# Optimisation settings for this run.
LR = 1e-4
EPOCHS = 1000

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    y_train=y_train,
    lab_train=lab_train,
)

[1] loss: 2305.436265211839


[2] loss: 1779.9872553898738
[3] loss: 1038.7980816180889
[4] loss: 408.3103719858023
[5] loss: 127.88299091045673
[6] loss: 55.24046912560096
[7] loss: 42.576695919036865
[8] loss: 40.344540623518135
[9] loss: 39.36518176702353
[10] loss: 38.47439670562744
[11] loss: 37.571755033272964
[12] loss: 36.667309220020584
[13] loss: 35.779606342315674
[14] loss: 34.90853172999162
[15] loss: 34.04369464287391
[16] loss: 33.18873114769276
[17] loss: 32.337066586201004
[18] loss: 31.496202597251305
[19] loss: 30.655849502636837
[20] loss: 29.826070721332844
[21] loss: 29.000982761383057
[22] loss: 28.185288365070637
[23] loss: 27.37884533405304
[24] loss: 26.57722700559176
[25] loss: 25.793034966175373
[26] loss: 25.01287317276001
[27] loss: 24.243531924027664
[28] loss: 23.487140618837795
[29] loss: 22.74520143178793
[30] loss: 21.999706552578854
[31] loss: 21.276777029037476
[32] loss: 20.555517059106094
[33] loss: 19.853020282892082
[34] loss: 19.159241089454063
[35] loss: 18.490846863159767

In [11]:
# Evaluate the model on windows built from the entire series (train + test).
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)
y_pred = evaluate_model(model=model, y_all=y_all)

y_pred.shape

(2300,)

In [12]:
plot_results(
    t,
    y,
    y_pred,
    max_index_test=MAX_INDEX_TEST,
    max_index_train=MAX_INDEX_TRAIN,
    ts_length=TS_LENGTH,
)

In [13]:
# Mean squared error on the test span. Each target y[i] is paired with
# y_pred[i - TS_LENGTH], so the prediction slice is shifted back by TS_LENGTH.
test_targets = y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]
test_preds = y_pred[MAX_INDEX_TRAIN - TS_LENGTH:MAX_INDEX_TEST - TS_LENGTH]
error = np.mean((test_targets - test_preds) ** 2)
print(error)

526.4795907032799


MODE = simple average

In [14]:
# The mode string is passed straight to DAINLayer (see dain.py for its meaning).
mode = 'avg'

dain = DAINLayer(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

# DAIN layer followed by a two-hidden-layer MLP over the flattened window.
layers = [
    dain,
    nn.Flatten(),
    nn.Linear(input_dim, HIDDEN_DIM_1),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_2, output_dim),
]
model = nn.Sequential(*layers)
model

Sequential(
  (0): DAINLayer(
    (mean_layer): Linear(in_features=1, out_features=1, bias=False)
    (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
    (gating_layer): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=200, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [15]:
# Optimisation settings for this run.
LR = 1e-4
EPOCHS = 1000

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    y_train=y_train,
    lab_train=lab_train,
)

[1] loss: 2499.831251878005


[2] loss: 2470.8898503230166
[3] loss: 2425.345458984375
[4] loss: 2348.427466759315
[5] loss: 2222.417217548077
[6] loss: 2031.4173607459436
[7] loss: 1776.8521206195537
[8] loss: 1497.3304155789888
[9] loss: 1254.7280200077937
[10] loss: 1082.281593322754
[11] loss: 980.0578651428223
[12] loss: 925.9709912813627
[13] loss: 896.2443860860972
[14] loss: 877.596793101384
[15] loss: 864.6627316108116
[16] loss: 854.6377721933218
[17] loss: 845.8047414926382
[18] loss: 838.4326698596661
[19] loss: 831.3622527489296
[20] loss: 824.9850494678204
[21] loss: 818.7488093742958
[22] loss: 812.7819984142596
[23] loss: 807.0349022791936
[24] loss: 801.1683037831233
[25] loss: 795.5088504644541
[26] loss: 790.0356490795428
[27] loss: 784.1100818927472
[28] loss: 778.5563489473783
[29] loss: 772.8689619944646
[30] loss: 767.3040955616877
[31] loss: 761.2777971120981
[32] loss: 755.5168682245107
[33] loss: 749.8612062747662
[34] loss: 743.5891529229971
[35] loss: 737.9175422375018
[36] loss: 731.315

In [16]:
# Evaluate the model on windows built from the entire series (train + test).
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)
y_pred = evaluate_model(model=model, y_all=y_all)

y_pred.shape

(2300,)

In [17]:
plot_results(
    t,
    y,
    y_pred,
    max_index_test=MAX_INDEX_TEST,
    max_index_train=MAX_INDEX_TRAIN,
    ts_length=TS_LENGTH,
)

In [18]:
# Mean squared error on the test span. Each target y[i] is paired with
# y_pred[i - TS_LENGTH], so the prediction slice is shifted back by TS_LENGTH.
test_targets = y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]
test_preds = y_pred[MAX_INDEX_TRAIN - TS_LENGTH:MAX_INDEX_TEST - TS_LENGTH]
error = np.mean((test_targets - test_preds) ** 2)
print(error)

51179.68610832898


MODE = adaptive average

In [19]:
# The mode string is passed straight to DAINLayer (see dain.py for its meaning).
mode = 'adaptive_avg'

dain = DAINLayer(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

# DAIN layer followed by a two-hidden-layer MLP over the flattened window.
layers = [
    dain,
    nn.Flatten(),
    nn.Linear(input_dim, HIDDEN_DIM_1),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_2, output_dim),
]
model = nn.Sequential(*layers)
model

Sequential(
  (0): DAINLayer(
    (mean_layer): Linear(in_features=1, out_features=1, bias=False)
    (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
    (gating_layer): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=200, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [20]:
# Optimisation settings for this run.
LR = 1e-4
EPOCHS = 1000

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    y_train=y_train,
    lab_train=lab_train,
)

[1] loss: 2509.7760103665864
[2] loss: 2483.1566725510816
[3] loss: 2443.5657395582934
[4] loss: 2375.4827082707334


[5] loss: 2258.941901573768
[6] loss: 2072.994891826923
[7] loss: 1806.9816589355469
[8] loss: 1480.3890756460337
[9] loss: 1162.9560596759502
[10] loss: 918.100276360145
[11] loss: 744.8025558178241
[12] loss: 610.7710822178767
[13] loss: 494.35318103203406
[14] loss: 385.1007717572726
[15] loss: 289.09316356365497
[16] loss: 211.7880234351525
[17] loss: 155.5794346882747
[18] loss: 116.37871899971596
[19] loss: 88.53882404474112
[20] loss: 69.15512558130118
[21] loss: 55.040607965909516
[22] loss: 44.44429219686068
[23] loss: 36.5298165174631
[24] loss: 30.45710308735187
[25] loss: 25.76684232858511
[26] loss: 22.133846521377563
[27] loss: 19.24957638520461
[28] loss: 17.015359117434574
[29] loss: 15.208068627577562
[30] loss: 13.793918169461763
[31] loss: 12.631192491604732
[32] loss: 11.657085482890789
[33] loss: 10.842849227098318
[34] loss: 10.162185403016897
[35] loss: 9.56334871512193
[36] loss: 9.053055047988892
[37] loss: 8.612448206314674
[38] loss: 8.20969843864441
[39] los

In [21]:
# Evaluate the model on windows built from the entire series (train + test).
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)
y_pred = evaluate_model(model=model, y_all=y_all)

y_pred.shape

(2300,)

In [22]:
plot_results(
    t,
    y,
    y_pred,
    max_index_test=MAX_INDEX_TEST,
    max_index_train=MAX_INDEX_TRAIN,
    ts_length=TS_LENGTH,
)

In [23]:
# Mean squared error on the test span. Each target y[i] is paired with
# y_pred[i - TS_LENGTH], so the prediction slice is shifted back by TS_LENGTH.
test_targets = y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]
test_preds = y_pred[MAX_INDEX_TRAIN - TS_LENGTH:MAX_INDEX_TEST - TS_LENGTH]
error = np.mean((test_targets - test_preds) ** 2)
print(error)

4819.6656674581245


MODE = adaptive scale

In [24]:
# The mode string is passed straight to DAINLayer (see dain.py for its meaning).
mode = 'adaptive_scale'

dain = DAINLayer(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

# DAIN layer followed by a two-hidden-layer MLP over the flattened window.
layers = [
    dain,
    nn.Flatten(),
    nn.Linear(input_dim, HIDDEN_DIM_1),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_2, output_dim),
]
model = nn.Sequential(*layers)
model

Sequential(
  (0): DAINLayer(
    (mean_layer): Linear(in_features=1, out_features=1, bias=False)
    (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
    (gating_layer): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=200, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [25]:
# Optimisation settings for this run.
LR = 1e-4
EPOCHS = 1000

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    y_train=y_train,
    lab_train=lab_train,
)

[1] loss: 2517.662823016827
[2] loss: 2513.5614717923677
[3] loss: 2507.559833233173
[4] loss: 2496.68889441857


[5] loss: 2475.8428955078125
[6] loss: 2435.9910982572114
[7] loss: 2362.890399639423
[8] loss: 2237.2878840519834
[9] loss: 2035.4380821814905
[10] loss: 1734.5565842848557
[11] loss: 1323.895743736854
[12] loss: 836.8056787344126
[13] loss: 396.132688669058
[14] loss: 185.7771827991192
[15] loss: 147.24766635894775
[16] loss: 115.57575621971718
[17] loss: 90.16611422025241
[18] loss: 72.28797156994159
[19] loss: 59.0515797321613
[20] loss: 48.92038978063143
[21] loss: 41.2294874558082
[22] loss: 35.30796746107248
[23] loss: 30.745917815428513
[24] loss: 27.238875517478355
[25] loss: 24.488116777860203
[26] loss: 22.299142379027145
[27] loss: 20.529751722629253
[28] loss: 19.07384885274447
[29] loss: 17.848709693321815
[30] loss: 16.799082957781277
[31] loss: 15.882329885776226
[32] loss: 15.072756547194262
[33] loss: 14.350243696799645
[34] loss: 13.696934920090895
[35] loss: 13.10123740709745
[36] loss: 12.555628694020784
[37] loss: 12.057397851577171
[38] loss: 11.604274034500122
[

In [26]:
# Evaluate the model on windows built from the entire series (train + test).
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)
y_pred = evaluate_model(model=model, y_all=y_all)

y_pred.shape

(2300,)

In [27]:
plot_results(
    t,
    y,
    y_pred,
    max_index_test=MAX_INDEX_TEST,
    max_index_train=MAX_INDEX_TRAIN,
    ts_length=TS_LENGTH,
)

In [28]:
# Mean squared error on the test span. Each target y[i] is paired with
# y_pred[i - TS_LENGTH], so the prediction slice is shifted back by TS_LENGTH.
test_targets = y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]
test_preds = y_pred[MAX_INDEX_TRAIN - TS_LENGTH:MAX_INDEX_TEST - TS_LENGTH]
error = np.mean((test_targets - test_preds) ** 2)
print(error)

1668.8483362026645


MODE = full

In [29]:
# The mode string is passed straight to DAINLayer (see dain.py for its meaning).
mode = 'full'

dain = DAINLayer(n_features, mode, MEAN_LR, STD_LR, GATE_LR, return_means)

# DAIN layer followed by a two-hidden-layer MLP over the flattened window.
layers = [
    dain,
    nn.Flatten(),
    nn.Linear(input_dim, HIDDEN_DIM_1),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_1, HIDDEN_DIM_2),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM_2, output_dim),
]
model = nn.Sequential(*layers)
model

Sequential(
  (0): DAINLayer(
    (mean_layer): Linear(in_features=1, out_features=1, bias=False)
    (scaling_layer): Linear(in_features=1, out_features=1, bias=False)
    (gating_layer): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=200, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=50, bias=True)
)

In [30]:
# Optimisation settings for this run.
LR = 1e-4
EPOCHS = 1000

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    y_train=y_train,
    lab_train=lab_train,
)

[1] loss: 2517.4145226111777
[2] loss: 2513.697256234976
[3] loss: 2507.4022357647236


[4] loss: 2495.144038273738
[5] loss: 2470.725642277644
[6] loss: 2423.9437068058896
[7] loss: 2340.28752723107
[8] loss: 2199.754079965445
[9] loss: 1977.304908165565
[10] loss: 1650.6815162071814
[11] loss: 1217.5177518404448
[12] loss: 725.6931633582482
[13] loss: 323.12787158672626
[14] loss: 179.6110660112821
[15] loss: 160.7986944638766
[16] loss: 130.8442313487713
[17] loss: 106.12292931630061
[18] loss: 87.80906658906203
[19] loss: 73.64051936222957
[20] loss: 62.341902622809776
[21] loss: 53.38298968168405
[22] loss: 46.3038115134606
[23] loss: 40.654663434395424
[24] loss: 36.155359946764435
[25] loss: 32.51975793104906
[26] loss: 29.563959433482243
[27] loss: 27.134984529935398
[28] loss: 25.126959938269394
[29] loss: 23.447063455214867
[30] loss: 22.03319752216339
[31] loss: 20.83375817995805
[32] loss: 19.799388005183292
[33] loss: 18.89326190031492
[34] loss: 18.0926633981558
[35] loss: 17.374135164114143
[36] loss: 16.723285913467407
[37] loss: 16.12494705273555
[38] los

In [31]:
# Evaluate the model on windows built from the entire series (train + test).
y_all, lab_all = generate_dataset(y, TS_LENGTH, EL_TO_PREDICT)
y_pred = evaluate_model(model=model, y_all=y_all)

y_pred.shape

(2300,)

In [32]:
plot_results(
    t,
    y,
    y_pred,
    max_index_test=MAX_INDEX_TEST,
    max_index_train=MAX_INDEX_TRAIN,
    ts_length=TS_LENGTH,
)

In [33]:
# Mean squared error on the test span. Each target y[i] is paired with
# y_pred[i - TS_LENGTH], so the prediction slice is shifted back by TS_LENGTH.
test_targets = y[MAX_INDEX_TRAIN:MAX_INDEX_TEST]
test_preds = y_pred[MAX_INDEX_TRAIN - TS_LENGTH:MAX_INDEX_TEST - TS_LENGTH]
error = np.mean((test_targets - test_preds) ** 2)
print(error)

1700.8796259040369
