# Import

In [1]:
!pip install torchsummaryX



In [2]:
import torch
import numpy as np
import torch.nn as nn
from torchsummaryX import summary
from tqdm import tqdm

from sklearn.model_selection import train_test_split


# Constants

In [3]:
num_quantiles = 11 # H_in
look_back_window = 5 # L
hidden_size = 256
n = 50

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load Data

In [4]:
data = np.load('./noisepage-forecast/param_training_data.npz', allow_pickle=True)
X = data["X"]
Y = data["Y"]
# Split into train test set
X_train, X_test, y_train, y_test = train_test_split(X, Y, shuffle=False, test_size=0.1)
print(X.dtype, X_train.dtype)

# X: (N, L, H_in) to (L, N, H_in);
X_train, X_test = np.transpose(X_train, (1, 0, 2)), np.transpose(X_test, (1, 0, 2))
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

float64 float64
(5, 2997, 11) (5, 333, 11) (2997, 11) (333, 11)


# Model

In [32]:
from torch.nn.modules import dropout
class Network(nn.Module):

    def __init__(self, input_size, output_size, hidden_size, num_layers=4): # You can add any extra arguments as you wish

        super(Network, self).__init__()
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=False, dropout=0.1)
        self.classification = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=64),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(in_features=64, out_features=output_size)
        )

    def forward(self, x):
        output, (h_n, c_n) = self.lstm(x)
        # output: (L, 1 * H_out)

        out = self.classification(output)
        return out

model = Network(num_quantiles, num_quantiles, hidden_size).to(device)
summary(model, torch.tensor(X_train[:, :1, :]).to(device).float())

                             Kernel Shape Output Shape   Params Mult-Adds
Layer                                                                    
0_lstm                                  -  [5, 1, 128]  468.48k  464.384k
1_classification.Linear_0       [128, 64]   [5, 1, 64]   8.256k    8.192k
2_classification.LeakyReLU_1            -   [5, 1, 64]        -         -
3_classification.Dropout_2              -   [5, 1, 64]        -         -
4_classification.Linear_3        [64, 11]   [5, 1, 11]    715.0     704.0
---------------------------------------------------------------------------
                        Totals
Total params          477.451k
Trainable params      477.451k
Non-trainable params       0.0
Mult-Adds              473.28k


  df_sum = df.sum()


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_lstm,-,"[5, 1, 128]",468480.0,464384.0
1_classification.Linear_0,"[128, 64]","[5, 1, 64]",8256.0,8192.0
2_classification.LeakyReLU_1,-,"[5, 1, 64]",,
3_classification.Dropout_2,-,"[5, 1, 64]",,
4_classification.Linear_3,"[64, 11]","[5, 1, 11]",715.0,704.0


# Model Parameters

In [33]:
epochs = 50
lr = 0.0001
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(X_train.shape[1] * epochs))

# Model Training

In [None]:
from torch.nn.modules import dropout
class Network(nn.Module):

    def __init__(self, input_size, output_size, hidden_size, num_layers=4): # You can add any extra arguments as you wish

        super(Network, self).__init__()
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=False, dropout=0.1)
        self.classification = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=64),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(in_features=64, out_features=output_size)
        )

    def forward(self, x):
        output, (h_n, c_n) = self.lstm(x)
        # output: (L, 1 * H_out)

        out = self.classification(output)
        return out

model = Network(num_quantiles, num_quantiles, hidden_size).to(device)
summary(model, torch.tensor(X_train[:, :1, :]).to(device).float())

In [34]:
for epoch in range(epochs):
    model.train()
    arr = np.arange(X_train.shape[1])
    np.random.shuffle(arr)
    train_loss = 0
    batch_bar = tqdm(total=X_train.shape[1], dynamic_ncols=True, leave=False, position=0, desc='Train') 
    for ind in arr:
        seq = torch.tensor(X_train[:, ind:ind+1, :]).to(device).float()
        labels = torch.tensor(y_train[ind]).to(device).float()
        optimizer.zero_grad()
        y_pred = model(seq)
        single_loss = loss_function(y_pred[-1, -1, :], labels)
        single_loss.backward()
        optimizer.step()
        scheduler.step()
        train_loss += float(single_loss)

        batch_bar.set_postfix(
            loss="{:.04f}".format(float(train_loss / (ind + 1))),
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        batch_bar.update()

    train_loss /= X_train.shape[1]
    batch_bar.close()
    
    # Validation loss
    model.eval()
    val_loss = 0
    batch_bar = tqdm(total=X_train.shape[1], dynamic_ncols=True, leave=False, position=0, desc='Train') 
    for ind in range(X_test.shape[1]):
        seq = torch.tensor(X_test[:, ind:ind+1, :]).to(device).float()
        labels = torch.tensor(y_test[ind]).to(device).float()

        with torch.no_grad():
            y_pred = model(seq)

        single_loss = loss_function(y_pred[-1, -1, :], labels)
        val_loss += float(single_loss)
        batch_bar.update()
    val_loss /= X_test.shape[1]
    batch_bar.close()

    print(f"[LSTM FIT]epoch: {epoch + 1:3}, train_loss: {train_loss:10.8f}, val_loss: {val_loss:10.8f}")



[LSTM FIT]epoch:   1, train_loss: 0.21282222, val_loss: 0.06435927




[LSTM FIT]epoch:   2, train_loss: 0.08292175, val_loss: 0.06323655




[LSTM FIT]epoch:   3, train_loss: 0.05967464, val_loss: 0.05333335




[LSTM FIT]epoch:   4, train_loss: 0.05137001, val_loss: 0.05697150




[LSTM FIT]epoch:   5, train_loss: 0.04723982, val_loss: 0.05132094




[LSTM FIT]epoch:   6, train_loss: 0.04646061, val_loss: 0.03813059




[LSTM FIT]epoch:   7, train_loss: 0.04246991, val_loss: 0.03704227




[LSTM FIT]epoch:   8, train_loss: 0.04032379, val_loss: 0.03227301




[LSTM FIT]epoch:   9, train_loss: 0.03644551, val_loss: 0.03087842




[LSTM FIT]epoch:  10, train_loss: 0.03364679, val_loss: 0.03125820




[LSTM FIT]epoch:  11, train_loss: 0.03156274, val_loss: 0.03042330




[LSTM FIT]epoch:  12, train_loss: 0.02908652, val_loss: 0.02615374




[LSTM FIT]epoch:  13, train_loss: 0.02625521, val_loss: 0.01838885




[LSTM FIT]epoch:  14, train_loss: 0.02549731, val_loss: 0.01821230




[LSTM FIT]epoch:  15, train_loss: 0.02374547, val_loss: 0.01888895




[LSTM FIT]epoch:  16, train_loss: 0.02300062, val_loss: 0.01559864




[LSTM FIT]epoch:  17, train_loss: 0.02202054, val_loss: 0.01548629




[LSTM FIT]epoch:  18, train_loss: 0.02148325, val_loss: 0.01583378




[LSTM FIT]epoch:  19, train_loss: 0.02059241, val_loss: 0.01596718




[LSTM FIT]epoch:  20, train_loss: 0.01907448, val_loss: 0.01566952




[LSTM FIT]epoch:  21, train_loss: 0.01828477, val_loss: 0.01467914




[LSTM FIT]epoch:  22, train_loss: 0.01774809, val_loss: 0.01439236




[LSTM FIT]epoch:  23, train_loss: 0.01718320, val_loss: 0.01779398




[LSTM FIT]epoch:  24, train_loss: 0.01658799, val_loss: 0.01519581




[LSTM FIT]epoch:  25, train_loss: 0.01616242, val_loss: 0.01392716




[LSTM FIT]epoch:  26, train_loss: 0.01610340, val_loss: 0.01665313




[LSTM FIT]epoch:  27, train_loss: 0.01590707, val_loss: 0.01627752




[LSTM FIT]epoch:  28, train_loss: 0.01586049, val_loss: 0.02019108




[LSTM FIT]epoch:  29, train_loss: 0.01522773, val_loss: 0.01568173




[LSTM FIT]epoch:  30, train_loss: 0.01549775, val_loss: 0.01805028




[LSTM FIT]epoch:  31, train_loss: 0.01533757, val_loss: 0.01396925




[LSTM FIT]epoch:  32, train_loss: 0.01534537, val_loss: 0.01571737




[LSTM FIT]epoch:  33, train_loss: 0.01478973, val_loss: 0.01484349




[LSTM FIT]epoch:  34, train_loss: 0.01477768, val_loss: 0.01478660




[LSTM FIT]epoch:  35, train_loss: 0.01456593, val_loss: 0.01611826




[LSTM FIT]epoch:  36, train_loss: 0.01423801, val_loss: 0.01788942




[LSTM FIT]epoch:  37, train_loss: 0.01356895, val_loss: 0.01800509




[LSTM FIT]epoch:  38, train_loss: 0.01430407, val_loss: 0.01609210




[LSTM FIT]epoch:  39, train_loss: 0.01416841, val_loss: 0.01660375




[LSTM FIT]epoch:  40, train_loss: 0.01428562, val_loss: 0.01749764




[LSTM FIT]epoch:  41, train_loss: 0.01366815, val_loss: 0.01642896




[LSTM FIT]epoch:  42, train_loss: 0.01314628, val_loss: 0.01645116




[LSTM FIT]epoch:  43, train_loss: 0.01385140, val_loss: 0.01578493




[LSTM FIT]epoch:  44, train_loss: 0.01378404, val_loss: 0.01590863




[LSTM FIT]epoch:  45, train_loss: 0.01357665, val_loss: 0.01562941


Train:  13%|█▎        | 403/2997 [00:06<00:42, 60.63it/s, loss=0.0030, lr=0.0000]

KeyboardInterrupt: ignored

In [38]:
import os
def save_checkpoint(ckpt_path, id, model, epoch, optimizer, scheduler):
    path = os.path.join(ckpt_path, f"{id}_{epoch}")

    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)

    save_dict = {'model_state': model.state_dict(),
                'epoch': epoch,
                 'id': id,
                 }

    if optimizer != None:
      save_dict['optimizer_state'] = optimizer.state_dict()
    if scheduler != None:
      save_dict['scheduler_state'] = scheduler.state_dict()

    torch.save(save_dict, path)
    print(f"=> saved the model {id} to {path}")

In [36]:
save_checkpoint("./", "model", model, epochs, optimizer, scheduler)

=> saved the model model to ./noisepage_forecast/model_50


In [37]:
from google.colab import files
files.download('./model') 

FileNotFoundError: ignored