# Import

In [1]:
!pip install torchsummaryX



In [2]:
import torch
import numpy as np
import torch.nn as nn
from torchsummaryX import summary
from tqdm import tqdm

from sklearn.model_selection import train_test_split


# Constants

In [3]:
# Sequence / model dimensions shared by the cells below.
look_back_window = 5  # L: time steps per input sequence
num_quantiles = 11  # H_in: features per time step (also used as the output width)
hidden_size = 128  # width of the LSTM hidden state
n = 50  # NOTE(review): not referenced in the visible cells -- confirm it is still needed

# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load Data

In [4]:
# Load the feature/target arrays. For an .npz archive np.load returns a lazy
# NpzFile that keeps the underlying file open, so read the arrays inside a
# `with` block to make sure the archive is closed afterwards.
# NOTE(review): allow_pickle=True lets the archive unpickle (and therefore
# execute) arbitrary objects -- only load trusted files, and drop the flag if
# neither array is stored with object dtype.
with np.load('./noisepage-forecast/param_training_data.npz', allow_pickle=True) as data:
    X = data["X"]
    Y = data["Y"]

# Split into train / test sets. shuffle=False keeps the original sample order,
# so the held-out 10% is the tail of the data.
X_train, X_test, y_train, y_test = train_test_split(X, Y, shuffle=False, test_size=0.1)
print(X.dtype, X_train.dtype)

# X: (N, L, H_in) -> (L, N, H_in), i.e. sequence-first, which is the layout
# nn.LSTM expects when batch_first is left at its default.
X_train, X_test = np.transpose(X_train, (1, 0, 2)), np.transpose(X_test, (1, 0, 2))
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

float64 float64
(5, 2997, 11) (5, 333, 11) (2997, 11) (333, 11)


# Model

In [24]:
class Network(nn.Module):
    """Stacked LSTM followed by a two-layer MLP head applied at every time step.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        output_size: Number of values the head emits per time step.
        hidden_size: Size of the LSTM hidden state (and input width of the head).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers=4):
        super().__init__()

        # Unidirectional LSTM; batch_first is left at its default, so inputs
        # are laid out sequence-first.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, bidirectional=False)

        # hidden_size -> 64 -> output_size projection head.
        head_layers = [
            nn.Linear(hidden_size, 64),
            nn.LeakyReLU(),
            nn.Linear(64, output_size),
        ]
        self.classification = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the sequence through the LSTM and project every time step.

        Args:
            x: Input of shape (L, N, input_size).

        Returns:
            Tensor of shape (L, N, output_size); the final hidden/cell states
            returned by the LSTM are discarded.
        """
        per_step_features, _ = self.lstm(x)  # (L, N, hidden_size)
        return self.classification(per_step_features)

# Input and output widths are both the number of quantiles.
model = Network(
    input_size=num_quantiles,
    output_size=num_quantiles,
    hidden_size=hidden_size,
).to(device)

# Trace the network with the first training sequence (a batch of one) to
# produce the per-layer summary table.
summary_input = torch.tensor(X_train[:, :1, :]).to(device).float()
summary(model, summary_input)

                             Kernel Shape Output Shape   Params Mult-Adds
Layer                                                                    
0_lstm                                  -  [5, 1, 128]  468.48k  464.384k
1_classification.Linear_0       [128, 64]   [5, 1, 64]   8.256k    8.192k
2_classification.LeakyReLU_1            -   [5, 1, 64]        -         -
3_classification.Linear_2        [64, 11]   [5, 1, 11]    715.0     704.0
---------------------------------------------------------------------------
                        Totals
Total params          477.451k
Trainable params      477.451k
Non-trainable params       0.0
Mult-Adds              473.28k


  df_sum = df.sum()


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_lstm,-,"[5, 1, 128]",468480.0,464384.0
1_classification.Linear_0,"[128, 64]","[5, 1, 64]",8256.0,8192.0
2_classification.LeakyReLU_1,-,"[5, 1, 64]",,
3_classification.Linear_2,"[64, 11]","[5, 1, 11]",715.0,704.0


# Training Hyperparameters

In [25]:
# Optimisation settings used by the training loop.
epochs = 50
lr = 1e-4

# Mean-squared-error objective, optimised with Adam over all model weights.
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# Model Training

In [None]:
# Train for `epochs` passes over the training sequences (one optimizer step per
# sequence, visited in random order) and report the mean train / validation MSE
# after every pass. Only the prediction at the last time step is scored.
num_train = X_train.shape[1]
num_val = X_test.shape[1]

for epoch in range(epochs):
    # ---- Training ----
    model.train()
    arr = np.arange(num_train)
    np.random.shuffle(arr)
    train_loss = 0
    batch_bar = tqdm(total=num_train, dynamic_ncols=True, leave=False, position=0, desc='Train')
    # `step` counts the samples processed so far; `ind` is the shuffled sample
    # index and must not be used as the divisor for the running mean.
    for step, ind in enumerate(arr, start=1):
        seq = torch.tensor(X_train[:, ind:ind+1, :]).to(device).float()
        labels = torch.tensor(y_train[ind]).to(device).float()
        optimizer.zero_grad()
        y_pred = model(seq)
        single_loss = loss_function(y_pred[-1, -1, :], labels)
        single_loss.backward()
        optimizer.step()
        train_loss += float(single_loss)

        batch_bar.set_postfix(
            loss="{:.04f}".format(float(train_loss / step)),
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        batch_bar.update()

    train_loss /= num_train
    batch_bar.close()

    # ---- Validation ----
    model.eval()
    val_loss = 0
    # Size and label the bar for the validation set so it completes correctly.
    batch_bar = tqdm(total=num_val, dynamic_ncols=True, leave=False, position=0, desc='Val')
    with torch.no_grad():  # no gradients are needed while evaluating
        for ind in range(num_val):
            seq = torch.tensor(X_test[:, ind:ind+1, :]).to(device).float()
            labels = torch.tensor(y_test[ind]).to(device).float()

            y_pred = model(seq)

            single_loss = loss_function(y_pred[-1, -1, :], labels)
            val_loss += float(single_loss)
            batch_bar.update()
    val_loss /= num_val
    batch_bar.close()

    print(f"[LSTM FIT]epoch: {epoch + 1:3}, train_loss: {train_loss:10.8f}, val_loss: {val_loss:10.8f}")

Train:  17%|█▋        | 506/2997 [00:08<00:39, 62.32it/s, loss=0.0073, lr=0.0001]