# Deep Convolutional Neural Network for Limit Order Books

In this notebook, we implement the work of Zhang et al. We further improve this work by adding a self attention layer and by training the model on Order Flow data.

### References:
[1] Ntakaris A, Magris M, Kanniainen J, Gabbouj M, Iosifidis A. Benchmark dataset for mid‐price forecasting of limit order book data with machine learning methods. Journal of Forecasting. 2018 Dec;37(8):852-66. https://arxiv.org/abs/1705.03233

[2] Zhang Z, Zohren S, Roberts S. DeepLOB: Deep convolutional neural networks for limit order books. IEEE Transactions on Signal Processing. 2019 Mar 25;67(11):3001-12. https://arxiv.org/abs/1808.03668

In [1]:
# load packages
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm 
from sklearn.metrics import accuracy_score, classification_report

import torch
import torch.nn.functional as F
from torch.utils import data
from torchinfo import summary
import torch.nn as nn
import torch.optim as optim

## Data Preparation

In [None]:
import os 
if not os.path.isfile('data.zip'):
    !wget https://github.com/Viccis33/DeepLOB_CNN_Order_Book/raw/refs/heads/master/data.zip
    !unzip -n data.zip
    print('Data downloaded.')
else:
    print('Data already existed.')

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:
def prepare_x(data):
    df1 = data[:40, :].T
    return np.array(df1)

def get_label(data):
    lob = data[-5:, :].T
    return lob

def data_classification(X, Y, T):
    [N, D] = X.shape
    df = np.array(X)

    dY = np.array(Y)

    dataY = dY[T - 1:N]

    dataX = np.zeros((N - T + 1, T, D))
    for i in range(T, N + 1):
        dataX[i - T] = df[i - T:i, :]

    return dataX, dataY

def torch_data(x, y):
    x = torch.from_numpy(x)
    x = torch.unsqueeze(x, 1)
    y = torch.from_numpy(y)
    y = F.one_hot(y, num_classes=3)
    return x, y

In [4]:
class Dataset(data.Dataset):
    """Characterizes a dataset for PyTorch"""
    def __init__(self, data, k, num_classes, T):
        """Initialization""" 
        self.k = k
        self.num_classes = num_classes
        self.T = T
            
        x = prepare_x(data)
        y = get_label(data)
        x, y = data_classification(x, y, self.T)
        y = y[:,self.k] - 1
        self.length = len(x)

        x = torch.from_numpy(x)
        self.x = torch.unsqueeze(x, 1)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Denotes the total number of samples"""
        return self.length

    def __getitem__(self, index):
        """Generates samples of data"""
        return self.x[index], self.y[index]

We used no auction dataset that is normalised by decimal precision approach in their work. The first seven days are training data and the last three days are testing data. A validation set (20%) from the training set is used to monitor the overfitting behaviours.

The first 40 columns of the FI-2010 dataset are 10 levels ask and bid information for a limit order book and we only use these 40 features in our network. The last 5 columns of the FI-2010 dataset are the labels with different prediction horizons k (10,20,30,50,100).



In [5]:
# please change the data_path to your local path
# data_path = '/nfs/home/zihaoz/limit_order_book/data'

dec_data = np.loadtxt('/kaggle/input/fi-2010/Train_Dst_NoAuction_DecPre_CF_7.txt')
dec_train = dec_data[:, :int(np.floor(dec_data.shape[1] * 0.8))]
dec_val = dec_data[:, int(np.floor(dec_data.shape[1] * 0.8)):]

dec_test1 = np.loadtxt('/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_7.txt')
dec_test2 = np.loadtxt('/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_8.txt')
dec_test3 = np.loadtxt('/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_9.txt')
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

print(dec_train.shape, dec_val.shape, dec_test.shape)

(149, 203800) (149, 50950) (149, 139587)


In [6]:
batch_size = 32
k1=2

dataset_train = Dataset(data=dec_train, k=k1, num_classes=3, T=100)
dataset_val = Dataset(data=dec_val, k=k1, num_classes=3, T=100)
dataset_test = Dataset(data=dec_test, k=k1, num_classes=3, T=100)

train_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=False)

print(dataset_train.x.shape, dataset_train.y.shape)

torch.Size([203701, 1, 100, 40]) torch.Size([203701])


In [7]:
tmp_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=1, shuffle=True)

for x, y in tmp_loader:
    print(x)
    print(y)
    print(x.shape, y.shape)
    break

tensor([[[[0.3412, 0.0036, 0.3406,  ..., 0.0010, 0.3385, 0.0010],
          [0.3412, 0.0036, 0.3406,  ..., 0.0010, 0.3390, 0.0032],
          [0.3411, 0.0015, 0.3406,  ..., 0.0094, 0.3390, 0.0032],
          ...,
          [0.3413, 0.0068, 0.3408,  ..., 0.0010, 0.3396, 0.0040],
          [0.3413, 0.0032, 0.3408,  ..., 0.0010, 0.3395, 0.0061],
          [0.3413, 0.0032, 0.3408,  ..., 0.0010, 0.3395, 0.0061]]]],
       dtype=torch.float64)
tensor([1.], dtype=torch.float64)
torch.Size([1, 1, 100, 40]) torch.Size([1])


## Deeplob

In [10]:
class deeplob(nn.Module):
    def __init__(self, y_len):
        super().__init__()
        self.y_len = y_len
        
        # convolution blocks
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(1,2), stride=(1,2)),
            nn.LeakyReLU(negative_slope=0.01),
#             nn.Tanh(),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(1,2), stride=(1,2)),
            nn.Tanh(),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.Tanh(),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.Tanh(),
            nn.BatchNorm2d(32),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(1,10)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
        )
        
        # inception moduels
        self.inp1 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
        )
        self.inp2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(5,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
        )
        self.inp3 = nn.Sequential(
            nn.MaxPool2d((3, 1), stride=(1, 1), padding=(1, 0)),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
        )
        
        # lstm layers
        self.lstm = nn.LSTM(input_size=192, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, self.y_len)

    def forward(self, x):
        # h0: (number of hidden layers, batch size, hidden size)
        h0 = torch.zeros(1, x.size(0), 64).to(device)
        c0 = torch.zeros(1, x.size(0), 64).to(device)
    
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        x_inp1 = self.inp1(x)
        x_inp2 = self.inp2(x)
        x_inp3 = self.inp3(x)  
        
        x = torch.cat((x_inp1, x_inp2, x_inp3), dim=1)
        
#         x = torch.transpose(x, 1, 2)
        x = x.permute(0, 2, 1, 3)
        x = torch.reshape(x, (-1, x.shape[1], x.shape[2]))
        
        x, _ = self.lstm(x, (h0, c0))
        x = x[:, -1, :]
        x = self.fc1(x)
        forecast_y = torch.softmax(x, dim=1)
        
        return forecast_y

In [11]:
model = deeplob(y_len = dataset_train.num_classes)
model.to(device)

deeplob(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(1, 2), stride=(1, 2))
    (1): LeakyReLU(negative_slope=0.01)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(4, 1), stride=(1, 1))
    (4): LeakyReLU(negative_slope=0.01)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(4, 1), stride=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 32, kernel_size=(1, 2), stride=(1, 2))
    (1): Tanh()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(4, 1), stride=(1, 1))
    (4): Tanh()
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(4, 1), stride

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [13]:
# Function to encapsulate the training loop with early stopping
def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs, patience=20):
    
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    best_test_loss = np.inf
    best_test_epoch = 0
    no_improve_epochs = 0  # Counter for early stopping

    for it in tqdm(range(epochs)):
        
        model.train()
        t0 = datetime.now()
        train_loss = []
        for inputs, targets in train_loader:
            # Move data to GPU
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        # Compute mean train loss
        train_loss = np.mean(train_loss)

        # Evaluate on test set
        model.eval()
        test_loss = []
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)      
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss.append(loss.item())
        test_loss = np.mean(test_loss)

        # Save losses
        train_losses[it] = train_loss
        test_losses[it] = test_loss

        # Check for early stopping
        if test_loss < best_test_loss:
            torch.save(model, './best_val_model_pytorch_30')
            best_test_loss = test_loss
            best_test_epoch = it
            no_improve_epochs = 0  # Reset counter
            print('Model saved (New best validation loss)')
        else:
            no_improve_epochs += 1  # Increment counter

        dt = datetime.now() - t0
        print(f'Epoch {it+1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {test_loss:.4f}, '
              f'Duration: {dt}, Best Val Epoch: {best_test_epoch}')
 
        # **Early stopping condition**
        if no_improve_epochs >= patience:
            print(f'Early stopping triggered after {it+1} epochs. No improvement for {patience} epochs.')
            break  # Stop training

    return train_losses[:it+1], test_losses[:it+1]  # Return only trained epochs


In [14]:
train_losses, val_losses = batch_gd(model, criterion, optimizer, 
                                    train_loader, val_loader, epochs=120)

  1%|          | 1/120 [01:21<2:41:25, 81.39s/it]

Model saved (New best validation loss)
Epoch 1/120, Train Loss: 1.0771, Validation Loss: 1.0753, Duration: 0:01:21.392893, Best Val Epoch: 0


  2%|▏         | 2/120 [02:40<2:37:54, 80.29s/it]

Epoch 2/120, Train Loss: 1.0796, Validation Loss: 1.0756, Duration: 0:01:19.519154, Best Val Epoch: 0


  2%|▎         | 3/120 [03:59<2:35:22, 79.68s/it]

Model saved (New best validation loss)
Epoch 3/120, Train Loss: 1.0744, Validation Loss: 1.0671, Duration: 0:01:18.944226, Best Val Epoch: 2


  3%|▎         | 4/120 [05:19<2:34:01, 79.67s/it]

Model saved (New best validation loss)
Epoch 4/120, Train Loss: 1.0747, Validation Loss: 1.0666, Duration: 0:01:19.654634, Best Val Epoch: 3


  4%|▍         | 5/120 [06:39<2:32:37, 79.63s/it]

Model saved (New best validation loss)
Epoch 5/120, Train Loss: 1.0735, Validation Loss: 1.0665, Duration: 0:01:19.570283, Best Val Epoch: 4


  5%|▌         | 6/120 [07:59<2:31:29, 79.73s/it]

Epoch 6/120, Train Loss: 1.0730, Validation Loss: 1.0680, Duration: 0:01:19.913628, Best Val Epoch: 4


  6%|▌         | 7/120 [09:18<2:29:52, 79.58s/it]

Epoch 7/120, Train Loss: 1.0727, Validation Loss: 1.0727, Duration: 0:01:19.262791, Best Val Epoch: 4


  7%|▋         | 8/120 [10:38<2:28:48, 79.72s/it]

Epoch 8/120, Train Loss: 1.0715, Validation Loss: 1.0669, Duration: 0:01:20.011234, Best Val Epoch: 4


  8%|▊         | 9/120 [11:58<2:27:31, 79.75s/it]

Model saved (New best validation loss)
Epoch 9/120, Train Loss: 1.0717, Validation Loss: 1.0664, Duration: 0:01:19.810169, Best Val Epoch: 8


  8%|▊         | 10/120 [13:17<2:26:07, 79.70s/it]

Model saved (New best validation loss)
Epoch 10/120, Train Loss: 1.0701, Validation Loss: 1.0660, Duration: 0:01:19.612001, Best Val Epoch: 9


  9%|▉         | 11/120 [14:38<2:25:13, 79.94s/it]

Model saved (New best validation loss)
Epoch 11/120, Train Loss: 1.0695, Validation Loss: 1.0650, Duration: 0:01:20.477976, Best Val Epoch: 10


 10%|█         | 12/120 [15:57<2:23:44, 79.86s/it]

Epoch 12/120, Train Loss: 1.0689, Validation Loss: 1.0663, Duration: 0:01:19.662416, Best Val Epoch: 10


 11%|█         | 13/120 [17:17<2:22:05, 79.68s/it]

Model saved (New best validation loss)
Epoch 13/120, Train Loss: 1.0681, Validation Loss: 1.0633, Duration: 0:01:19.271752, Best Val Epoch: 12


 12%|█▏        | 14/120 [18:36<2:20:27, 79.51s/it]

Model saved (New best validation loss)
Epoch 14/120, Train Loss: 1.0671, Validation Loss: 1.0632, Duration: 0:01:19.109649, Best Val Epoch: 13


 12%|█▎        | 15/120 [19:56<2:19:31, 79.73s/it]

Epoch 15/120, Train Loss: 1.0659, Validation Loss: 1.0670, Duration: 0:01:20.243235, Best Val Epoch: 13


 13%|█▎        | 16/120 [21:16<2:18:18, 79.79s/it]

Epoch 16/120, Train Loss: 1.0653, Validation Loss: 1.0647, Duration: 0:01:19.933400, Best Val Epoch: 13


 14%|█▍        | 17/120 [22:35<2:16:49, 79.70s/it]

Model saved (New best validation loss)
Epoch 17/120, Train Loss: 1.0645, Validation Loss: 1.0623, Duration: 0:01:19.485326, Best Val Epoch: 16


 15%|█▌        | 18/120 [23:55<2:15:16, 79.58s/it]

Epoch 18/120, Train Loss: 1.0639, Validation Loss: 1.0653, Duration: 0:01:19.287899, Best Val Epoch: 16


 16%|█▌        | 19/120 [25:14<2:13:36, 79.37s/it]

Epoch 19/120, Train Loss: 1.0634, Validation Loss: 1.0624, Duration: 0:01:18.888944, Best Val Epoch: 16


 17%|█▋        | 20/120 [26:32<2:11:44, 79.05s/it]

Epoch 20/120, Train Loss: 1.0614, Validation Loss: 1.0926, Duration: 0:01:18.291890, Best Val Epoch: 16


 18%|█▊        | 21/120 [27:50<2:10:09, 78.88s/it]

Model saved (New best validation loss)
Epoch 21/120, Train Loss: 1.0551, Validation Loss: 1.0346, Duration: 0:01:18.494483, Best Val Epoch: 20


 18%|█▊        | 22/120 [29:10<2:08:59, 78.98s/it]

Model saved (New best validation loss)
Epoch 22/120, Train Loss: 1.0203, Validation Loss: 0.9961, Duration: 0:01:19.202196, Best Val Epoch: 21


 19%|█▉        | 23/120 [30:29<2:07:51, 79.09s/it]

Model saved (New best validation loss)
Epoch 23/120, Train Loss: 0.9866, Validation Loss: 0.9800, Duration: 0:01:19.341287, Best Val Epoch: 22


 20%|██        | 24/120 [31:48<2:06:30, 79.07s/it]

Model saved (New best validation loss)
Epoch 24/120, Train Loss: 0.9645, Validation Loss: 0.9616, Duration: 0:01:19.032931, Best Val Epoch: 23


 21%|██        | 25/120 [33:07<2:05:06, 79.02s/it]

Model saved (New best validation loss)
Epoch 25/120, Train Loss: 0.9475, Validation Loss: 0.9476, Duration: 0:01:18.890493, Best Val Epoch: 24


 22%|██▏       | 26/120 [34:26<2:03:57, 79.12s/it]

Model saved (New best validation loss)
Epoch 26/120, Train Loss: 0.9289, Validation Loss: 0.9384, Duration: 0:01:19.364311, Best Val Epoch: 25


 22%|██▎       | 27/120 [35:47<2:03:12, 79.49s/it]

Epoch 27/120, Train Loss: 0.9009, Validation Loss: 0.9584, Duration: 0:01:20.357341, Best Val Epoch: 25


 23%|██▎       | 28/120 [37:07<2:02:08, 79.65s/it]

Model saved (New best validation loss)
Epoch 28/120, Train Loss: 0.8818, Validation Loss: 0.9374, Duration: 0:01:20.029953, Best Val Epoch: 27


 24%|██▍       | 29/120 [38:25<2:00:26, 79.41s/it]

Model saved (New best validation loss)
Epoch 29/120, Train Loss: 0.8710, Validation Loss: 0.9303, Duration: 0:01:18.834111, Best Val Epoch: 28


 25%|██▌       | 30/120 [39:44<1:58:54, 79.27s/it]

Model saved (New best validation loss)
Epoch 30/120, Train Loss: 0.8661, Validation Loss: 0.9220, Duration: 0:01:18.946023, Best Val Epoch: 29


 26%|██▌       | 31/120 [41:03<1:57:15, 79.06s/it]

Model saved (New best validation loss)
Epoch 31/120, Train Loss: 0.8597, Validation Loss: 0.9164, Duration: 0:01:18.554451, Best Val Epoch: 30


 27%|██▋       | 32/120 [42:23<1:56:15, 79.26s/it]

Epoch 32/120, Train Loss: 0.8555, Validation Loss: 0.9200, Duration: 0:01:19.749205, Best Val Epoch: 30


 28%|██▊       | 33/120 [43:43<1:55:18, 79.52s/it]

Model saved (New best validation loss)
Epoch 33/120, Train Loss: 0.8489, Validation Loss: 0.9017, Duration: 0:01:20.123079, Best Val Epoch: 32


 28%|██▊       | 34/120 [45:03<1:54:10, 79.66s/it]

Model saved (New best validation loss)
Epoch 34/120, Train Loss: 0.8441, Validation Loss: 0.8980, Duration: 0:01:19.968086, Best Val Epoch: 33


 29%|██▉       | 35/120 [46:22<1:52:43, 79.58s/it]

Model saved (New best validation loss)
Epoch 35/120, Train Loss: 0.8385, Validation Loss: 0.8939, Duration: 0:01:19.383956, Best Val Epoch: 34


 30%|███       | 36/120 [47:42<1:51:22, 79.55s/it]

Model saved (New best validation loss)
Epoch 36/120, Train Loss: 0.8349, Validation Loss: 0.8856, Duration: 0:01:19.489589, Best Val Epoch: 35


 31%|███       | 37/120 [49:01<1:49:57, 79.49s/it]

Epoch 37/120, Train Loss: 0.8320, Validation Loss: 0.8891, Duration: 0:01:19.342699, Best Val Epoch: 35


 32%|███▏      | 38/120 [50:21<1:48:39, 79.50s/it]

Model saved (New best validation loss)
Epoch 38/120, Train Loss: 0.8303, Validation Loss: 0.8825, Duration: 0:01:19.538006, Best Val Epoch: 37


 32%|███▎      | 39/120 [51:40<1:47:24, 79.57s/it]

Model saved (New best validation loss)
Epoch 39/120, Train Loss: 0.8270, Validation Loss: 0.8787, Duration: 0:01:19.710331, Best Val Epoch: 38


 33%|███▎      | 40/120 [52:59<1:45:50, 79.38s/it]

Model saved (New best validation loss)
Epoch 40/120, Train Loss: 0.8251, Validation Loss: 0.8781, Duration: 0:01:18.930986, Best Val Epoch: 39


 34%|███▍      | 41/120 [54:19<1:44:29, 79.36s/it]

Epoch 41/120, Train Loss: 0.8230, Validation Loss: 0.8786, Duration: 0:01:19.337369, Best Val Epoch: 39


 35%|███▌      | 42/120 [55:38<1:43:03, 79.27s/it]

Model saved (New best validation loss)
Epoch 42/120, Train Loss: 0.8211, Validation Loss: 0.8756, Duration: 0:01:19.062233, Best Val Epoch: 41


 36%|███▌      | 43/120 [56:58<1:41:59, 79.48s/it]

Epoch 43/120, Train Loss: 0.8200, Validation Loss: 0.8807, Duration: 0:01:19.958720, Best Val Epoch: 41


 37%|███▋      | 44/120 [58:18<1:40:51, 79.63s/it]

Model saved (New best validation loss)
Epoch 44/120, Train Loss: 0.8194, Validation Loss: 0.8736, Duration: 0:01:19.970193, Best Val Epoch: 43


 38%|███▊      | 45/120 [59:36<1:39:13, 79.38s/it]

Model saved (New best validation loss)
Epoch 45/120, Train Loss: 0.8175, Validation Loss: 0.8736, Duration: 0:01:18.787249, Best Val Epoch: 44


 38%|███▊      | 46/120 [1:00:55<1:37:37, 79.16s/it]

Model saved (New best validation loss)
Epoch 46/120, Train Loss: 0.8160, Validation Loss: 0.8711, Duration: 0:01:18.639574, Best Val Epoch: 45


 39%|███▉      | 47/120 [1:02:14<1:36:16, 79.13s/it]

Epoch 47/120, Train Loss: 0.8144, Validation Loss: 0.8765, Duration: 0:01:19.056073, Best Val Epoch: 45


 40%|████      | 48/120 [1:03:33<1:34:53, 79.08s/it]

Epoch 48/120, Train Loss: 0.8135, Validation Loss: 0.8721, Duration: 0:01:18.979207, Best Val Epoch: 45


 41%|████      | 49/120 [1:04:53<1:33:47, 79.26s/it]

Model saved (New best validation loss)
Epoch 49/120, Train Loss: 0.8126, Validation Loss: 0.8694, Duration: 0:01:19.675289, Best Val Epoch: 48


 42%|████▏     | 50/120 [1:06:12<1:32:33, 79.33s/it]

Epoch 50/120, Train Loss: 0.8105, Validation Loss: 0.8716, Duration: 0:01:19.498202, Best Val Epoch: 48


 42%|████▎     | 51/120 [1:07:32<1:31:23, 79.46s/it]

Epoch 51/120, Train Loss: 0.8094, Validation Loss: 0.8724, Duration: 0:01:19.772352, Best Val Epoch: 48


 43%|████▎     | 52/120 [1:08:51<1:29:59, 79.41s/it]

Model saved (New best validation loss)
Epoch 52/120, Train Loss: 0.8091, Validation Loss: 0.8682, Duration: 0:01:19.281700, Best Val Epoch: 51


 44%|████▍     | 53/120 [1:10:11<1:28:40, 79.41s/it]

Epoch 53/120, Train Loss: 0.8081, Validation Loss: 0.8706, Duration: 0:01:19.394860, Best Val Epoch: 51


 45%|████▌     | 54/120 [1:11:30<1:27:14, 79.31s/it]

Epoch 54/120, Train Loss: 0.8064, Validation Loss: 0.8795, Duration: 0:01:19.100362, Best Val Epoch: 51


 46%|████▌     | 55/120 [1:12:49<1:25:56, 79.34s/it]

Epoch 55/120, Train Loss: 0.8058, Validation Loss: 0.8690, Duration: 0:01:19.384479, Best Val Epoch: 51


 47%|████▋     | 56/120 [1:14:09<1:24:41, 79.40s/it]

Epoch 56/120, Train Loss: 0.8046, Validation Loss: 0.8692, Duration: 0:01:19.536842, Best Val Epoch: 51


 48%|████▊     | 57/120 [1:15:28<1:23:24, 79.43s/it]

Epoch 57/120, Train Loss: 0.8046, Validation Loss: 0.8685, Duration: 0:01:19.517194, Best Val Epoch: 51


 48%|████▊     | 58/120 [1:16:48<1:22:04, 79.43s/it]

Epoch 58/120, Train Loss: 0.8035, Validation Loss: 0.8731, Duration: 0:01:19.405230, Best Val Epoch: 51


 49%|████▉     | 59/120 [1:18:07<1:20:45, 79.43s/it]

Epoch 59/120, Train Loss: 0.8018, Validation Loss: 0.8741, Duration: 0:01:19.456267, Best Val Epoch: 51


 50%|█████     | 60/120 [1:19:27<1:19:29, 79.50s/it]

Epoch 60/120, Train Loss: 0.8015, Validation Loss: 0.8710, Duration: 0:01:19.634798, Best Val Epoch: 51


 51%|█████     | 61/120 [1:20:46<1:18:07, 79.46s/it]

Epoch 61/120, Train Loss: 0.8005, Validation Loss: 0.8753, Duration: 0:01:19.366377, Best Val Epoch: 51


 52%|█████▏    | 62/120 [1:22:05<1:16:43, 79.37s/it]

Epoch 62/120, Train Loss: 0.7995, Validation Loss: 0.8699, Duration: 0:01:19.152888, Best Val Epoch: 51


 52%|█████▎    | 63/120 [1:23:24<1:15:21, 79.33s/it]

Epoch 63/120, Train Loss: 0.7981, Validation Loss: 0.8731, Duration: 0:01:19.237352, Best Val Epoch: 51


 53%|█████▎    | 64/120 [1:24:44<1:14:01, 79.31s/it]

Epoch 64/120, Train Loss: 0.7982, Validation Loss: 0.8734, Duration: 0:01:19.265396, Best Val Epoch: 51


 54%|█████▍    | 65/120 [1:26:03<1:12:47, 79.40s/it]

Epoch 65/120, Train Loss: 0.7974, Validation Loss: 0.8705, Duration: 0:01:19.619864, Best Val Epoch: 51


 55%|█████▌    | 66/120 [1:27:22<1:11:22, 79.31s/it]

Epoch 66/120, Train Loss: 0.7969, Validation Loss: 0.8690, Duration: 0:01:19.105158, Best Val Epoch: 51


 56%|█████▌    | 67/120 [1:28:41<1:09:55, 79.16s/it]

Model saved (New best validation loss)
Epoch 67/120, Train Loss: 0.7964, Validation Loss: 0.8672, Duration: 0:01:18.812330, Best Val Epoch: 66


 57%|█████▋    | 68/120 [1:30:00<1:08:35, 79.14s/it]

Epoch 68/120, Train Loss: 0.7964, Validation Loss: 0.8685, Duration: 0:01:19.067681, Best Val Epoch: 66


 57%|█████▊    | 69/120 [1:31:19<1:07:14, 79.12s/it]

Epoch 69/120, Train Loss: 0.7960, Validation Loss: 0.8744, Duration: 0:01:19.069475, Best Val Epoch: 66


 58%|█████▊    | 70/120 [1:32:38<1:05:54, 79.09s/it]

Epoch 70/120, Train Loss: 0.7936, Validation Loss: 0.8714, Duration: 0:01:19.037621, Best Val Epoch: 66


 59%|█████▉    | 71/120 [1:33:57<1:04:31, 79.00s/it]

Epoch 71/120, Train Loss: 0.7930, Validation Loss: 0.8719, Duration: 0:01:18.791316, Best Val Epoch: 66


 60%|██████    | 72/120 [1:35:16<1:03:10, 78.97s/it]

Epoch 72/120, Train Loss: 0.7926, Validation Loss: 0.8726, Duration: 0:01:18.893295, Best Val Epoch: 66


 61%|██████    | 73/120 [1:36:36<1:02:04, 79.24s/it]

Epoch 73/120, Train Loss: 0.7924, Validation Loss: 0.8707, Duration: 0:01:19.876137, Best Val Epoch: 66


 62%|██████▏   | 74/120 [1:37:55<1:00:43, 79.22s/it]

Epoch 74/120, Train Loss: 0.7919, Validation Loss: 0.8754, Duration: 0:01:19.153058, Best Val Epoch: 66


 62%|██████▎   | 75/120 [1:39:14<59:24, 79.20s/it]  

Epoch 75/120, Train Loss: 0.7913, Validation Loss: 0.8734, Duration: 0:01:19.165291, Best Val Epoch: 66


 63%|██████▎   | 76/120 [1:40:34<58:06, 79.23s/it]

Epoch 76/120, Train Loss: 0.7911, Validation Loss: 0.8760, Duration: 0:01:19.306870, Best Val Epoch: 66


 64%|██████▍   | 77/120 [1:41:53<56:51, 79.34s/it]

Epoch 77/120, Train Loss: 0.7901, Validation Loss: 0.8729, Duration: 0:01:19.587654, Best Val Epoch: 66


 65%|██████▌   | 78/120 [1:43:12<55:26, 79.21s/it]

Epoch 78/120, Train Loss: 0.7897, Validation Loss: 0.8808, Duration: 0:01:18.892354, Best Val Epoch: 66


 66%|██████▌   | 79/120 [1:44:31<54:03, 79.11s/it]

Epoch 79/120, Train Loss: 0.7889, Validation Loss: 0.8740, Duration: 0:01:18.887108, Best Val Epoch: 66


 67%|██████▋   | 80/120 [1:45:50<52:44, 79.10s/it]

Epoch 80/120, Train Loss: 0.7892, Validation Loss: 0.8754, Duration: 0:01:19.080822, Best Val Epoch: 66


 68%|██████▊   | 81/120 [1:47:09<51:20, 78.98s/it]

Epoch 81/120, Train Loss: 0.7883, Validation Loss: 0.8752, Duration: 0:01:18.683674, Best Val Epoch: 66


 68%|██████▊   | 82/120 [1:48:27<49:56, 78.85s/it]

Epoch 82/120, Train Loss: 0.7879, Validation Loss: 0.8720, Duration: 0:01:18.552654, Best Val Epoch: 66


 69%|██████▉   | 83/120 [1:49:46<48:38, 78.87s/it]

Epoch 83/120, Train Loss: 0.7875, Validation Loss: 0.8752, Duration: 0:01:18.916909, Best Val Epoch: 66


 70%|███████   | 84/120 [1:51:05<47:18, 78.86s/it]

Epoch 84/120, Train Loss: 0.7873, Validation Loss: 0.8782, Duration: 0:01:18.828229, Best Val Epoch: 66


 71%|███████   | 85/120 [1:52:25<46:09, 79.14s/it]

Epoch 85/120, Train Loss: 0.7864, Validation Loss: 0.8760, Duration: 0:01:19.787337, Best Val Epoch: 66


 72%|███████▏  | 86/120 [1:53:43<44:45, 78.99s/it]

Epoch 86/120, Train Loss: 0.7858, Validation Loss: 0.8739, Duration: 0:01:18.655831, Best Val Epoch: 66


 72%|███████▏  | 86/120 [1:55:02<45:29, 80.27s/it]

Epoch 87/120, Train Loss: 0.7845, Validation Loss: 0.8748, Duration: 0:01:18.868023, Best Val Epoch: 66
Early stopping triggered after 87 epochs. No improvement for 20 epochs.





In [15]:
model = torch.load('/kaggle/working/best_val_model_pytorch_30')

n_correct = 0.
n_total = 0.
for inputs, targets in test_loader:
    # Move to GPU
    inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)

    # Forward pass
    outputs = model(inputs)
    
    # Get prediction
    # torch.max returns both max and argmax
    _, predictions = torch.max(outputs, 1)

    # update counts
    n_correct += (predictions == targets).sum().item()
    n_total += targets.shape[0]

test_acc = n_correct / n_total
print(f"Test acc: {test_acc:.4f}")

  model = torch.load('/kaggle/working/best_val_model_pytorch_30')


Test acc: 0.7645


In [16]:
all_targets = []
all_predictions = []

for inputs, targets in test_loader:
    # Move to GPU
    inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)

    # Forward pass
    outputs = model(inputs)
    
    # Get prediction
    # torch.max returns both max and argmax
    _, predictions = torch.max(outputs, 1)

    all_targets.append(targets.cpu().numpy())
    all_predictions.append(predictions.cpu().numpy())

all_targets = np.concatenate(all_targets)    
all_predictions = np.concatenate(all_predictions)    

In [17]:
print('accuracy_score:', accuracy_score(all_targets, all_predictions))
print(classification_report(all_targets, all_predictions, digits=4))

accuracy_score: 0.7644743633860978
              precision    recall  f1-score   support

           0     0.6980    0.6138    0.6532     31888
           1     0.8061    0.8860    0.8441     78297
           2     0.6969    0.6037    0.6469     29303

    accuracy                         0.7645    139488
   macro avg     0.7337    0.7012    0.7148    139488
weighted avg     0.7584    0.7645    0.7591    139488



## DeepLob Attention

In [18]:
class BahdanauAttention(nn.Module):

    def __init__(self, hidden_dim):
        super().__init__()
        self.encoder_proj = nn.Linear(hidden_dim, hidden_dim)
        self.query_proj   = nn.Linear(hidden_dim, hidden_dim)
        self.score        = nn.Linear(hidden_dim, 1)

    def forward(self, encoder_outputs, query):
     

        query = query.unsqueeze(1)  

        enc_proj = self.encoder_proj(encoder_outputs) 
        q_proj   = self.query_proj(query)             

        energies = torch.tanh(enc_proj + q_proj)

        alignment = self.score(energies)
        attn_weights = F.softmax(alignment.squeeze(-1), dim=1) 

        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs) 
        context = context.squeeze(1) 

        return context, attn_weights

class deeplobAttention(nn.Module):
    def __init__(self, y_len=3, hidden_dim=64, dropout_rate=0.1):
  
        super().__init__()
        self.y_len = y_len
        self.hidden_dim = hidden_dim

        # ========== (1) Convolution Blocks ==========
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, (1, 2), stride=(1, 2)),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, (4, 1)),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, (4, 1)),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(32),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 32, (1, 2), stride=(1, 2)),
            nn.Tanh(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, (4, 1)),
            nn.Tanh(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, (4, 1)),
            nn.Tanh(),
            nn.BatchNorm2d(32),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 32, (1, 10)),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, (4, 1)),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, (4, 1)),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(32),
        )

        # ========== (2) Inception Modules ==========
        self.inp1 = nn.Sequential(
            nn.Conv2d(32, 64, (1,1), padding='same'),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, (3,1), padding='same'),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(64),
        )
        self.inp2 = nn.Sequential(
            nn.Conv2d(32, 64, (1,1), padding='same'),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, (5,1), padding='same'),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(64),
        )
        self.inp3 = nn.Sequential(
            nn.MaxPool2d((3,1), stride=(1,1), padding=(1,0)),
            nn.Conv2d(32, 64, (1,1), padding='same'),
            nn.LeakyReLU(0.01),
            nn.BatchNorm2d(64),
        )

        self.lstm = nn.LSTM(input_size=192, hidden_size=hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)

        self.attn = BahdanauAttention(hidden_dim=hidden_dim)

        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, y_len),
        )

    def forward(self, x):
        """
        x: shape (B, 1, 100, 40)  # typical for FI-2010
        returns: (B, y_len)  # raw logits
        """
        # 1) CNN stack
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        # 2) Inception
        x_inp1 = self.inp1(x)
        x_inp2 = self.inp2(x)
        x_inp3 = self.inp3(x)
        x = torch.cat([x_inp1, x_inp2, x_inp3], dim=1)  # => (B, 192, T, 1)

        # Prepare for LSTM => (B, T, 192)
        x = x.permute(0, 2, 1, 3)  # => (B, T, 192, 1)
        x = x.reshape(x.size(0), x.size(1), x.size(2))  # => (B, T, 192)

        # 3) LSTM
        lstm_out, (h, c) = self.lstm(x)
        h_final = h.squeeze(0)

        lstm_out = self.dropout(lstm_out)
        h_final  = self.dropout(h_final)

        # 4) Attention
        context, attn_weights = self.attn(lstm_out, h_final)

        # 5) Classifier
        logits = self.fc(context)  # => (B, y_len)
        return logits

In [20]:

def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs, patience=20):
    """
    Train the model using mini-batch gradient descent with early stopping.
    Args:
      model, criterion, optimizer: your usual PyTorch objects
      train_loader, test_loader: data loaders for training and validation
      epochs: max number of epochs to train
      patience: how many epochs to wait before early stopping if no improvement

    Returns:
      train_losses, test_losses: arrays of recorded losses over training
    """
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    best_test_loss = np.inf
    best_test_epoch = 0
    no_improve_epochs = 0

    for it in tqdm(range(epochs)):
        
        model.train()
        t0 = datetime.now()
        batch_train_losses = []
        
        # -- Training --
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            # Zero gradients
            optimizer.zero_grad()
            # Forward => raw logits
            outputs = model(inputs.float())   # shape (B, y_len)
            # CrossEntropyLoss expects (B, y_len) logits + (B,) class indices
            loss = criterion(outputs, targets.long())
            
            # Backprop + Update
            loss.backward()
            optimizer.step()
            batch_train_losses.append(loss.item())

        # Mean train loss
        train_loss = np.mean(batch_train_losses)

        # -- Validation --
        model.eval()
        batch_test_losses = []
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs.float())
                loss = criterion(outputs, targets.long())
                batch_test_losses.append(loss.item())
        test_loss = np.mean(batch_test_losses)

        # Record
        train_losses[it] = train_loss
        test_losses[it]  = test_loss

        # Early Stopping logic
        if test_loss < best_test_loss:
            torch.save(model, './best_val_model_pytorch_attention_30')
            best_test_loss = test_loss
            best_test_epoch = it
            no_improve_epochs = 0
            print("Model saved (new best validation loss).")
        else:
            no_improve_epochs += 1

        dt = datetime.now() - t0
        print(f"Epoch {it+1}/{epochs} => Train Loss: {train_loss:.4f}, Val Loss: {test_loss:.4f}, "
              f"Duration: {dt}, Best Val Epoch: {best_test_epoch}")

        if no_improve_epochs >= patience:
            print(f"Early stopping after {it+1} epochs (no improvement for {patience} epochs).")
            break

    return train_losses[:it+1], test_losses[:it+1]


model = deeplobAttention(y_len=3, hidden_dim=64, dropout_rate=0.1).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train
train_losses, val_losses = batch_gd(
    model, criterion, optimizer,
    train_loader=train_loader,
    test_loader=val_loader, 
    epochs=120,
    patience=17
    )


  1%|          | 1/120 [01:26<2:50:57, 86.20s/it]

Model saved (new best validation loss).
Epoch 1/120 => Train Loss: 1.0736, Val Loss: 1.0675, Duration: 0:01:26.194963, Best Val Epoch: 0


  2%|▏         | 2/120 [02:52<2:49:16, 86.07s/it]

Model saved (new best validation loss).
Epoch 2/120 => Train Loss: 1.0699, Val Loss: 1.0668, Duration: 0:01:25.987332, Best Val Epoch: 1


  2%|▎         | 3/120 [04:17<2:47:26, 85.87s/it]

Epoch 3/120 => Train Loss: 1.0713, Val Loss: 1.0762, Duration: 0:01:25.626185, Best Val Epoch: 1


  3%|▎         | 4/120 [05:43<2:45:50, 85.78s/it]

Epoch 4/120 => Train Loss: 1.0696, Val Loss: 1.0694, Duration: 0:01:25.636683, Best Val Epoch: 1


  4%|▍         | 5/120 [07:08<2:44:11, 85.66s/it]

Epoch 5/120 => Train Loss: 1.0646, Val Loss: 1.0686, Duration: 0:01:25.455120, Best Val Epoch: 1


  5%|▌         | 6/120 [08:34<2:42:40, 85.62s/it]

Epoch 6/120 => Train Loss: 1.0624, Val Loss: 1.0851, Duration: 0:01:25.542756, Best Val Epoch: 1


  6%|▌         | 7/120 [10:00<2:41:19, 85.66s/it]

Model saved (new best validation loss).
Epoch 7/120 => Train Loss: 1.0616, Val Loss: 1.0604, Duration: 0:01:25.721876, Best Val Epoch: 6


  7%|▋         | 8/120 [11:26<2:40:05, 85.76s/it]

Epoch 8/120 => Train Loss: 1.0586, Val Loss: 1.0625, Duration: 0:01:25.983888, Best Val Epoch: 6


  8%|▊         | 9/120 [12:52<2:38:47, 85.83s/it]

Model saved (new best validation loss).
Epoch 9/120 => Train Loss: 1.0134, Val Loss: 0.9800, Duration: 0:01:25.992541, Best Val Epoch: 8


  8%|▊         | 10/120 [14:18<2:37:30, 85.91s/it]

Model saved (new best validation loss).
Epoch 10/120 => Train Loss: 0.9109, Val Loss: 0.9113, Duration: 0:01:26.083960, Best Val Epoch: 9


  9%|▉         | 11/120 [15:43<2:35:52, 85.80s/it]

Model saved (new best validation loss).
Epoch 11/120 => Train Loss: 0.8745, Val Loss: 0.8730, Duration: 0:01:25.559337, Best Val Epoch: 10


 10%|█         | 12/120 [17:09<2:34:15, 85.70s/it]

Model saved (new best validation loss).
Epoch 12/120 => Train Loss: 0.8617, Val Loss: 0.8697, Duration: 0:01:25.466921, Best Val Epoch: 11


 11%|█         | 13/120 [18:35<2:33:07, 85.86s/it]

Epoch 13/120 => Train Loss: 0.8489, Val Loss: 0.8726, Duration: 0:01:26.233600, Best Val Epoch: 11


 12%|█▏        | 14/120 [20:01<2:31:49, 85.94s/it]

Model saved (new best validation loss).
Epoch 14/120 => Train Loss: 0.7958, Val Loss: 0.8572, Duration: 0:01:26.109552, Best Val Epoch: 13


 12%|█▎        | 15/120 [21:27<2:30:14, 85.85s/it]

Model saved (new best validation loss).
Epoch 15/120 => Train Loss: 0.7570, Val Loss: 0.8506, Duration: 0:01:25.648049, Best Val Epoch: 14


 13%|█▎        | 16/120 [22:52<2:28:39, 85.77s/it]

Model saved (new best validation loss).
Epoch 16/120 => Train Loss: 0.7385, Val Loss: 0.8210, Duration: 0:01:25.575835, Best Val Epoch: 15


 14%|█▍        | 17/120 [24:18<2:27:05, 85.69s/it]

Model saved (new best validation loss).
Epoch 17/120 => Train Loss: 0.7227, Val Loss: 0.8201, Duration: 0:01:25.502514, Best Val Epoch: 16


 15%|█▌        | 18/120 [25:43<2:25:38, 85.68s/it]

Model saved (new best validation loss).
Epoch 18/120 => Train Loss: 0.7097, Val Loss: 0.8066, Duration: 0:01:25.644617, Best Val Epoch: 17


 16%|█▌        | 19/120 [27:09<2:24:03, 85.58s/it]

Model saved (new best validation loss).
Epoch 19/120 => Train Loss: 0.6967, Val Loss: 0.8010, Duration: 0:01:25.347203, Best Val Epoch: 18


 17%|█▋        | 20/120 [28:34<2:22:31, 85.51s/it]

Model saved (new best validation loss).
Epoch 20/120 => Train Loss: 0.6847, Val Loss: 0.7851, Duration: 0:01:25.356143, Best Val Epoch: 19


 18%|█▊        | 21/120 [30:00<2:21:05, 85.51s/it]

Epoch 21/120 => Train Loss: 0.6746, Val Loss: 0.7969, Duration: 0:01:25.491027, Best Val Epoch: 19


 18%|█▊        | 22/120 [31:26<2:19:52, 85.63s/it]

Model saved (new best validation loss).
Epoch 22/120 => Train Loss: 0.6665, Val Loss: 0.7818, Duration: 0:01:25.932454, Best Val Epoch: 21


 19%|█▉        | 23/120 [32:52<2:18:41, 85.79s/it]

Epoch 23/120 => Train Loss: 0.6582, Val Loss: 0.7837, Duration: 0:01:26.137223, Best Val Epoch: 21


 20%|██        | 24/120 [34:18<2:17:24, 85.88s/it]

Model saved (new best validation loss).
Epoch 24/120 => Train Loss: 0.6516, Val Loss: 0.7734, Duration: 0:01:26.100133, Best Val Epoch: 23


 21%|██        | 25/120 [35:44<2:16:09, 86.00s/it]

Epoch 25/120 => Train Loss: 0.6460, Val Loss: 0.7778, Duration: 0:01:26.264383, Best Val Epoch: 23


 22%|██▏       | 26/120 [37:11<2:14:56, 86.13s/it]

Epoch 26/120 => Train Loss: 0.6423, Val Loss: 0.7901, Duration: 0:01:26.434993, Best Val Epoch: 23


 22%|██▎       | 27/120 [38:37<2:13:36, 86.20s/it]

Epoch 27/120 => Train Loss: 0.6345, Val Loss: 0.7758, Duration: 0:01:26.357940, Best Val Epoch: 23


 23%|██▎       | 28/120 [40:04<2:12:21, 86.32s/it]

Model saved (new best validation loss).
Epoch 28/120 => Train Loss: 0.6311, Val Loss: 0.7703, Duration: 0:01:26.606681, Best Val Epoch: 27


 24%|██▍       | 29/120 [41:30<2:11:09, 86.48s/it]

Epoch 29/120 => Train Loss: 0.6271, Val Loss: 0.7839, Duration: 0:01:26.860046, Best Val Epoch: 27


 25%|██▌       | 30/120 [42:57<2:09:46, 86.52s/it]

Epoch 30/120 => Train Loss: 0.6223, Val Loss: 0.7752, Duration: 0:01:26.607473, Best Val Epoch: 27


 26%|██▌       | 31/120 [44:24<2:08:35, 86.69s/it]

Epoch 31/120 => Train Loss: 0.6195, Val Loss: 0.7832, Duration: 0:01:27.080379, Best Val Epoch: 27


 27%|██▋       | 32/120 [45:51<2:07:17, 86.79s/it]

Epoch 32/120 => Train Loss: 0.6140, Val Loss: 0.7907, Duration: 0:01:27.040475, Best Val Epoch: 27


 28%|██▊       | 33/120 [47:18<2:05:53, 86.82s/it]

Epoch 33/120 => Train Loss: 0.6103, Val Loss: 0.7825, Duration: 0:01:26.882491, Best Val Epoch: 27


 28%|██▊       | 34/120 [48:45<2:04:25, 86.81s/it]

Epoch 34/120 => Train Loss: 0.6069, Val Loss: 0.7781, Duration: 0:01:26.775253, Best Val Epoch: 27


 29%|██▉       | 35/120 [50:12<2:03:00, 86.83s/it]

Epoch 35/120 => Train Loss: 0.6031, Val Loss: 0.8002, Duration: 0:01:26.869813, Best Val Epoch: 27


 30%|███       | 36/120 [51:38<2:01:31, 86.80s/it]

Epoch 36/120 => Train Loss: 0.6005, Val Loss: 0.7982, Duration: 0:01:26.731231, Best Val Epoch: 27


 31%|███       | 37/120 [53:05<2:00:02, 86.77s/it]

Epoch 37/120 => Train Loss: 0.5967, Val Loss: 0.7931, Duration: 0:01:26.717971, Best Val Epoch: 27


 32%|███▏      | 38/120 [54:32<1:58:37, 86.80s/it]

Epoch 38/120 => Train Loss: 0.5946, Val Loss: 0.8109, Duration: 0:01:26.849475, Best Val Epoch: 27


 32%|███▏      | 38/120 [54:50<1:58:21, 86.60s/it]


KeyboardInterrupt: 

In [44]:
model = torch.load('/kaggle/working/best_val_model_pytorch_attention_50')
model.eval() 

all_targets = []
all_predictions = []

# Turn off gradient computations for inference
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device, dtype=torch.float)
        targets = targets.to(device, dtype=torch.long)

        outputs = model(inputs)  

        _, predictions = torch.max(outputs, dim=1)

        all_targets.append(targets.cpu().numpy())
        all_predictions.append(predictions.cpu().numpy())

all_targets = np.concatenate(all_targets)
all_predictions = np.concatenate(all_predictions)

# Now compute any metrics (accuracy, F1, etc.)
accuracy = np.mean(all_predictions == all_targets)
print(f"Test Accuracy: {accuracy:.4f}")

  model = torch.load('/kaggle/working/best_val_model_pytorch_attention_50')


Test Accuracy: 0.7813


In [45]:
print('accuracy_score:', accuracy_score(all_targets, all_predictions))
print(classification_report(all_targets, all_predictions, digits=4))

accuracy_score: 0.781257169075476
              precision    recall  f1-score   support

           0     0.7419    0.7165    0.7290     38408
           1     0.8356    0.8614    0.8483     65996
           2     0.7161    0.7014    0.7087     35084

    accuracy                         0.7813    139488
   macro avg     0.7645    0.7598    0.7620    139488
weighted avg     0.7798    0.7813    0.7803    139488



# DeepLOB with Self Attention on Order Flow data

Our data is in this format: $$ \text{s}_t^{LOB} := (a_t^1, v_t^{1,a}, b_t^1, v_t^{1,b}, ..., a_t^{10}, v_t^{10,a}, b_t^{10}, v_t^{10,b})^T \in \mathbb{R}^{40} $$

where $a_t^i$ and $v_t^{i,a}$ are the $i$-th level ask price and ask volume  
and $b_t^i$ and $v_t^{i,b}$ are the $i$-th level bid price and bid volume  

We will transform it to train our model with Order Flow.
We define the *bid order flows* (bOF) and *ask order flows* (aOF) at a timestamp to be 10-variable vectors computed using two consecutive order book states, where each element is given by

$$ \text{bOF}_{t,i} :=   \left\{
\begin{array}{ll}
      v_t^{i,b}, & b_t^i > b_{t-1}^i \\
      v_t^{i,b} - v_{t-1}^{i,b}, & b_t^i = b_{t-1}^i \\
      -v_t^{i,b}, & b_t^i < b_{t-1}^i \\
\end{array} 
\right. $$

$$ \text{aOF}_{t,i} :=   \left\{
\begin{array}{ll}
      -v_t^{i,a}, & a_t^i > a_{t-1}^i \\
      v_t^{i,a} - v_{t-1}^{i,a}, & a_t^i = a_{t-1}^i \\
      v_t^{i,a}, & a_t^i < a_{t-1}^i \\
\end{array} 
\right. $$

for $i = 1, ..., 10$. With this, we define *order flow* (OF)

$$ \text{OF}_t :=  (\text{bOF}_{t,1}, \text{aOF}_{t,1}, ..., \text{bOF}_{t,10}, \text{aOF}_{t,10})^T \in \mathbb{R}^{20} $$

In [None]:
def prepare_x(data):
    df1 = data[:40, :].T
    

    # Extract bids, asks, and their volumes
    bids = df1[:, 2::4]  
    bid_volumes = df1[:, 3::4]  
    asks = df1[:, 0::4]  
    ask_volumes = df1[:, 1::4]  

    # Compute changes over time
    bid_changes = np.diff(bids, axis=0, prepend=bids[0:1])
    ask_changes = np.diff(asks, axis=0, prepend=asks[0:1])
    bid_volume_changes = np.diff(bid_volumes, axis=0, prepend=bid_volumes[0:1])
    ask_volume_changes = np.diff(ask_volumes, axis=0, prepend=ask_volumes[0:1])

    # Compute bOF_t
    bOF = np.where(bid_changes > 0, bid_volumes, 
                np.where(bid_changes == 0, bid_volume_changes, 
                            -bid_volumes))

    # Compute aOF_t
    aOF = np.where(ask_changes > 0, -ask_volumes, 
                np.where(ask_changes == 0, ask_volume_changes, 
                            ask_volumes))

    # Concatenate to get OF_t
    OF_t = np.hstack((bOF, aOF))  


    return np.array(OF_t)

def get_label(data):
    lob = data[-5:, :].T
    return lob

def data_classification(X, Y, T):
    
    ############## If you have enough RAM, you can modify the dtype of df, dY and dataX to np.float64 ##############
    
    [N, D] = X.shape
    df = np.array(X, dtype=np.float32)

    dY = np.array(Y, dtype=np.float32)
    dataY = dY[T - 1:N]
    dataX = np.zeros((N - T + 1, T, D), dtype=np.float32)

    for i in range(T, N + 1):
        dataX[i - T] = df[i - T:i, :]

    return dataX, dataY

def torch_data(x, y):
    x = torch.from_numpy(x)
    x = torch.unsqueeze(x, 1)
    y = torch.from_numpy(y)
    y = F.one_hot(y, num_classes=3)
    return x, y

In [None]:
class Dataset(data.Dataset):
    """Characterizes a dataset for PyTorch"""
    def __init__(self, data, k, num_classes, T):
        """Initialization""" 
        self.k = k
        self.num_classes = num_classes
        self.T = T
        
        x = prepare_x(data)
        y = get_label(data)
        x, y = data_classification(x, y, self.T)
        y = y[:,self.k] - 1

        self.length = len(x)
        x = torch.from_numpy(x)
        self.x = torch.unsqueeze(x, 1)
        self.y = torch.from_numpy(y)
        
    def __len__(self):
        """Denotes the total number of samples"""
        return self.length

    def __getitem__(self, index):
        """Generates samples of data"""
        return self.x[index], self.y[index]

In [None]:
# please change the data_path to your local path

dec_data = np.loadtxt('Train_Dst_NoAuction_DecPre_CF_7.txt')
dec_train = dec_data[:, :int(np.floor(dec_data.shape[1] * 0.8))]
dec_val = dec_data[:, int(np.floor(dec_data.shape[1] * 0.8)):]

dec_test1 = np.loadtxt('Test_Dst_NoAuction_DecPre_CF_7.txt')
dec_test2 = np.loadtxt('Test_Dst_NoAuction_DecPre_CF_8.txt')
dec_test3 = np.loadtxt('Test_Dst_NoAuction_DecPre_CF_9.txt')
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

print(dec_train.shape, dec_val.shape, dec_test.shape)

In [None]:
batch_size = 64

dataset_train = Dataset(data=dec_train, k=4, num_classes=3, T=100)
print("1")
dataset_val = Dataset(data=dec_val, k=4, num_classes=3, T=100)
print("2")
dataset_test = Dataset(data=dec_test, k=4, num_classes=3, T=100)
print("3")

train_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=False)

print(dataset_train.x.shape, dataset_train.y.shape)

In [None]:
tmp_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=1, shuffle=True)

for x, y in tmp_loader:
    print(x)
    print(y)
    print(x.shape, y.shape)
    break

In [None]:
class BahdanauAttention(nn.Module):
  
    def __init__(self, hidden_dim):
        super().__init__()
        self.encoder_proj = nn.Linear(hidden_dim, hidden_dim)
        self.query_proj = nn.Linear(hidden_dim, hidden_dim)
        self.score = nn.Linear(hidden_dim, 1)

    def forward(self, encoder_outputs, query):
        query = query.unsqueeze(1)
        enc_proj = self.encoder_proj(encoder_outputs)  # => (B, seq_len, hidden_dim)
        query_proj = self.query_proj(query)            # => (B, 1, hidden_dim)

     
        energies = torch.tanh(enc_proj + query_proj)
        alignment = self.score(energies)   # => (B, seq_len, 1)

        attn_weights = F.softmax(alignment.squeeze(-1), dim=1)  # => (B, seq_len)

        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
        context = context.squeeze(1)

        return context, attn_weights

class DeeplobAttention_OF(nn.Module):
    def __init__(self, y_len=3):
        """
        Args:
          y_len: number of output classes (e.g. 3 for FI-2010 3-label classification).
        """
        super().__init__()
        self.y_len = y_len

        # ========== 1) Convolution Blocks ==========
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(1,2), stride=(1,2)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
        )
        
        #since we use Order Flow data, the dimension is divided by 2 and we don't need the second CNN anymore

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(1,10)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(4,1)),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(32),
        )

        # ========== 2) Inception Modules ==========
        self.inp1 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
        )
        self.inp2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(5,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
        )
        self.inp3 = nn.Sequential(
            nn.MaxPool2d((3,1), stride=(1,1), padding=(1,0)),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1,1), padding='same'),
            nn.LeakyReLU(negative_slope=0.01),
            nn.BatchNorm2d(64),
        )

        # ========== 3) LSTM (Encoder) ==========
        # After inception, we have 64 + 64 + 64 = 192 channels
        self.lstm = nn.LSTM(input_size=192, hidden_size=64,
                            num_layers=1, batch_first=True)

        # ========== 4) Bahdanau Attention ==========
        self.attn = BahdanauAttention(hidden_dim=64)

        # ========== 5) Classification Head ==========
        self.fc1 = nn.Linear(64, self.y_len)

    def forward(self, x):
        """
        x: shape (batch_size, 1, k=100, features=some_value),
           typically (B, 1, 100, 40) for the FI-2010 data.
        Returns:
          forecast_y: (B, y_len)  -> (B, 3) for 3-class classification
        """

        x = self.conv1(x)  # => (B, 32, ...)
        # x = self.conv2(x)  # => (B, 32, ...)
        x = self.conv3(x)  # => (B, 32, T, 1)

        # -------------- Step 2: Inception --------------
        x_inp1 = self.inp1(x)  # => (B, 64, T, 1)
        x_inp2 = self.inp2(x)  # => (B, 64, T, 1)
        x_inp3 = self.inp3(x)  # => (B, 64, T, 1)

        x = torch.cat((x_inp1, x_inp2, x_inp3), dim=1)

        x = x.permute(0, 2, 1, 3)          # => (B, T, 192, 1)
        x = x.reshape(x.shape[0], x.shape[1], x.shape[2])  # => (B, T, 192)

        lstm_out, (h, c) = self.lstm(x)    # lstm_out => (B, T, 64)
        final_state = h.squeeze(0)  # => (B, 64)

        context, attn_weights = self.attn(lstm_out, final_state)
        out = self.fc1(context)           # => (B, y_len)
        forecast_y = F.softmax(out, dim=1)  # => (B, y_len)

        return forecast_y

In [None]:
model = DeeplobAttention_OF(y_len = dataset_train.num_classes)
model.to(device)


In [None]:
summary(model, (1, 1, 100, 20))

Layer (type:depth-idx)                   Output Shape              Param #
DeeplobAttention_v2                      [1, 3]                    --
├─Sequential: 1-1                        [1, 32, 94, 10]           --
│    └─Conv2d: 2-1                       [1, 32, 100, 10]          96
│    └─LeakyReLU: 2-2                    [1, 32, 100, 10]          --
│    └─BatchNorm2d: 2-3                  [1, 32, 100, 10]          64
│    └─Conv2d: 2-4                       [1, 32, 97, 10]           4,128
│    └─LeakyReLU: 2-5                    [1, 32, 97, 10]           --
│    └─BatchNorm2d: 2-6                  [1, 32, 97, 10]           64
│    └─Conv2d: 2-7                       [1, 32, 94, 10]           4,128
│    └─LeakyReLU: 2-8                    [1, 32, 94, 10]           --
│    └─BatchNorm2d: 2-9                  [1, 32, 94, 10]           64
├─Sequential: 1-2                        [1, 32, 88, 1]            --
│    └─Conv2d: 2-10                      [1, 32, 94, 1]            10,272
│    

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

## Model Training

In [None]:
# A function to encapsulate the training loop
def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs, patience=15):
    
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    best_test_loss = np.inf
    best_test_epoch = 0
    no_improvement_duration = 0 # counter to stop after 10 epochs without improvement on the validation loss

    for it in tqdm(range(epochs)):
        
        model.train()
        t0 = datetime.now()
        train_loss = []
        for inputs, targets in train_loader:
            # move data to GPU
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)
            # print("inputs.shape:", inputs.shape)
            # zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass
            # print("about to get model output")
            outputs = model(inputs)
            # print("done getting model output")
            # print("outputs.shape:", outputs.shape, "targets.shape:", targets.shape)
            loss = criterion(outputs, targets)
            # Backward and optimize
            # print("about to optimize")
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        # Get train loss and test loss
        train_loss = np.mean(train_loss) # a little misleading
    
        model.eval()
        test_loss = []
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)      
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss.append(loss.item())
        test_loss = np.mean(test_loss)

        # Save losses
        train_losses[it] = train_loss
        test_losses[it] = test_loss
        
        if test_loss < best_test_loss:
            torch.save(model, './best_val_model_pytorch')
            best_test_loss = test_loss
            best_test_epoch = it
            no_improvement_duration = 0
            print('model saved')
        else:
            no_improvement_duration +=1

        dt = datetime.now() - t0
        print(f'Epoch {it+1}/{epochs}, Train Loss: {train_loss:.4f}, \
          Validation Loss: {test_loss:.4f}, Duration: {dt}, Best Val Epoch: {best_test_epoch}')
        
        if no_improvement_duration == patience:
            print(f"Validation Loss has not improved for {patience} epochs, the gradient descent will be stopped.")
            break

    return train_losses, test_losses

In [None]:
train_losses, val_losses = batch_gd(model, criterion, optimizer, 
                                    train_loader, val_loader, epochs=70)

  1%|▏         | 1/70 [00:55<1:03:25, 55.16s/it]

model saved
Epoch 1/70, Train Loss: 0.7801,           Validation Loss: 0.7823, Duration: 0:00:55.157618, Best Val Epoch: 0


  3%|▎         | 2/70 [01:49<1:01:40, 54.42s/it]

model saved
Epoch 2/70, Train Loss: 0.7064,           Validation Loss: 0.7624, Duration: 0:00:53.910536, Best Val Epoch: 1


  4%|▍         | 3/70 [02:39<58:53, 52.73s/it]  

Epoch 3/70, Train Loss: 0.6973,           Validation Loss: 0.7662, Duration: 0:00:50.717157, Best Val Epoch: 1


  6%|▌         | 4/70 [03:30<57:11, 51.99s/it]

model saved
Epoch 4/70, Train Loss: 0.6906,           Validation Loss: 0.7572, Duration: 0:00:50.847256, Best Val Epoch: 3


  7%|▋         | 5/70 [04:21<55:55, 51.62s/it]

Epoch 5/70, Train Loss: 0.6859,           Validation Loss: 0.7740, Duration: 0:00:50.958081, Best Val Epoch: 3


  9%|▊         | 6/70 [05:13<55:11, 51.74s/it]

Epoch 6/70, Train Loss: 0.6819,           Validation Loss: 0.7598, Duration: 0:00:51.988967, Best Val Epoch: 3


 10%|█         | 7/70 [06:06<54:36, 52.01s/it]

model saved
Epoch 7/70, Train Loss: 0.6786,           Validation Loss: 0.7476, Duration: 0:00:52.570531, Best Val Epoch: 6


 11%|█▏        | 8/70 [07:03<55:20, 53.56s/it]

model saved
Epoch 8/70, Train Loss: 0.6746,           Validation Loss: 0.7345, Duration: 0:00:56.871339, Best Val Epoch: 7


 13%|█▎        | 9/70 [07:58<55:00, 54.11s/it]

Epoch 9/70, Train Loss: 0.6722,           Validation Loss: 0.7352, Duration: 0:00:55.327257, Best Val Epoch: 7


 14%|█▍        | 10/70 [08:51<53:47, 53.79s/it]

model saved
Epoch 10/70, Train Loss: 0.6689,           Validation Loss: 0.7291, Duration: 0:00:53.073415, Best Val Epoch: 9


 16%|█▌        | 11/70 [09:44<52:44, 53.64s/it]

Epoch 11/70, Train Loss: 0.6662,           Validation Loss: 0.7331, Duration: 0:00:53.305852, Best Val Epoch: 9


 17%|█▋        | 12/70 [10:39<52:05, 53.90s/it]

Epoch 12/70, Train Loss: 0.6641,           Validation Loss: 0.7423, Duration: 0:00:54.469354, Best Val Epoch: 9


 19%|█▊        | 13/70 [11:35<51:56, 54.68s/it]

model saved
Epoch 13/70, Train Loss: 0.6618,           Validation Loss: 0.7255, Duration: 0:00:56.485140, Best Val Epoch: 12


 20%|██        | 14/70 [12:32<51:45, 55.46s/it]

Epoch 14/70, Train Loss: 0.6595,           Validation Loss: 0.7255, Duration: 0:00:57.262325, Best Val Epoch: 12


 21%|██▏       | 15/70 [13:25<49:59, 54.54s/it]

model saved
Epoch 15/70, Train Loss: 0.6575,           Validation Loss: 0.7246, Duration: 0:00:52.412612, Best Val Epoch: 14


 23%|██▎       | 16/70 [14:16<48:16, 53.64s/it]

Epoch 16/70, Train Loss: 0.6559,           Validation Loss: 0.7303, Duration: 0:00:51.548114, Best Val Epoch: 14


 24%|██▍       | 17/70 [15:09<47:04, 53.29s/it]

Epoch 17/70, Train Loss: 0.6535,           Validation Loss: 0.7389, Duration: 0:00:52.486434, Best Val Epoch: 14


 26%|██▌       | 18/70 [16:01<45:50, 52.90s/it]

Epoch 18/70, Train Loss: 0.6524,           Validation Loss: 0.7252, Duration: 0:00:51.980982, Best Val Epoch: 14


 27%|██▋       | 19/70 [16:53<44:45, 52.66s/it]

Epoch 19/70, Train Loss: 0.6511,           Validation Loss: 0.7266, Duration: 0:00:52.094582, Best Val Epoch: 14


 29%|██▊       | 20/70 [17:47<44:11, 53.04s/it]

Epoch 20/70, Train Loss: 0.6500,           Validation Loss: 0.7327, Duration: 0:00:53.917796, Best Val Epoch: 14


 30%|███       | 21/70 [18:39<43:08, 52.82s/it]

Epoch 21/70, Train Loss: 0.6482,           Validation Loss: 0.7505, Duration: 0:00:52.325412, Best Val Epoch: 14


 31%|███▏      | 22/70 [19:31<42:07, 52.65s/it]

Epoch 22/70, Train Loss: 0.6471,           Validation Loss: 0.7289, Duration: 0:00:52.248803, Best Val Epoch: 14


 33%|███▎      | 23/70 [20:29<42:25, 54.16s/it]

Epoch 23/70, Train Loss: 0.6463,           Validation Loss: 0.7294, Duration: 0:00:57.667650, Best Val Epoch: 14


 34%|███▍      | 24/70 [21:26<42:11, 55.04s/it]

Epoch 24/70, Train Loss: 0.6453,           Validation Loss: 0.7273, Duration: 0:00:57.098294, Best Val Epoch: 14


 36%|███▌      | 25/70 [22:24<41:50, 55.78s/it]

Epoch 25/70, Train Loss: 0.6443,           Validation Loss: 0.7350, Duration: 0:00:57.509194, Best Val Epoch: 14


 37%|███▋      | 26/70 [23:23<41:43, 56.90s/it]

Epoch 26/70, Train Loss: 0.6432,           Validation Loss: 0.7272, Duration: 0:00:59.509066, Best Val Epoch: 14


 39%|███▊      | 27/70 [24:20<40:49, 56.97s/it]

Epoch 27/70, Train Loss: 0.6423,           Validation Loss: 0.7279, Duration: 0:00:57.145925, Best Val Epoch: 14


 40%|████      | 28/70 [25:17<39:47, 56.85s/it]

Epoch 28/70, Train Loss: 0.6415,           Validation Loss: 0.7278, Duration: 0:00:56.571864, Best Val Epoch: 14


 41%|████▏     | 29/70 [26:14<38:56, 56.98s/it]

Epoch 29/70, Train Loss: 0.6407,           Validation Loss: 0.7286, Duration: 0:00:57.285839, Best Val Epoch: 14


 41%|████▏     | 29/70 [27:11<38:26, 56.26s/it]

Epoch 30/70, Train Loss: 0.6404,           Validation Loss: 0.7291, Duration: 0:00:56.733904, Best Val Epoch: 14
Validation Loss has not improved for 15 epochs, the gradient descent will be stopped.





In [None]:
model = torch.load('best_val_model_pytorch')

n_correct = 0.
n_total = 0.
for inputs, targets in test_loader:
    # Move to GPU
    inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)

    # Forward pass
    outputs = model(inputs)
    
    # Get prediction
    # torch.max returns both max and argmax
    _, predictions = torch.max(outputs, 1)

    # update counts
    n_correct += (predictions == targets).sum().item()
    n_total += targets.shape[0]

test_acc = n_correct / n_total
print(f"Test acc: {test_acc:.4f}")

  model = torch.load('best_val_model_pytorch')


Test acc: 0.8537


In [None]:
# model = torch.load('best_val_model_pytorch')
all_targets = []
all_predictions = []

for inputs, targets in test_loader:
    # Move to GPU
    inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)

    # Forward pass
    outputs = model(inputs)
    
    # Get prediction
    # torch.max returns both max and argmax
    _, predictions = torch.max(outputs, 1)

    all_targets.append(targets.cpu().numpy())
    all_predictions.append(predictions.cpu().numpy())

all_targets = np.concatenate(all_targets)    
all_predictions = np.concatenate(all_predictions)    

In [None]:
print('accuracy_score:', accuracy_score(all_targets, all_predictions))
print(classification_report(all_targets, all_predictions, digits=4))

accuracy_score: 0.853707845836201
              precision    recall  f1-score   support

           0     0.8497    0.8739    0.8616     47915
           1     0.8736    0.8265    0.8494     48050
           2     0.8380    0.8615    0.8496     43523

    accuracy                         0.8537    139488
   macro avg     0.8537    0.8540    0.8535    139488
weighted avg     0.8543    0.8537    0.8536    139488

