In [2]:
import numpy as np

import torch
import torch.nn as nn
from transformers import AutoTokenizer

from torch.utils.data import DataLoader
from datasets import load_dataset

from sklearn.metrics import accuracy_score, f1_score

from tqdm.notebook import tqdm
import random

In [3]:
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [5]:
!nvidia-smi

Tue Jul 29 20:29:19 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.80                 Driver Version: 576.80         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   52C    P8             16W /   95W |    4372MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [6]:
max_length = 256
batch_size = 64
embed_dim = 300
hidden_dim = 512
num_classes = 3
epochs = 5
lr = 0.0002

In [7]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [8]:
dataset = load_dataset('multi_nli')

In [9]:
def filter_labels(example):
    return example['label'] != -1

dataset['validation_matched'] = dataset['validation_matched'].filter(filter_labels)

In [10]:
def tokenize_fn(example):
    
    enc = tokenizer(
        example['premise'],
        example['hypothesis'],
        padding = 'max_length',
        truncation = True,
        max_length = max_length
    )
    return {
        'input_ids' : enc['input_ids'],
        'label' : example['label']
    }

In [11]:
dataset = dataset.map(tokenize_fn, batched = False)
dataset.set_format(type = 'torch', columns = ['input_ids', 'label'])

In [12]:
train_loader = DataLoader(dataset['train'], batch_size = batch_size, shuffle = True)
val_loader = DataLoader(dataset['validation_matched'], batch_size = batch_size)

In [13]:
class LSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first = True, bidirectional = True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        
        x = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(x)
        h_final = torch.cat((h_n[0], h_n[1]), dim = 1)
        x = self.dropout(h_final)
        
        return self.fc(x)

In [14]:
model = LSTM(
    vocab_size = tokenizer.vocab_size,
    embed_dim = embed_dim,
    hidden_dim = hidden_dim,
    output_dim = num_classes
).to(device)

In [16]:
model

LSTM(
  (embedding): Embedding(30522, 300)
  (lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=1024, out_features=3, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()

In [18]:
def train(model, train_loader, optimizer, criterion, device):
    
    model.train()
    total_loss = 0
    
    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(train_loader)

In [19]:
def evaluate(model, val_loader, device):
    
    model.eval()
    preds, true = [], []
    
    with torch.no_grad():
        for batch in val_loader:
            
            input_ids = batch['input_ids'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids)
            predictions = torch.argmax(outputs, dim = 1)
            preds.extend(predictions.cpu().numpy())
            true.extend(labels.cpu().numpy())

    acc = accuracy_score(true, preds)
    f1_weighted = f1_score(true, preds, average = 'weighted')
    f1_macro = f1_score(true, preds, average = 'macro')
    
    return acc, f1_weighted, f1_macro

In [21]:
for epoch in range(epochs):
    
    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_acc, f1_w, f1_m = evaluate(model, val_loader, device)

    print(f'Epoch {epoch+1} - Train Loss: {train_loss:.4f} | Val Accuracy: {val_acc:.4f} | Weighted F1: {f1_w:.4f} | Macro F1: {f1_m:.4f}')

Epoch 1 - Train Loss: 0.8981 | Val Accuracy: 0.5884 | Weighted F1: 0.5857 | Macro F1: 0.5841
Epoch 2 - Train Loss: 0.8451 | Val Accuracy: 0.6003 | Weighted F1: 0.6007 | Macro F1: 0.5996
Epoch 3 - Train Loss: 0.7941 | Val Accuracy: 0.6137 | Weighted F1: 0.6118 | Macro F1: 0.6105
Epoch 4 - Train Loss: 0.7382 | Val Accuracy: 0.6167 | Weighted F1: 0.6151 | Macro F1: 0.6139
Epoch 5 - Train Loss: 0.6742 | Val Accuracy: 0.6103 | Weighted F1: 0.6091 | Macro F1: 0.6079


In [22]:
torch.save(model.state_dict(), 'bi_lstm_model.pt')