In [1]:
!pip install datasets
!pip install nltk

Collecting datasets
  Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading x

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pickle
from datasets import load_dataset

In [59]:
# Load the Rotten Tomatoes dataset
dataset = load_dataset("rotten_tomatoes")
train_dataset = dataset['train']
valid_dataset = dataset['validation']
test_dataset = dataset['test']

train_text = train_dataset.to_pandas()['text']

max_text_len = 0
for text in train_text:
    max_text_len = max(max_text_len, len(text))

print(max_text_len)


267


In [60]:
# Load the embedding matrix and vocab from files
with open('../embedding_matrix_300d.pkl', 'rb') as f:
    embedding_matrix = pickle.load(f).astype(np.float32)
    padding = [0 for i in range(300)]
    embedding_matrix = np.insert(embedding_matrix, 0, padding, 0)
    print(type(embedding_matrix))

with open('../vocab_word_to_index_300d.pkl', 'rb') as f:
    vocab_word_to_index = pickle.load(f)
    print(type(vocab_word_to_index))

# Convert to torch tensors
embedding_matrix = torch.tensor(embedding_matrix)
vocab_size, embedding_dim = embedding_matrix.shape
#print(embedding_matrix[0])

<class 'numpy.ndarray'>
<class 'dict'>


In [16]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [61]:
import numpy as np
import torch
import nltk
from torch.utils.data import TensorDataset, DataLoader

class SentimentDataset:
    def __init__(self, dataset, word_to_index, max_len=30):
        self.dataset = dataset
        self.word_to_index = word_to_index
        self.max_len = max_len

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        text = self.dataset[idx]['text']
        label = self.dataset[idx]['label']

        # Tokenization and word-to-index conversion
        text = text.lower()
        word_list = nltk.tokenize.word_tokenize(text)
        word_list = [word.strip("'\"") for word in word_list]
        indices = [self.word_to_index.get(word, self.word_to_index.get('<UNK>')) + 1 for word in word_list]
        indices = indices[:self.max_len] + [0] * (self.max_len - len(indices))  # Padding

        return np.array(indices), np.array(label)

    def preprocess_data(self):
        texts = []
        labels = []
        for i in range(len(self.dataset)):
            features, label = self.__getitem__(i)
            texts.append(features)
            labels.append(label)
        return np.array(texts), np.array(labels)

train_texts, train_labels = SentimentDataset(train_dataset, vocab_word_to_index, max_len=30).preprocess_data()
valid_texts, valid_labels = SentimentDataset(valid_dataset, vocab_word_to_index, max_len=30).preprocess_data()
test_texts, test_labels = SentimentDataset(test_dataset, vocab_word_to_index, max_len=30).preprocess_data()

# Convert preprocessed arrays to PyTorch tensors
train_texts = torch.tensor(train_texts)
train_labels = torch.tensor(train_labels)
valid_texts = torch.tensor(valid_texts)
valid_labels = torch.tensor(valid_labels)
test_texts = torch.tensor(test_texts)
test_labels = torch.tensor(test_labels)

train_dataset = TensorDataset(train_texts, train_labels)
valid_dataset = TensorDataset(valid_texts, valid_labels)
test_dataset = TensorDataset(test_texts, test_labels)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


# Vanilla RNN - Max Pooling

In [19]:
## Max Pooling##
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class SentimentRNN_MaxPool(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, num_layers = 1, freeze_embeddings=True, dropout = 0.5):
        super(SentimentRNN_MaxPool, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze_embeddings)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True, device= device)
        self.fc = nn.Linear(hidden_dim, output_dim, device= device)
        self.sigmoid = nn.Sigmoid()
        self.device = device


    def forward(self, x):
        embedded = self.embedding(x)
        out, hidden = self.rnn(embedded)
        out = out.max(dim=1).values
        #out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Model hyperparameters
hidden_dim = 128
output_dim = 1  # Binary classification (positive, negative)

model_maxp = SentimentRNN_MaxPool(embedding_matrix, hidden_dim, output_dim, 1)
#print(model.embedding.weight[1])

In [20]:
# Training function
def train_model(model, train_loader, valid_loader, epochs=30):
    global best_metric, no_improvement_count #variables for early stopping
    for epoch in range(epochs):
        # Set model to training mode
        model.train()
        total_train_loss = 0
        total_train_correct = 0
        total_train_samples = 0

        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients

            # Forward pass: get predictions
            predictions = model(texts)

            # Compute the loss
            loss = criterion(predictions, labels.unsqueeze(1).float())
            total_train_loss += loss.item()

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            # Accuracy calculation
            predicted_labels = (predictions > 0.5).int()
            total_train_correct += (predicted_labels.squeeze() == labels).sum().item()

            #print(total_train_correct)
            total_train_samples += labels.size(0)

        # Calculate and print average training accuracy and loss per epoch
        train_accuracy = total_train_correct / total_train_samples
        train_loss = total_train_loss / len(train_loader)

        # Evaluate model on validation set
        valid_accuracy = evaluate_model(model, valid_loader)

        print(f'Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f} | Validation Accuracy: {valid_accuracy:.4f}')

        # Early stopping check
        if best_metric is None or valid_accuracy > best_metric:
            best_metric = valid_accuracy
            no_improvement_count = 0  # Reset counter
            torch.save(model.state_dict(), 'best_max_model.pt')  # Save best model state
        else:
            no_improvement_count += 1  # Increment counter if no improvement

        if no_improvement_count >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break  # Exit training loop if no improvement for `patience` epochs

def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for texts, labels in loader:
            texts, labels = texts.to(device), labels.to(device)
            predictions = model(texts)
            predicted_labels = (predictions >= 0.5).int()
            correct += (predicted_labels.squeeze() == labels).sum().item()
            total += labels.size(0)
    return correct / total

In [21]:
optimizer = optim.Adam(model_maxp.parameters(), lr=0.0001)
criterion = nn.BCELoss()

model_maxp.to(device)

patience = 5
best_metric = None
no_improvement_count = 0

In [22]:
import random
random.seed(42)

np.random.seed(42)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Ensure deterministic behavior for cuDNN (CUDA)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [23]:
train_model(model_maxp, train_loader, valid_loader, epochs=30)

Epoch 1 | Train Loss: 0.6888 | Train Accuracy: 0.5489 | Validation Accuracy: 0.6313
Epoch 2 | Train Loss: 0.6678 | Train Accuracy: 0.6613 | Validation Accuracy: 0.6942
Epoch 3 | Train Loss: 0.6100 | Train Accuracy: 0.6892 | Validation Accuracy: 0.6886
Epoch 4 | Train Loss: 0.5685 | Train Accuracy: 0.7127 | Validation Accuracy: 0.7111
Epoch 5 | Train Loss: 0.5418 | Train Accuracy: 0.7313 | Validation Accuracy: 0.7308
Epoch 6 | Train Loss: 0.5239 | Train Accuracy: 0.7420 | Validation Accuracy: 0.7439
Epoch 7 | Train Loss: 0.5111 | Train Accuracy: 0.7512 | Validation Accuracy: 0.7467
Epoch 8 | Train Loss: 0.5004 | Train Accuracy: 0.7544 | Validation Accuracy: 0.7523
Epoch 9 | Train Loss: 0.4918 | Train Accuracy: 0.7623 | Validation Accuracy: 0.7411
Epoch 10 | Train Loss: 0.4890 | Train Accuracy: 0.7644 | Validation Accuracy: 0.7439
Epoch 11 | Train Loss: 0.4851 | Train Accuracy: 0.7675 | Validation Accuracy: 0.7523
Epoch 12 | Train Loss: 0.4816 | Train Accuracy: 0.7680 | Validation Accura

In [35]:
# Load the best model
model_maxp.load_state_dict(torch.load('best_max_model.pt'))

# Evaluate the model on the test set
test_acc = evaluate_model(model_maxp, test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7486


  model_maxp.load_state_dict(torch.load('best_max_model.pt'))


# Vanilla RNN - Mean Pooling

In [28]:
## Max Pooling##
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class SentimentRNN_MeanPool(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, num_layers = 1, freeze_embeddings=True, dropout = 0.5):
        super(SentimentRNN_MeanPool, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze_embeddings)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True, device= device)
        self.fc = nn.Linear(hidden_dim, output_dim, device= device)
        self.sigmoid = nn.Sigmoid()
        self.device = device


    def forward(self, x):
        embedded = self.embedding(x)
        out, hidden = self.rnn(embedded)
        out = out.mean(dim=1)
        #out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Model hyperparameters
hidden_dim = 128
output_dim = 1  # Binary classification (positive, negative)

model_meanp = SentimentRNN_MeanPool(embedding_matrix, hidden_dim, output_dim, 1)
#print(model.embedding.weight[1])

In [29]:
# Training function
def train_model(model, train_loader, valid_loader, epochs=30):
    global best_metric, no_improvement_count #variables for early stopping
    for epoch in range(epochs):
        # Set model to training mode
        model.train()
        total_train_loss = 0
        total_train_correct = 0
        total_train_samples = 0

        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients

            # Forward pass: get predictions
            predictions = model(texts)

            # Compute the loss
            loss = criterion(predictions, labels.unsqueeze(1).float())
            total_train_loss += loss.item()

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            # Accuracy calculation
            predicted_labels = (predictions > 0.5).int()
            total_train_correct += (predicted_labels.squeeze() == labels).sum().item()

            #print(total_train_correct)
            total_train_samples += labels.size(0)

        # Calculate and print average training accuracy and loss per epoch
        train_accuracy = total_train_correct / total_train_samples
        train_loss = total_train_loss / len(train_loader)

        # Evaluate model on validation set
        valid_accuracy = evaluate_model(model, valid_loader)

        print(f'Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f} | Validation Accuracy: {valid_accuracy:.4f}')

        # Early stopping check
        if best_metric is None or valid_accuracy > best_metric:
            best_metric = valid_accuracy
            no_improvement_count = 0  # Reset counter
            torch.save(model.state_dict(), 'best_mean_model.pt')  # Save best model state
        else:
            no_improvement_count += 1  # Increment counter if no improvement

        if no_improvement_count >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break  # Exit training loop if no improvement for `patience` epochs

def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for texts, labels in loader:
            texts, labels = texts.to(device), labels.to(device)
            predictions = model(texts)
            predicted_labels = (predictions >= 0.5).int()
            correct += (predicted_labels.squeeze() == labels).sum().item()
            total += labels.size(0)
    return correct / total

In [30]:
optimizer = optim.Adam(model_meanp.parameters(), lr=0.0001)
criterion = nn.BCELoss()

model_meanp.to(device)

patience = 5
best_metric = None
no_improvement_count = 0

In [31]:
import random
random.seed(42)

np.random.seed(42)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Ensure deterministic behavior for cuDNN (CUDA)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [32]:
train_model(model_meanp, train_loader, valid_loader, epochs=30)

Epoch 1 | Train Loss: 0.6864 | Train Accuracy: 0.5994 | Validation Accuracy: 0.6886
Epoch 2 | Train Loss: 0.6143 | Train Accuracy: 0.6819 | Validation Accuracy: 0.7045
Epoch 3 | Train Loss: 0.5549 | Train Accuracy: 0.7177 | Validation Accuracy: 0.7195
Epoch 4 | Train Loss: 0.5342 | Train Accuracy: 0.7332 | Validation Accuracy: 0.7223
Epoch 5 | Train Loss: 0.5200 | Train Accuracy: 0.7433 | Validation Accuracy: 0.7280
Epoch 6 | Train Loss: 0.5101 | Train Accuracy: 0.7503 | Validation Accuracy: 0.7383
Epoch 7 | Train Loss: 0.5046 | Train Accuracy: 0.7546 | Validation Accuracy: 0.7326
Epoch 8 | Train Loss: 0.5008 | Train Accuracy: 0.7526 | Validation Accuracy: 0.7411
Epoch 9 | Train Loss: 0.4947 | Train Accuracy: 0.7571 | Validation Accuracy: 0.7345
Epoch 10 | Train Loss: 0.4953 | Train Accuracy: 0.7620 | Validation Accuracy: 0.7270
Epoch 11 | Train Loss: 0.4916 | Train Accuracy: 0.7608 | Validation Accuracy: 0.7486
Epoch 12 | Train Loss: 0.4906 | Train Accuracy: 0.7598 | Validation Accura

In [34]:
# Load the best model
model_maxp.load_state_dict(torch.load('best_mean_model.pt'))

# Evaluate the model on the test set
test_acc = evaluate_model(model_maxp, test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7411


  model_maxp.load_state_dict(torch.load('best_mean_model.pt'))


# Vanilla RNN - Concatenation Pooling

In [53]:
## Mixed Pooling##
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class SentimentRNN_MixedPool(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, num_layers = 1, freeze_embeddings=True, dropout = 0.5):
        super(SentimentRNN_MixedPool, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze_embeddings)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True, device= device)
        self.fc = nn.Linear(3*hidden_dim, output_dim, device= device)
        self.sigmoid = nn.Sigmoid()
        self.device = device


    def forward(self, x):
        embedded = self.embedding(x)
        out, hidden = self.rnn(embedded)

        last_hidden = out[:, -1, :]  #last hidden state
        mean_pooled = out.mean(dim=1) #max pooling
        max_pooled = out.max(dim=1).values #average pooling

        out = torch.cat([last_hidden, mean_pooled, max_pooled], dim=1)
        #out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Model hyperparameters
hidden_dim = 128
output_dim = 1  # Binary classification (positive, negative)

model_mixedp = SentimentRNN_MixedPool(embedding_matrix, hidden_dim, output_dim, 1)
#print(model.embedding.weight[1])

In [54]:
# Training function
def train_model(model, train_loader, valid_loader, epochs=30):
    global best_metric, no_improvement_count #variables for early stopping
    for epoch in range(epochs):
        # Set model to training mode
        model.train()
        total_train_loss = 0
        total_train_correct = 0
        total_train_samples = 0

        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients

            # Forward pass: get predictions
            predictions = model(texts)

            # Compute the loss
            loss = criterion(predictions, labels.unsqueeze(1).float())
            total_train_loss += loss.item()

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            # Accuracy calculation
            predicted_labels = (predictions > 0.5).int()
            total_train_correct += (predicted_labels.squeeze() == labels).sum().item()

            #print(total_train_correct)
            total_train_samples += labels.size(0)

        # Calculate and print average training accuracy and loss per epoch
        train_accuracy = total_train_correct / total_train_samples
        train_loss = total_train_loss / len(train_loader)

        # Evaluate model on validation set
        valid_accuracy = evaluate_model(model, valid_loader)

        print(f'Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f} | Validation Accuracy: {valid_accuracy:.4f}')

        # Early stopping check
        if best_metric is None or valid_accuracy > best_metric:
            best_metric = valid_accuracy
            no_improvement_count = 0  # Reset counter
            torch.save(model.state_dict(), 'best_mixed_model.pt')  # Save best model state
        else:
            no_improvement_count += 1  # Increment counter if no improvement

        if no_improvement_count >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break  # Exit training loop if no improvement for `patience` epochs

def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for texts, labels in loader:
            texts, labels = texts.to(device), labels.to(device)
            predictions = model(texts)
            predicted_labels = (predictions >= 0.5).int()
            correct += (predicted_labels.squeeze() == labels).sum().item()
            total += labels.size(0)
    return correct / total

In [55]:
optimizer = optim.Adam(model_mixedp.parameters(), lr=0.0001)
criterion = nn.BCELoss()

model_mixedp.to(device)

patience = 5
best_metric = None
no_improvement_count = 0

In [56]:
import random
random.seed(42)

np.random.seed(42)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Ensure deterministic behavior for cuDNN (CUDA)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [57]:
train_model(model_mixedp, train_loader, valid_loader, epochs=30)

Epoch 1 | Train Loss: 0.6902 | Train Accuracy: 0.5579 | Validation Accuracy: 0.6088
Epoch 2 | Train Loss: 0.6707 | Train Accuracy: 0.6797 | Validation Accuracy: 0.6876
Epoch 3 | Train Loss: 0.5833 | Train Accuracy: 0.7019 | Validation Accuracy: 0.7148
Epoch 4 | Train Loss: 0.5300 | Train Accuracy: 0.7389 | Validation Accuracy: 0.7223
Epoch 5 | Train Loss: 0.5086 | Train Accuracy: 0.7518 | Validation Accuracy: 0.7467
Epoch 6 | Train Loss: 0.4972 | Train Accuracy: 0.7597 | Validation Accuracy: 0.7392
Epoch 7 | Train Loss: 0.4907 | Train Accuracy: 0.7627 | Validation Accuracy: 0.7336
Epoch 8 | Train Loss: 0.4870 | Train Accuracy: 0.7638 | Validation Accuracy: 0.7533
Epoch 9 | Train Loss: 0.4812 | Train Accuracy: 0.7695 | Validation Accuracy: 0.7439
Epoch 10 | Train Loss: 0.4799 | Train Accuracy: 0.7715 | Validation Accuracy: 0.7514
Epoch 11 | Train Loss: 0.4794 | Train Accuracy: 0.7716 | Validation Accuracy: 0.7533
Epoch 12 | Train Loss: 0.4782 | Train Accuracy: 0.7721 | Validation Accura

In [58]:
# Load the best model
model_mixedp.load_state_dict(torch.load('best_mixed_model.pt'))

# Evaluate the model on the test set
test_acc = evaluate_model(model_mixedp, test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7505


  model_mixedp.load_state_dict(torch.load('best_mixed_model.pt'))


# Vanilla RNN - Attention Layer

In [62]:
## Mixed Pooling##
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class SentimentRNN_Attention(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, num_layers = 1, freeze_embeddings=True, dropout = 0.5):
        super(SentimentRNN_Attention, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze_embeddings)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True, device= device)
        self.attention = nn.Linear(hidden_dim, 1, bias=False)
        self.fc = nn.Linear(hidden_dim, output_dim, device= device)
        self.sigmoid = nn.Sigmoid()
        self.device = device


    def forward(self, x):
        embedded = self.embedding(x)
        out, hidden = self.rnn(embedded)
        attention_weights = torch.softmax(self.attention(out), dim=1)
        out = torch.sum(out * attention_weights, dim=1)

        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Model hyperparameters
hidden_dim = 128
output_dim = 1  # Binary classification (positive, negative)

model_attention= SentimentRNN_Attention(embedding_matrix, hidden_dim, output_dim, 1)
#print(model.embedding.weight[1])

In [63]:
# Training function
def train_model(model, train_loader, valid_loader, epochs=30):
    global best_metric, no_improvement_count #variables for early stopping
    for epoch in range(epochs):
        # Set model to training mode
        model.train()
        total_train_loss = 0
        total_train_correct = 0
        total_train_samples = 0

        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients

            # Forward pass: get predictions
            predictions = model(texts)

            # Compute the loss
            loss = criterion(predictions, labels.unsqueeze(1).float())
            total_train_loss += loss.item()

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            # Accuracy calculation
            predicted_labels = (predictions > 0.5).int()
            total_train_correct += (predicted_labels.squeeze() == labels).sum().item()

            #print(total_train_correct)
            total_train_samples += labels.size(0)

        # Calculate and print average training accuracy and loss per epoch
        train_accuracy = total_train_correct / total_train_samples
        train_loss = total_train_loss / len(train_loader)

        # Evaluate model on validation set
        valid_accuracy = evaluate_model(model, valid_loader)

        print(f'Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f} | Validation Accuracy: {valid_accuracy:.4f}')

        # Early stopping check
        if best_metric is None or valid_accuracy > best_metric:
            best_metric = valid_accuracy
            no_improvement_count = 0  # Reset counter
            torch.save(model.state_dict(), 'best_attention_model.pt')  # Save best model state
        else:
            no_improvement_count += 1  # Increment counter if no improvement

        if no_improvement_count >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break  # Exit training loop if no improvement for `patience` epochs

def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for texts, labels in loader:
            texts, labels = texts.to(device), labels.to(device)
            predictions = model(texts)
            predicted_labels = (predictions >= 0.5).int()
            correct += (predicted_labels.squeeze() == labels).sum().item()
            total += labels.size(0)
    return correct / total

In [64]:
optimizer = optim.Adam(model_attention.parameters(), lr=0.0001)
criterion = nn.BCELoss()

model_attention.to(device)

patience = 5
best_metric = None
no_improvement_count = 0

In [65]:
import random
random.seed(42)

np.random.seed(42)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Ensure deterministic behavior for cuDNN (CUDA)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [66]:
train_model(model_attention, train_loader, valid_loader, epochs=30)

Epoch 1 | Train Loss: 0.6859 | Train Accuracy: 0.6035 | Validation Accuracy: 0.6979
Epoch 2 | Train Loss: 0.6127 | Train Accuracy: 0.6864 | Validation Accuracy: 0.7073
Epoch 3 | Train Loss: 0.5521 | Train Accuracy: 0.7183 | Validation Accuracy: 0.7214
Epoch 4 | Train Loss: 0.5317 | Train Accuracy: 0.7338 | Validation Accuracy: 0.7205
Epoch 5 | Train Loss: 0.5170 | Train Accuracy: 0.7455 | Validation Accuracy: 0.7233
Epoch 6 | Train Loss: 0.5067 | Train Accuracy: 0.7523 | Validation Accuracy: 0.7486
Epoch 7 | Train Loss: 0.5013 | Train Accuracy: 0.7546 | Validation Accuracy: 0.7326
Epoch 8 | Train Loss: 0.4972 | Train Accuracy: 0.7553 | Validation Accuracy: 0.7505
Epoch 9 | Train Loss: 0.4918 | Train Accuracy: 0.7607 | Validation Accuracy: 0.7345
Epoch 10 | Train Loss: 0.4925 | Train Accuracy: 0.7625 | Validation Accuracy: 0.7317
Epoch 11 | Train Loss: 0.4898 | Train Accuracy: 0.7612 | Validation Accuracy: 0.7495
Epoch 12 | Train Loss: 0.4876 | Train Accuracy: 0.7625 | Validation Accura

In [69]:
# Load the best model
model_attention.load_state_dict(torch.load('best_attention_model.pt'))

# Evaluate the model on the test set
test_acc = evaluate_model(model_attention, test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7495


  model_attention.load_state_dict(torch.load('best_attention_model.pt'))
