## **Part 2. Model Training & Evaluation - RNN**

In [38]:
!pip install datasets
!pip install nltk



In [39]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pickle
from datasets import load_dataset

In [40]:
# Load the Rotten Tomatoes dataset
dataset = load_dataset("rotten_tomatoes")
train_dataset = dataset['train']
valid_dataset = dataset['validation']
test_dataset = dataset['test']

train_text = train_dataset.to_pandas()['text']

max_text_len = 0
for text in train_text:
    max_text_len = max(max_text_len, len(text))

print(max_text_len)


267


In [41]:
print(train_dataset[0])

{'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .', 'label': 1}


In [42]:
# Load the embedding matrix and vocab from files
with open('embedding_matrix_300d.pkl', 'rb') as f:
    embedding_matrix = pickle.load(f).astype(np.float32)
    padding = [0 for i in range(300)]
    embedding_matrix = np.insert(embedding_matrix, 0, padding, 0)
    print(type(embedding_matrix))

with open('vocab_word_to_index_300d.pkl', 'rb') as f:
    vocab_word_to_index = pickle.load(f)
    print(type(vocab_word_to_index))

# Convert to torch tensors
embedding_matrix = torch.tensor(embedding_matrix)
vocab_size, embedding_dim = embedding_matrix.shape
#print(embedding_matrix[0])

<class 'numpy.ndarray'>
<class 'dict'>


In [43]:
import numpy as np
import torch
import nltk
from torch.utils.data import TensorDataset, DataLoader

class SentimentDataset:
    def __init__(self, dataset, word_to_index, max_len=30):
        self.dataset = dataset
        self.word_to_index = word_to_index
        self.max_len = max_len

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        text = self.dataset[idx]['text']
        label = self.dataset[idx]['label']

        # Tokenization and word-to-index conversion
        text = text.lower()
        word_list = nltk.tokenize.word_tokenize(text)
        word_list = [word.strip("'\"") for word in word_list]
        indices = [self.word_to_index.get(word, self.word_to_index.get('<UNK>')) + 1 for word in word_list]
        indices = indices[:self.max_len] + [0] * (self.max_len - len(indices))  # Padding

        return np.array(indices), np.array(label)

    def preprocess_data(self):
        texts = []
        labels = []
        for i in range(len(self.dataset)):
            features, label = self.__getitem__(i)
            texts.append(features)
            labels.append(label)
        return np.array(texts), np.array(labels)

train_texts, train_labels = SentimentDataset(train_dataset, vocab_word_to_index, max_len=30).preprocess_data()
valid_texts, valid_labels = SentimentDataset(valid_dataset, vocab_word_to_index, max_len=30).preprocess_data()
test_texts, test_labels = SentimentDataset(test_dataset, vocab_word_to_index, max_len=30).preprocess_data()

# Convert preprocessed arrays to PyTorch tensors
train_texts = torch.tensor(train_texts)
train_labels = torch.tensor(train_labels)
valid_texts = torch.tensor(valid_texts)
valid_labels = torch.tensor(valid_labels)
test_texts = torch.tensor(test_texts)
test_labels = torch.tensor(test_labels)

train_dataset = TensorDataset(train_texts, train_labels)
valid_dataset = TensorDataset(valid_texts, valid_labels)
test_dataset = TensorDataset(test_texts, test_labels)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


In [44]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Vanilla RNN - Hidden Representation

In [45]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class SentimentRNN(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, num_layers = 1, freeze_embeddings=True, dropout = 0.5):
        super(SentimentRNN, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze_embeddings)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True, device= device)
        #self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim, device= device)
        self.sigmoid = nn.Sigmoid()
        self.device = device


    def forward(self, x):
        embedded = self.embedding(x)
        out, hidden = self.rnn(embedded)
        out = out[:, -1, :]
        #out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Model hyperparameters
hidden_dim = 128
output_dim = 1  # Binary classification (positive, negative)

model = SentimentRNN(embedding_matrix, hidden_dim, output_dim, 1)

In [46]:
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.BCELoss()

# Move model to GPU if available
model.to(device)

patience = 5
best_metric = None
no_improvement_count = 0

# Training function
def train_model(model, train_loader, valid_loader, epochs=30):
    global best_metric, no_improvement_count #variables for early stopping
    for epoch in range(epochs):
        # Set model to training mode
        model.train()
        total_train_loss = 0
        total_train_correct = 0
        total_train_samples = 0

        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients

            # Forward pass: get predictions
            predictions = model(texts)

            # Compute the loss
            loss = criterion(predictions, labels.unsqueeze(1).float())
            total_train_loss += loss.item()

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            # Accuracy calculation
            predicted_labels = (predictions > 0.5).int()
            total_train_correct += (predicted_labels.squeeze() == labels).sum().item()

            #print(total_train_correct)
            total_train_samples += labels.size(0)

        # Calculate and print average training accuracy and loss per epoch
        train_accuracy = total_train_correct / total_train_samples
        train_loss = total_train_loss / len(train_loader)

        # Evaluate model on validation set
        valid_accuracy = evaluate_model(model, valid_loader)

        print(f'Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f} | Validation Accuracy: {valid_accuracy:.4f}')

        # Early stopping check
        if best_metric is None or valid_accuracy > best_metric:
            best_metric = valid_accuracy
            no_improvement_count = 0  # Reset counter
            torch.save(model.state_dict(), 'best_model.pt')  # Save best model state
        else:
            no_improvement_count += 1  # Increment counter if no improvement

        if no_improvement_count >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break  # Exit training loop if no improvement for `patience` epochs

def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for texts, labels in loader:
            texts, labels = texts.to(device), labels.to(device)
            predictions = model(texts)
            predicted_labels = (predictions >= 0.5).int()
            correct += (predicted_labels.squeeze() == labels).sum().item()
            total += labels.size(0)
    return correct / total

In [47]:
import random
random.seed(42)

np.random.seed(42)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Ensure deterministic behavior for cuDNN (CUDA)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Vanilla RNN (2a & 2b) [Best Model]
 **Learning Rate:**  = 0.0001

 **Batch Size:** =  128

 **Optimiser**  = Adam

 **Epochs (with Early Stopping)**  = 28

 **Accuracy:** = 75.8%


In [48]:
train_model(model, train_loader, valid_loader, epochs=30)

Epoch 1 | Train Loss: 0.6937 | Train Accuracy: 0.5000 | Validation Accuracy: 0.4925
Epoch 2 | Train Loss: 0.6914 | Train Accuracy: 0.5202 | Validation Accuracy: 0.5206
Epoch 3 | Train Loss: 0.6814 | Train Accuracy: 0.5637 | Validation Accuracy: 0.6876
Epoch 4 | Train Loss: 0.5876 | Train Accuracy: 0.6961 | Validation Accuracy: 0.7214
Epoch 5 | Train Loss: 0.5383 | Train Accuracy: 0.7385 | Validation Accuracy: 0.7336
Epoch 6 | Train Loss: 0.5171 | Train Accuracy: 0.7521 | Validation Accuracy: 0.7355
Epoch 7 | Train Loss: 0.5067 | Train Accuracy: 0.7607 | Validation Accuracy: 0.7336
Epoch 8 | Train Loss: 0.5014 | Train Accuracy: 0.7614 | Validation Accuracy: 0.7308
Epoch 9 | Train Loss: 0.4939 | Train Accuracy: 0.7647 | Validation Accuracy: 0.7439
Epoch 10 | Train Loss: 0.4920 | Train Accuracy: 0.7687 | Validation Accuracy: 0.7467
Epoch 11 | Train Loss: 0.4929 | Train Accuracy: 0.7668 | Validation Accuracy: 0.7420
Epoch 12 | Train Loss: 0.4892 | Train Accuracy: 0.7703 | Validation Accura

In [50]:
# Load the best model
model.load_state_dict(torch.load('best_model.pt'))

# Evaluate the model on the test set
test_acc = evaluate_model(model, test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7580


  model.load_state_dict(torch.load('best_model.pt'))


*Varying Batch Size & Learning Rate*

1. Learning Rate: 0.0001
Batch Size: 32
Optimiser: Adam
Accuracy: 73.2%

2. Learning Rate: 0.0001,
Batch Size: 64,
Optimiser: Adam,
Accuracy: 74.5%

3. Learning Rate: 0.0001,
Batch Size: 128,
Optimiser: Adam,
Accuracy: 75.8% <== Highest Overall

4. Learning Rate: 0.001,
Batch Size: 64,
Optimiser: Adam,
Accuracy: 72.7%


5. Learning Rate: 0.001,
Batch Size: 128,
Optimiser: Adam,
Accuracy: 74.2%


6. Learning Rate: 0.0005,
Batch Size: 64,
Optimiser: Adam,
Accuracy: 71.8%

7. Learning Rate: 0.0005,
Batch Size: 128,
Optimiser: Adam,
Accuracy: 74.7%

*Varying Classifier*

8. Learning Rate: 0.0001,
Batch Size: 128,
Optimiser: RMSprop,
Accuracy: 74.2%

9. Learning Rate: 0.001,
Batch Size: 128,
Optimiser: RMSprop,
Accuracy: 59%
