In [None]:
# Cell 1: Importing Necessary Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchtext.datasets import IMDB
from torchtext.data import Field, BucketIterator
import random

In [None]:
# One shared seed for every random number generator the notebook relies on,
# so that a fresh "Restart & Run All" starts from the same RNG state.
SEED = 1234
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(SEED)

In [None]:
# Define the fields for processing the text and labels.
# TEXT: reviews are tokenized with spaCy; batch_first=True puts the batch
# dimension first, matching the batch_first=True LSTM used by the model.
TEXT = Field(tokenize='spacy', batch_first=True)
# LABEL: one non-sequential value per review, stored as float because the
# loss used later (BCEWithLogitsLoss) expects float targets.
# unk_token=None keeps '<unk>' out of the label vocabulary. With the default
# '<unk>' occupying index 0, the two sentiment classes would be numericalized
# as 1 and 2, which are invalid targets for a binary loss and can never match
# the 0/1 predictions produced by binary_accuracy.
LABEL = Field(sequential=False, unk_token=None, dtype=torch.float)

In [None]:
# Load the IMDB train and test splits, processed through the TEXT/LABEL fields.
train_data, test_data = IMDB.splits(TEXT, LABEL)

# Fit both vocabularies on the training split only; the text vocabulary is
# limited via max_size=25000.
TEXT.build_vocab(train_data, max_size=25000)
LABEL.build_vocab(train_data)

# Batch both splits with the same batch size, placing tensors on the GPU
# when one is available and on the CPU otherwise.
BATCH_SIZE = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data),
    batch_size=BATCH_SIZE,
    device=device,
)

In [None]:
class LSTM(nn.Module):
    """
    LSTM model for sentiment analysis.

    Token ids are embedded, passed through a (possibly stacked) LSTM, and the
    final hidden state of the top LSTM layer is projected by a fully
    connected layer.

    Args:
        input_dim: Number of rows in the embedding table (vocabulary size).
        embedding_dim: Size of each embedding vector.
        hidden_dim: Number of hidden units in each LSTM layer.
        output_dim: Number of values produced per input sequence.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied to the embeddings and, when
            ``n_layers > 1``, between LSTM layers.
        pad_idx: Optional vocabulary index of the padding token. When given,
            that embedding row is initialised to zeros and receives no
            gradient. Defaults to ``None`` (no special padding row), which
            reproduces the previous behaviour.
    """
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, n_layers, dropout, pad_idx=None):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=pad_idx)
        # nn.LSTM applies dropout only between stacked layers and emits a
        # UserWarning when asked for dropout with a single layer, so the
        # recurrent dropout is disabled in that case (it would be a no-op).
        lstm_dropout = dropout if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=lstm_dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
        Compute one output vector per sequence.

        Args:
            x: Integer tensor of token ids with the batch dimension first
                (the LSTM is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_dim)`` holding raw, unactivated
            scores.
        """
        # Pass through the embedding layer (with dropout on the embeddings)
        embedded = self.dropout(self.embedding(x))

        # Pass through the LSTM layer; only the final hidden states are used
        _, (hidden, _) = self.lstm(embedded)

        # hidden[-1] is the final hidden state of the last (top) LSTM layer
        return self.fc(hidden[-1])

In [None]:
# Sizes dictated by the data and the task
input_dim = len(TEXT.vocab)  # one embedding row per vocabulary entry
output_dim = 1  # a single score per review (binary classification)

# Tunable architecture settings
embedding_dim = 100  # width of each embedding vector
hidden_dim = 256  # hidden units in every LSTM layer
n_layers = 2  # stacked LSTM layers
dropout = 0.5  # dropout probability

In [None]:
# Build the model and move it to the device the iterators place batches on
# (CUDA when available, otherwise CPU). Without this the model stays on the
# CPU and the forward pass fails on GPU machines with a device mismatch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTM(input_dim, embedding_dim, hidden_dim, output_dim, n_layers, dropout).to(device)

# Define loss function and optimizer.
# The optimizer is created after the move so it tracks the on-device parameters.
criterion = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.Adam(model.parameters())

In [None]:
def train(model, iterator, optimizer, criterion):
    """
    Run one optimisation pass over every batch in `iterator`.

    Returns a (loss, accuracy) pair; each is the sum of the per-batch values
    divided by the number of batches in `iterator`.
    """
    model.train()  # training mode (enables dropout)
    losses, accuracies = [], []

    for batch in iterator:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        inputs, targets = batch.text, batch.label

        # Forward pass: drop dimension 1 so the scores line up with the labels
        logits = model(inputs).squeeze(1)
        batch_loss = criterion(logits, targets)
        batch_acc = binary_accuracy(logits, targets)

        # Backward pass followed by the parameter update
        batch_loss.backward()
        optimizer.step()

        losses.append(batch_loss.item())
        accuracies.append(batch_acc.item())

    n_batches = len(iterator)
    return sum(losses) / n_batches, sum(accuracies) / n_batches

In [None]:
def binary_accuracy(preds, y):
    """
    Fraction of entries whose thresholded prediction equals the label.

    `preds` holds raw scores: they are passed through a sigmoid and rounded
    to 0/1 before being compared element-wise with `y`.
    """
    hard_preds = preds.sigmoid().round()  # probabilities -> 0/1 decisions
    matches = hard_preds.eq(y).float()  # 1.0 where prediction equals label
    return matches.sum() / len(matches)

In [None]:
N_EPOCHS = 5  # full passes over the training iterator
for epoch in range(N_EPOCHS):
    # One pass of optimisation; report that epoch's loss and accuracy
    train_loss, train_acc = train(
        model=model,
        iterator=train_iterator,
        optimizer=optimizer,
        criterion=criterion,
    )
    epoch_summary = f'Epoch {epoch+1}/{N_EPOCHS} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.3f}'
    print(epoch_summary)

In [None]:
def evaluate(model, iterator, criterion):
    """
    Score the model on every batch in `iterator` without updating parameters.

    Returns a (loss, accuracy) pair; each is the sum of the per-batch values
    divided by the number of batches in `iterator`.
    """
    model.eval()  # evaluation mode (disables dropout)
    losses, accuracies = [], []

    # No gradients are needed while scoring
    with torch.no_grad():
        for batch in iterator:
            inputs, targets = batch.text, batch.label

            # Forward pass: drop dimension 1 so the scores line up with the labels
            logits = model(inputs).squeeze(1)

            losses.append(criterion(logits, targets).item())
            accuracies.append(binary_accuracy(logits, targets).item())

    n_batches = len(iterator)
    return sum(losses) / n_batches, sum(accuracies) / n_batches

# Cell 9: Evaluate on Test Set
# Score the trained model on the held-out test iterator and report the result.
test_loss, test_acc = evaluate(
    model=model,
    iterator=test_iterator,
    criterion=criterion,
)
test_summary = f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc:.3f}'
print(test_summary)