# In [None]:
# Apply RNNs to a real-world task by training an RNN to perform sentiment analysis on a set of movie reviews.
# Use a dataset of movie reviews labeled as positive or negative.
# Tokenize the text and convert it to sequences of word embeddings.
# Implement the RNN for binary classification (positive/negative sentiment).
# Measure the model's accuracy on a test set.
import torch
import torch.nn as nn
import torch.optim as optim

# The Field / LabelField / BucketIterator API used below lives in
# ``torchtext.legacy`` (available in torchtext 0.9-0.11). The import must be
# active: with it commented out, every ``data.`` / ``datasets.`` reference
# below raises NameError.
from torchtext.legacy import data, datasets

# Load the IMDb dataset
# TEXT tokenizes each review with spaCy's ``en_core_web_sm`` pipeline;
# LABEL stores the sentiment label as a float tensor.
TEXT = data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm')
LABEL = data.LabelField(dtype=torch.float)

# NOTE: the dataset class is spelled ``IMDB`` (all caps) in torchtext;
# ``datasets.IMDb`` is not an attribute of the module.
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

# Build the vocabulary
# Vocabularies are built from the training split only, so no test-set
# tokens leak into the model's input space.
TEXT.build_vocab(train_data, max_size=25000)
LABEL.build_vocab(train_data)

# Create data iterators
# BucketIterator batches examples of similar length together, which keeps
# the amount of padding per batch small.
train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data),
    batch_size=64,
    device=torch.device('cpu')
)

class RNN(nn.Module):
    """Recurrent classifier: embedding -> ``nn.RNN`` -> linear projection.

    The prediction for each sequence is computed from the RNN's final
    hidden state only; the per-step outputs are discarded.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        """Create the three layers of the model.

        Args:
            input_dim: Number of entries in the embedding table.
            embedding_dim: Size of each embedding vector (RNN input size).
            hidden_dim: Size of the RNN hidden state.
            output_dim: Number of values the final linear layer produces.
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=input_dim,
            embedding_dim=embedding_dim,
        )
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text):
        """Map a tensor of token indices to one score vector per sequence.

        ``nn.RNN`` is constructed with its default ``batch_first=False``, so
        ``text`` is presumably laid out as (seq_len, batch) -- confirm
        against the iterator that feeds this model.
        """
        token_vectors = self.embedding(text)
        # Only the final hidden state is used; per-step outputs are ignored.
        _, final_hidden = self.rnn(token_vectors)
        # Drop the leading (size-1) layer dimension before the projection.
        return self.fc(final_hidden.squeeze(dim=0))

# Initialize the model
# The embedding table needs one row per vocabulary entry; the remaining
# sizes are fixed hyperparameters. A single output value is produced per
# review (one logit for the binary decision).
input_dim = len(TEXT.vocab)
embedding_dim, hidden_dim, output_dim = 100, 256, 1

model = RNN(
    input_dim=input_dim,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# Define loss and optimizer
# BCEWithLogitsLoss applies the sigmoid internally, so the model emits raw
# logits. Adam is used with its default settings.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters())

# Training loop
# Runs 5 passes over the training iterator. The loss printed per epoch is
# the mean over all mini-batches of that epoch; reporting only the last
# batch's loss is noisy and fails with NameError if the iterator is empty.
model.train()  # make sure the model is in training mode before updating it
for epoch in range(5):
    epoch_loss = 0.0
    num_batches = 0
    for batch in train_iterator:
        text, labels = batch.text, batch.label

        optimizer.zero_grad()  # clear gradients left over from the last step
        # The model returns one value per example in a trailing dimension of
        # size 1; squeeze it so predictions line up with the label vector.
        predictions = model(text).squeeze(1)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # max(..., 1) keeps the report well-defined when no batch was produced.
    print(f"Epoch {epoch}, Loss: {epoch_loss / max(num_batches, 1)}")

# Evaluation
def binary_accuracy(preds, y):
    """Return the share of predictions that match the targets.

    Args:
        preds: Raw scores (logits); they are passed through a sigmoid and
            rounded to the nearest integer before comparison.
        y: Targets compared element-wise against the rounded values.

    Returns:
        A tensor holding the number of matches divided by the length of the
        comparison result.
    """
    probabilities = torch.sigmoid(preds)
    hits = torch.eq(probabilities.round(), y).float()
    return hits.sum() / len(hits)

# Measure accuracy over the whole test set.
# Each batch's accuracy is weighted by its number of examples, so the result
# is correct-predictions / total-examples. A plain mean of per-batch
# accuracies would over-weight a smaller final batch.
model.eval()  # no mode-dependent layers in this model, but safe if any are added
acc = 0.0
num_examples = 0
with torch.no_grad():  # evaluation needs no autograd graph
    for batch in test_iterator:
        text, labels = batch.text, batch.label
        predictions = model(text).squeeze(1)
        batch_examples = len(labels)
        acc += binary_accuracy(predictions, labels).item() * batch_examples
        num_examples += batch_examples

# max(..., 1) avoids a ZeroDivisionError when the iterator yields nothing.
print(f"Test Accuracy: {acc / max(num_examples, 1)}")
# This lab applies RNNs to sentiment analysis, a common NLP task. Students learn how to preprocess text data, build and train an RNN for classification, and evaluate the model's performance.