# Deep Learning Model Experiments: RNN vs. LSTM vs. GRU

In this notebook, we evaluate the performance of Recurrent Neural Networks (RNNs) and two of their variants: Long Short-Term Memory (LSTM) and Gated Recurrent Units (GRU). These architectures use memory cells to retain or discard relevant information, effectively addressing the vanishing gradient problem. 

The task at hand is fake news detection, which is a binary classification problem. To tackle this, we use two Kaggle datasets (ISOT and Kaggle True or Fake) containing labeled news articles categorized as either True or Fake. The datasets have already been preprocessed: all text has been lowercased, special characters have been removed, and the data has been split into training and test sets, making it ready for vectorization.

In [None]:
# Import Packages
import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

from collections import Counter
import copy

In [None]:
# Fixed Hyperparameters
MAX_VOCAB_SIZE = 30000
MAX_LEN = 512
EMBED_DIM = 100
HIDDEN_DIM = 128
BATCH_SIZE = 16
EPOCHS = 15

## Dataset Preprocessing

In [None]:
# Load datasets: choose ISOT or Kaggle True or Fake datasets

# train_df = pd.read_csv("data/kaggle/preprocessed/train.csv")
# test_df = pd.read_csv("data/kaggle/preprocessed/test.csv")

train_df = pd.read_csv("data/isot/preprocessed/train.csv")
test_df = pd.read_csv("data/isot/preprocessed/test.csv")

In [4]:
def tokenize(text):
    return text.split()

def encode(vocab, text):
    return [vocab.get(tok, 1) for tok in tokenize(text)[:MAX_LEN]]

In [5]:
# Build vocabulary
counter = Counter()
for text in train_df["text"]:
    tokens = tokenize(text)
    counter.update(tokens)

most_common = counter.most_common(MAX_VOCAB_SIZE - 2)
vocab = {"<PAD>": 0, "<UNK>": 1}
for i, (word, _) in enumerate(most_common, start=2):
    vocab[word] = i

In [6]:
# Custom Dataset
class TextDataset(Dataset):
    def __init__(self, df):
        self.texts = [torch.tensor(encode(vocab, text), dtype=torch.long) for text in df["text"]]
        self.labels = torch.tensor(df["label"].values, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]

In [7]:
def collate_fn(batch):
    texts, labels = zip(*batch)
    texts = pad_sequence(texts, batch_first=True, padding_value=0)
    texts = texts[:, :MAX_LEN]  # truncate if needed
    return texts, torch.tensor(labels)

In [8]:
from torch.utils.data import DataLoader, random_split

# Validation split
val_ratio = 0.2
total_len = len(train_df)
val_len = int(total_len * val_ratio)
train_len = total_len - val_len

# Full train dataset
full_train_ds = TextDataset(train_df)

# Split train/val
train_ds, val_ds = random_split(full_train_ds, [train_len, val_len], generator=torch.Generator().manual_seed(42))

# DataLoaders
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Test DataLoader
test_ds = TextDataset(test_df)
test_loader = DataLoader(test_ds, batch_size=32, collate_fn=collate_fn)


## Defining the models
We implement our three models into classes. Our models would have the same hyperparameters: embedding and hidden dimensions, dropout rate, number of layers in order to be able to compare their performances efficiently.

In [9]:
# RNN Model
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=vocab["<PAD>"])
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        _, hidden = self.rnn(x)
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)

In [10]:
# LSTM Model
class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=vocab["<PAD>"])
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        _, (hidden, _) = self.lstm(x)
        return self.fc(hidden[-1])

In [11]:
# GRU Model
class GRUClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=vocab["<PAD>"])
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        _, hidden = self.gru(x)
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)

In [None]:
# Function to compute the accuracy for the evaluation
def accuracy_fn(y_pred, y_true):
    preds = torch.argmax(y_pred, dim=1)
    correct = (preds == y_true).sum().item()
    total = y_true.size(0)
    return correct / total

## RNN Model evaluation

In [22]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize model
model = RNNClassifier(len(vocab), embedding_dim=100, hidden_dim=128, output_dim=len(train_df["label"].unique()))
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-1)
criterion = nn.CrossEntropyLoss()

Using device: cuda


In [23]:
for epoch in range(15):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}")

Epoch 1 Training Loss: 1.8978
Epoch 1 Validation Loss: 2.5809
Epoch 2 Training Loss: 1.8921
Epoch 2 Validation Loss: 0.9936
Epoch 3 Training Loss: 1.8865
Epoch 3 Validation Loss: 1.5679
Epoch 4 Training Loss: 1.8345
Epoch 4 Validation Loss: 1.2352
Epoch 5 Training Loss: 1.8387
Epoch 5 Validation Loss: 0.9257
Epoch 6 Training Loss: 1.9025
Epoch 6 Validation Loss: 1.2946
Epoch 7 Training Loss: 1.9359
Epoch 7 Validation Loss: 0.9790
Epoch 8 Training Loss: 1.8553
Epoch 8 Validation Loss: 2.9727
Epoch 9 Training Loss: 1.8996
Epoch 9 Validation Loss: 1.3206
Epoch 10 Training Loss: 1.8222
Epoch 10 Validation Loss: 1.3266
Epoch 11 Training Loss: 1.8334
Epoch 11 Validation Loss: 0.9331
Epoch 12 Training Loss: 1.9003
Epoch 12 Validation Loss: 1.4756
Epoch 13 Training Loss: 1.9504
Epoch 13 Validation Loss: 0.9384
Epoch 14 Training Loss: 1.8545
Epoch 14 Validation Loss: 1.2384
Epoch 15 Training Loss: 1.8779
Epoch 15 Validation Loss: 1.4399


We observe here, that our RNN model is not learning. When computing the gradient, RNNs can suffer from the severe attenuation of the gradient because of the multiple time steps (here the different tokens).

In [24]:
model.eval()
total_acc = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        acc = accuracy_fn(outputs, y_batch)
        total_acc += acc

avg_acc = total_acc / len(test_loader)
print(f"Test Accuracy: {avg_acc:.4f}")

Test Accuracy: 0.5021


Thus, the RNN model is no better than a random prediction on this dataset. We will rely on the other variants for better performance.

## LSTM Model Evaluation

In [16]:
# Initialize model
model = LSTMClassifier(len(vocab), embedding_dim=100, hidden_dim=128, output_dim=len(train_df["label"].unique()))
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()


In [17]:
for epoch in range(15):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}")

Epoch 1 Training Loss: 0.5847
Epoch 1 Validation Loss: 0.6304
Epoch 2 Training Loss: 0.5820
Epoch 2 Validation Loss: 0.6869
Epoch 3 Training Loss: 0.5300
Epoch 3 Validation Loss: 0.1676
Epoch 4 Training Loss: 0.1415
Epoch 4 Validation Loss: 0.0878
Epoch 5 Training Loss: 0.0610
Epoch 5 Validation Loss: 0.0799
Epoch 6 Training Loss: 0.0437
Epoch 6 Validation Loss: 0.0709
Epoch 7 Training Loss: 0.0272
Epoch 7 Validation Loss: 0.0663
Epoch 8 Training Loss: 0.0177
Epoch 8 Validation Loss: 0.0788
Epoch 9 Training Loss: 0.0130
Epoch 9 Validation Loss: 0.0695
Epoch 10 Training Loss: 0.0130
Epoch 10 Validation Loss: 0.0620
Epoch 11 Training Loss: 0.0093
Epoch 11 Validation Loss: 0.0672
Epoch 12 Training Loss: 0.0094
Epoch 12 Validation Loss: 0.0804
Epoch 13 Training Loss: 0.0078
Epoch 13 Validation Loss: 0.0643
Epoch 14 Training Loss: 0.0071
Epoch 14 Validation Loss: 0.0758
Epoch 15 Training Loss: 0.0055
Epoch 15 Validation Loss: 0.0549


In [18]:
model.eval()
total_acc = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        acc = accuracy_fn(outputs, y_batch)
        total_acc += acc

avg_acc = total_acc / len(test_loader)
print(f"Test Accuracy: {avg_acc:.4f}")


Test Accuracy: 0.9869


## GRU Model Evaluation

In [19]:
# Initialize model
model = GRUClassifier(len(vocab), embedding_dim=100, hidden_dim=128, output_dim=len(train_df["label"].unique()))
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()


In [20]:
for epoch in range(15):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}")

Epoch 1 Training Loss: 0.6361
Epoch 1 Validation Loss: 0.6163
Epoch 2 Training Loss: 0.2736
Epoch 2 Validation Loss: 0.0847
Epoch 3 Training Loss: 0.0528
Epoch 3 Validation Loss: 0.0437
Epoch 4 Training Loss: 0.0179
Epoch 4 Validation Loss: 0.0208
Epoch 5 Training Loss: 0.0070
Epoch 5 Validation Loss: 0.0357
Epoch 6 Training Loss: 0.0050
Epoch 6 Validation Loss: 0.0211
Epoch 7 Training Loss: 0.0030
Epoch 7 Validation Loss: 0.0270
Epoch 8 Training Loss: 0.0041
Epoch 8 Validation Loss: 0.0229
Epoch 9 Training Loss: 0.0029
Epoch 9 Validation Loss: 0.0227
Epoch 10 Training Loss: 0.0029
Epoch 10 Validation Loss: 0.0189
Epoch 11 Training Loss: 0.0021
Epoch 11 Validation Loss: 0.0191
Epoch 12 Training Loss: 0.0009
Epoch 12 Validation Loss: 0.0860
Epoch 13 Training Loss: 0.0003
Epoch 13 Validation Loss: 0.0623
Epoch 14 Training Loss: 0.0038
Epoch 14 Validation Loss: 0.0261
Epoch 15 Training Loss: 0.0009
Epoch 15 Validation Loss: 0.0219


In [21]:
model.eval()
total_acc = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        acc = accuracy_fn(outputs, y_batch)
        total_acc += acc

avg_acc = total_acc / len(test_loader)
print(f"Test Accuracy: {avg_acc:.4f}")


Test Accuracy: 0.9944
