# Deep Learning Model Experiments: RNN vs. LSTM vs. GRU

In this notebook, we evaluate the performance of Recurrent Neural Networks (RNNs) and two of their variants: Long Short-Term Memory (LSTM) and Gated Recurrent Units (GRU). These architectures use memory cells to retain or discard relevant information, effectively addressing the vanishing gradient problem. 

The task at hand is fake news detection, which is a binary classification problem. To tackle this, we use two Kaggle datasets (ISOT and Kaggle True or Fake) containing labeled news articles categorized as either True or Fake. The datasets have already been preprocessed: all text has been lowercased, special characters have been removed, and the data has been split into training and test sets, making it ready for vectorization.

In [15]:
# Import Packages
import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

from collections import Counter
import copy
from sklearn.metrics import classification_report

In [16]:
# Fixed Hyperparameters
MAX_VOCAB_SIZE = 30000
MAX_LEN = 512
EMBED_DIM = 100
HIDDEN_DIM = 128
BATCH_SIZE = 16
EPOCHS = 20

## Dataset Preprocessing

In [17]:
# Load datasets: choose ISOT or Kaggle True or Fake datasets

train_df = pd.read_csv("data/kaggle/preprocessed/train.csv")
test_df = pd.read_csv("data/kaggle/preprocessed/test.csv")

# train_df = pd.read_csv("data/isot/preprocessed/train.csv")
# test_df = pd.read_csv("data/isot/preprocessed/test.csv")

In [18]:
def tokenize(text):
    return text.split()

def encode(vocab, text):
    return [vocab.get(tok, 1) for tok in tokenize(text)[:MAX_LEN]]

In [19]:
# Build vocabulary
counter = Counter()
for text in train_df["text"]:
    tokens = tokenize(text)
    counter.update(tokens)

most_common = counter.most_common(MAX_VOCAB_SIZE - 2)
vocab = {"<PAD>": 0, "<UNK>": 1}
for i, (word, _) in enumerate(most_common, start=2):
    vocab[word] = i

In [20]:
# Custom Dataset
class TextDataset(Dataset):
    def __init__(self, df):
        self.texts = [torch.tensor(encode(vocab, text), dtype=torch.long) for text in df["text"]]
        self.labels = torch.tensor(df["label"].values, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]

In [21]:
def collate_fn(batch):
    texts, labels = zip(*batch)
    texts = pad_sequence(texts, batch_first=True, padding_value=0)
    texts = texts[:, :MAX_LEN]  # truncate if needed
    return texts, torch.tensor(labels)

In [22]:
from torch.utils.data import DataLoader, random_split

# Validation split
val_ratio = 0.2
total_len = len(train_df)
val_len = int(total_len * val_ratio)
train_len = total_len - val_len

# Full train dataset
full_train_ds = TextDataset(train_df)

# Split train/val
train_ds, val_ds = random_split(full_train_ds, [train_len, val_len], generator=torch.Generator().manual_seed(42))

# DataLoaders
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Test DataLoader
test_ds = TextDataset(test_df)
test_loader = DataLoader(test_ds, batch_size=32, collate_fn=collate_fn)


## Defining the models
We implement our three models into classes. Our models would have the same hyperparameters: embedding and hidden dimensions, dropout rate, number of layers in order to be able to compare their performances efficiently.

In [23]:
# RNN Model
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=vocab["<PAD>"])
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.init_weights()

    def init_weights(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)

    def forward(self, x):
        x = self.embedding(x)
        _, hidden = self.rnn(x)
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)

In [24]:
# LSTM Model
class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=vocab["<PAD>"])
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.init_weights()

    def init_weights(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)

    def forward(self, x):
        x = self.embedding(x)
        _, (hidden, _) = self.lstm(x)
        return self.fc(hidden[-1])

In [25]:
# GRU Model
class GRUClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=vocab["<PAD>"])
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.init_weights()

    def init_weights(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)

    def forward(self, x):
        x = self.embedding(x)
        _, hidden = self.gru(x)
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)

In [26]:
# Function to compute the accuracy for the evaluation
def accuracy_fn(y_pred, y_true):
    preds = torch.argmax(y_pred, dim=1)
    correct = (preds == y_true).sum().item()
    total = y_true.size(0)
    return correct / total

## RNN Model evaluation

In [27]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize model
model = RNNClassifier(len(vocab), embedding_dim=100, hidden_dim=128, output_dim=len(train_df["label"].unique()))
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-1)
criterion = nn.CrossEntropyLoss()

Using device: cuda


In [28]:
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}")

Epoch 1 Training Loss: 2.0025
Epoch 1 Validation Loss: 0.9755
Epoch 2 Training Loss: 1.6803
Epoch 2 Validation Loss: 1.0395
Epoch 3 Training Loss: 2.1502
Epoch 3 Validation Loss: 0.9600
Epoch 4 Training Loss: 1.8748
Epoch 4 Validation Loss: 1.3098
Epoch 5 Training Loss: 1.8470
Epoch 5 Validation Loss: 2.0260
Epoch 6 Training Loss: 1.7276
Epoch 6 Validation Loss: 1.8981
Epoch 7 Training Loss: 1.7383
Epoch 7 Validation Loss: 0.8439
Epoch 8 Training Loss: 1.9073
Epoch 8 Validation Loss: 2.0704
Epoch 9 Training Loss: 2.1005
Epoch 9 Validation Loss: 0.9550
Epoch 10 Training Loss: 1.6733
Epoch 10 Validation Loss: 1.2588
Epoch 11 Training Loss: 1.6504
Epoch 11 Validation Loss: 1.1074
Epoch 12 Training Loss: 1.8242
Epoch 12 Validation Loss: 1.0688
Epoch 13 Training Loss: 1.8849
Epoch 13 Validation Loss: 2.4650
Epoch 14 Training Loss: 1.9479
Epoch 14 Validation Loss: 1.1213
Epoch 15 Training Loss: 1.8099
Epoch 15 Validation Loss: 1.0166
Epoch 16 Training Loss: 2.0481
Epoch 16 Validation Loss: 1

We observe here, that our RNN model is not learning. When computing the gradient, RNNs can suffer from the severe attenuation of the gradient because of the multiple time steps (here the different tokens).

In [29]:
model.eval()
total_acc = 0
y_true = []
y_pred = []

with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        predictions = torch.argmax(outputs, dim=1).cpu()
        acc = accuracy_fn(outputs, y_batch)
        total_acc += acc

        y_true.extend(y_batch.cpu().numpy())
        y_pred.extend(predictions.numpy())

avg_acc = total_acc / len(test_loader)
print(f"Test Accuracy: {avg_acc:.4f}")

print(classification_report(y_true, y_pred))

Test Accuracy: 0.5361
              precision    recall  f1-score   support

           0       0.53      0.57      0.55       624
           1       0.55      0.51      0.53       643

    accuracy                           0.54      1267
   macro avg       0.54      0.54      0.54      1267
weighted avg       0.54      0.54      0.54      1267



Thus, the RNN model is no better than a random prediction on this dataset. We will rely on the other variants for better performance.

## LSTM Model Evaluation

In [30]:
# Initialize model
model = LSTMClassifier(len(vocab), embedding_dim=100, hidden_dim=128, output_dim=len(train_df["label"].unique()), dropout=0)
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()


In [31]:
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}")

Epoch 1 Training Loss: 0.6441
Epoch 1 Validation Loss: 0.6237
Epoch 2 Training Loss: 0.4845
Epoch 2 Validation Loss: 0.6271
Epoch 3 Training Loss: 0.3460
Epoch 3 Validation Loss: 0.6603
Epoch 4 Training Loss: 0.3412
Epoch 4 Validation Loss: 0.7411
Epoch 5 Training Loss: 0.2805
Epoch 5 Validation Loss: 0.7669
Epoch 6 Training Loss: 0.3515
Epoch 6 Validation Loss: 0.6237
Epoch 7 Training Loss: 0.2331
Epoch 7 Validation Loss: 0.7308
Epoch 8 Training Loss: 0.2194
Epoch 8 Validation Loss: 0.7504
Epoch 9 Training Loss: 0.2482
Epoch 9 Validation Loss: 0.8766
Epoch 10 Training Loss: 0.2427
Epoch 10 Validation Loss: 0.8295
Epoch 11 Training Loss: 0.2016
Epoch 11 Validation Loss: 0.8000
Epoch 12 Training Loss: 0.2014
Epoch 12 Validation Loss: 0.8862
Epoch 13 Training Loss: 0.3370
Epoch 13 Validation Loss: 0.7104
Epoch 14 Training Loss: 0.2533
Epoch 14 Validation Loss: 0.7450
Epoch 15 Training Loss: 0.2124
Epoch 15 Validation Loss: 1.3705
Epoch 16 Training Loss: 0.2287
Epoch 16 Validation Loss: 0

In [32]:
model.eval()
total_acc = 0
y_true = []
y_pred = []

with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        predictions = torch.argmax(outputs, dim=1).cpu()
        acc = accuracy_fn(outputs, y_batch)
        total_acc += acc

        y_true.extend(y_batch.cpu().numpy())
        y_pred.extend(predictions.numpy())

avg_acc = total_acc / len(test_loader)
print(f"Test Accuracy: {avg_acc:.4f}")

print(classification_report(y_true, y_pred))


Test Accuracy: 0.8007
              precision    recall  f1-score   support

           0       0.84      0.73      0.78       624
           1       0.77      0.87      0.82       643

    accuracy                           0.80      1267
   macro avg       0.81      0.80      0.80      1267
weighted avg       0.81      0.80      0.80      1267



## GRU Model Evaluation

In [33]:
# Initialize model
model = GRUClassifier(len(vocab), embedding_dim=100, hidden_dim=128, output_dim=len(train_df["label"].unique()))
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()


In [34]:
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}")

Epoch 1 Training Loss: 0.6442
Epoch 1 Validation Loss: 0.6205
Epoch 2 Training Loss: 0.5040
Epoch 2 Validation Loss: 0.6142
Epoch 3 Training Loss: 0.3452
Epoch 3 Validation Loss: 0.7879
Epoch 4 Training Loss: 0.2836
Epoch 4 Validation Loss: 0.8194
Epoch 5 Training Loss: 0.2648
Epoch 5 Validation Loss: 0.7548
Epoch 6 Training Loss: 0.2614
Epoch 6 Validation Loss: 0.9116
Epoch 7 Training Loss: 0.2206
Epoch 7 Validation Loss: 0.8761
Epoch 8 Training Loss: 0.1926
Epoch 8 Validation Loss: 0.8360
Epoch 9 Training Loss: 0.2249
Epoch 9 Validation Loss: 0.8442
Epoch 10 Training Loss: 0.1796
Epoch 10 Validation Loss: 0.7978
Epoch 11 Training Loss: 0.1582
Epoch 11 Validation Loss: 0.5567
Epoch 12 Training Loss: 0.1116
Epoch 12 Validation Loss: 0.5859
Epoch 13 Training Loss: 0.0737
Epoch 13 Validation Loss: 0.8061
Epoch 14 Training Loss: 0.0679
Epoch 14 Validation Loss: 0.8899
Epoch 15 Training Loss: 0.0572
Epoch 15 Validation Loss: 0.9412
Epoch 16 Training Loss: 0.0642
Epoch 16 Validation Loss: 0

In [35]:
model.eval()
total_acc = 0
y_true = []
y_pred = []

with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        predictions = torch.argmax(outputs, dim=1).cpu()
        acc = accuracy_fn(outputs, y_batch)
        total_acc += acc

        y_true.extend(y_batch.cpu().numpy())
        y_pred.extend(predictions.numpy())

avg_acc = total_acc / len(test_loader)
print(f"Test Accuracy: {avg_acc:.4f}")

print(classification_report(y_true, y_pred))

Test Accuracy: 0.8409
              precision    recall  f1-score   support

           0       0.86      0.81      0.83       624
           1       0.82      0.87      0.85       643

    accuracy                           0.84      1267
   macro avg       0.84      0.84      0.84      1267
weighted avg       0.84      0.84      0.84      1267

