In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only); the cells below use paths under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
# Switch the working directory to the project's src/jpynb folder on the mounted Drive.
# NOTE(review): absolute Colab path — this cell only works after Drive has been mounted.
os.chdir('/content/drive/MyDrive/github/imdb-sentiment-comparison-rnn-transformer/src/jpynb')

In [3]:
import sys

# Put the project's src/python folder on the import path.
# Guarded so that re-running this cell does not append the same entry again.
path = '/content/drive/MyDrive/github/imdb-sentiment-comparison-rnn-transformer/src/python'
if path not in sys.path:
    sys.path.append(path)


In [4]:
import sklearn
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
import torch
from torch import optim, nn
import numpy as np
# Use the GPU when PyTorch reports one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# DataLoader

In [5]:
import data

# Pull the ready-made loaders out of the project's `data` module:
# one train/test pair for the recurrent models, one pair for the Transformer.
train_loader_rnn, test_loader_rnn = data.train_loader_rnn, data.test_loader_rnn
train_loader_tr, test_loader_tr = data.train_loader_tr, data.test_loader_tr

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


# Evaluation helpers

In [6]:
def evaluation(y, y_proba, threshold=0.5):
    """Print accuracy, ROC AUC, precision and recall for binary predictions.

    Args:
        y: Ground-truth binary labels; passed unchanged to the scikit-learn metrics.
        y_proba: Predicted scores for the positive class. Any array-like is
            accepted; it is converted with ``np.asarray`` before thresholding.
        threshold: Scores strictly greater than this value are predicted as
            class 1. Defaults to 0.5.

    Returns:
        None. The four metrics are printed, each rounded to 4 decimals.
    """
    y_proba = np.asarray(y_proba)
    # Hard 0/1 decisions feed accuracy/precision/recall; ROC AUC is computed on the raw scores.
    y_pred = (y_proba > threshold).astype(int)
    accuracy = accuracy_score(y, y_pred)
    roc = roc_auc_score(y, y_proba)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)

    print(f'Accuracy Score: {np.round(accuracy, 4)}')
    print(f"ROC AUC Score : {np.round(roc, 4)}")
    print(f"Precision      : {np.round(precision, 4)}")
    print(f"Recall         : {np.round(recall, 4)}")

def get_predictions(model, loader, device, is_rnn=True):
    """Run ``model`` over ``loader`` without gradients and collect labels and probabilities.

    The model is switched to eval mode and is left in eval mode on return.

    Args:
        model: Module called as ``model(inputs, aux)`` and expected to return logits.
        loader: Iterable of ``(inputs, aux, labels)`` batches. ``aux`` is the second
            batch element (named ``lengths`` for the RNN loaders and ``mask`` for the
            Transformer loaders elsewhere in this notebook).
        device: Device the inputs are moved to before the forward pass.
        is_rnn: When True, ``aux`` is handed to the model as-is (it is not moved to
            ``device``). When False, ``aux`` is moved to ``device`` first.

    Returns:
        ``(all_labels, all_probs)``: two flat NumPy arrays holding the labels and
        ``sigmoid(logits)`` of every batch, concatenated in loader order.
    """
    model.eval()
    all_probs = []
    all_labels = []

    with torch.no_grad():
        # One loop serves both model families; the only difference is whether the
        # auxiliary tensor follows the inputs onto the device.
        for inputs, aux, targets in loader:
            inputs = inputs.to(device)
            if not is_rnn:
                aux = aux.to(device)

            probs = torch.sigmoid(model(inputs, aux)).view(-1)

            all_probs.append(probs.cpu())
            all_labels.append(targets.view(-1).cpu())

    all_probs = torch.cat(all_probs).numpy()
    all_labels = torch.cat(all_labels).numpy()
    return all_labels, all_probs

# Train RNN Model

In [7]:
from models import IMDBRNN
# Same device rule as the setup cell: GPU if available, else CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyperparameters for the recurrent classifier (the LSTM cell below reuses them).
vocab_size, embedding_dim, hidden_dim = 20000, 128, 256
padding_idx = 1
num_layers = 2
dropout = 0.3

clf_rnn = IMDBRNN(
    input_size=vocab_size,
    embed_dim=embedding_dim,
    hidden_size=hidden_dim,
    num_layers=num_layers,
    padding_idx=padding_idx,
    dropout=dropout,
).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(clf_rnn.parameters(), lr=0.001)

for epoch in range(10):
    clf_rnn.train()
    total_loss = 0
    for X_tr, lengths, y_tr in train_loader_rnn:
        # Inputs and labels go to the device; `lengths` is passed to the model untouched.
        X_tr = X_tr.to(device)
        y_tr = y_tr.to(device)

        logits = clf_rnn(X_tr, lengths)
        loss = criterion(logits, y_tr)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Report the mean per-batch loss of the epoch.
    print(f"epoch: {epoch + 1}, loss: {total_loss / len(train_loader_rnn)}")

epoch: 1, loss: 0.693548228887036
epoch: 2, loss: 0.6812971828843627
epoch: 3, loss: 0.6592243470041953
epoch: 4, loss: 0.6923161525555583
epoch: 5, loss: 0.663967124641399
epoch: 6, loss: 0.6396287109540857
epoch: 7, loss: 0.6182453757356805
epoch: 8, loss: 0.5812979475464053
epoch: 9, loss: 0.607650258580742
epoch: 10, loss: 0.5761672301060708


# Train LSTM Model

In [8]:
from models import IMDBLSTM

# Build the LSTM classifier from the hyperparameters defined in the RNN cell above.
clf_lstm = IMDBLSTM(
    input_size=vocab_size,
    embed_dim=embedding_dim,
    hidden_size=hidden_dim,
    num_layers=num_layers,
    padding_idx=padding_idx,
    dropout=dropout,
).to(device)

# Fresh loss and optimiser objects bound to the LSTM's parameters.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(clf_lstm.parameters(), lr=0.001)

for epoch in range(10):
    clf_lstm.train()
    total_loss = 0
    for X_tr, lengths, y_tr in train_loader_rnn:
        # Inputs and labels go to the device; `lengths` is passed to the model untouched.
        X_tr = X_tr.to(device)
        y_tr = y_tr.to(device)

        logits = clf_lstm(X_tr, lengths)
        loss = criterion(logits, y_tr)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Report the mean per-batch loss of the epoch.
    print(f"epoch: {epoch + 1}, loss: {total_loss / len(train_loader_rnn)}")

epoch: 1, loss: 0.6737376314294917
epoch: 2, loss: 0.6434278651271634
epoch: 3, loss: 0.4435680077585113
epoch: 4, loss: 0.3252965375742949
epoch: 5, loss: 0.2572475076483948
epoch: 6, loss: 0.21748010915182436
epoch: 7, loss: 0.1836921989803424
epoch: 8, loss: 0.15366816096712865
epoch: 9, loss: 0.12856053744854828
epoch: 10, loss: 0.10535828338559632


# Train Transformer Model

In [11]:
from models import TransformerClassifier

# Transformer hyperparameters. Note that vocab_size, num_layers and dropout
# rebind the globals of the same name set in the RNN cell.
vocab_size = 20000
max_len = 512
d_model, num_heads, d_ff = 128, 8, 512
num_layers = 2
num_classes = 1
dropout = 0.3

clf_tr = TransformerClassifier(
    vocab_size=vocab_size,
    d_model=d_model,
    num_heads=num_heads,
    num_layers=num_layers,
    d_ff=d_ff,
    num_classes=num_classes,
    max_len=max_len,
    dropout=dropout,
)
clf_tr = clf_tr.to(device)

# Same loss as the recurrent models; Adam runs with a smaller step size here (1e-4).
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(clf_tr.parameters(), lr=1e-4)

In [12]:
for epoch in range(10):
    clf_tr.train()
    total_loss = 0

    for X_tr, mask, y_tr in train_loader_tr:
        X_tr = X_tr.to(device)
        mask = mask.to(device)
        # Labels are reshaped to a column (-1, 1) before the loss —
        # presumably to match the shape of the model output; verify against the model.
        y_tr = y_tr.to(device).view(-1, 1)

        logits = clf_tr(X_tr, mask)
        loss = criterion(logits, y_tr)

        # Standard update: clear old gradients, backpropagate, step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean per-batch loss of the epoch, shown with 4 decimals.
    print(f"epoch: {epoch + 1}, loss: {total_loss / len(train_loader_tr):.4f}")

epoch: 1, loss: 0.5876
epoch: 2, loss: 0.4581
epoch: 3, loss: 0.4141
epoch: 4, loss: 0.3818
epoch: 5, loss: 0.3557
epoch: 6, loss: 0.3366
epoch: 7, loss: 0.3216
epoch: 8, loss: 0.3025
epoch: 9, loss: 0.2917
epoch: 10, loss: 0.2794


RNN train score

In [13]:
# Score the RNN classifier on the loader it was trained on.
labels, probs = get_predictions(
    model=clf_rnn,
    loader=train_loader_rnn,
    device=device,
    is_rnn=True,
)
evaluation(y=labels, y_proba=probs)

Accuracy Score: 0.7603
ROC AUC Score : 0.8373
Precision      : 0.7737
Recall         : 0.7358


LSTM train score

In [14]:
# Score the LSTM classifier on the loader it was trained on.
labels, probs = get_predictions(
    model=clf_lstm,
    loader=train_loader_rnn,
    device=device,
    is_rnn=True,
)
evaluation(y=labels, y_proba=probs)

Accuracy Score: 0.9864
ROC AUC Score : 0.9989
Precision      : 0.9928
Recall         : 0.9798


Transformer train score

In [15]:
# Score the Transformer classifier on the loader it was trained on.
labels, probs = get_predictions(
    model=clf_tr,
    loader=train_loader_tr,
    device=device,
    is_rnn=False,
)
evaluation(y=labels, y_proba=probs)

Accuracy Score: 0.9193
ROC AUC Score : 0.9739
Precision      : 0.9095
Recall         : 0.9313


# Test Score

In [16]:
# Test-set evaluation: each model is scored on its matching test loader,
# and the labels/probabilities are kept in per-model globals.

# 1. RNN
print("="*30)
print("RNN Test Set Evaluation")
labels_rnn_test, probs_rnn_test = get_predictions(
    model=clf_rnn, loader=test_loader_rnn, device=device, is_rnn=True
)
evaluation(y=labels_rnn_test, y_proba=probs_rnn_test)

# 2. LSTM
print("\n" + "="*30)
print("LSTM Test Set Evaluation")
labels_lstm_test, probs_lstm_test = get_predictions(
    model=clf_lstm, loader=test_loader_rnn, device=device, is_rnn=True
)
evaluation(y=labels_lstm_test, y_proba=probs_lstm_test)

# 3. Transformer
print("\n" + "="*30)
print("Transformer Test Set Evaluation")
labels_tr_test, probs_tr_test = get_predictions(
    model=clf_tr, loader=test_loader_tr, device=device, is_rnn=False
)
evaluation(y=labels_tr_test, y_proba=probs_tr_test)
print("="*30)

RNN Test Set Evaluation
Accuracy Score: 0.6416
ROC AUC Score : 0.6965
Precision      : 0.6511
Recall         : 0.6104

LSTM Test Set Evaluation
Accuracy Score: 0.8798
ROC AUC Score : 0.9498
Precision      : 0.9063
Recall         : 0.8472

Transformer Test Set Evaluation
Accuracy Score: 0.8582
ROC AUC Score : 0.9355
Precision      : 0.8494
Recall         : 0.8708
