In [None]:
from utils import create_imdb_dataloaders
from torch import optim
import torch
import torch.nn as nn

# Download and load IMDB dataset from Stanford dataset

In [1]:
!curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 80.2M  100 80.2M    0     0  4398k      0  0:00:18  0:00:17  0:00:01 5510k2M   69 55.4M    0     0  4094k      0  0:00:20  0:00:13  0:00:07 5411k    0  4444k      0  0:00:18  0:00:18 --:--:-- 5495k


In [9]:
!tar -xf aclImdb_v1.tar.gz

In [None]:
train_dir = "aclImdb/train"
train_dataloader, vocab = create_imdb_dataloaders(train_dir)

In [None]:
test_dir = "aclImdb/test"
test_dataloader, vocab = create_imdb_dataloaders(test_dir)

# Define transformer model for classification 0 to 1

In [6]:
from transformer_encoder import TransformerEncoder
from positional_embedding import PositionalEmbedding

class Transformer(nn.Module):
    def __init__(self, embed_dim, dense_dim, num_heads, vocab_size, **kwargs):
        super(Transformer, self).__init__()

        self.embedding = PositionalEmbedding(vocab_size, embed_dim)

        self.encoder = TransformerEncoder(embed_dim, dense_dim, num_heads)

        self.global_max_pool = nn.AdaptiveMaxPool1d(1)
        self.out = nn.Linear(embed_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text, mask=None):
        embedded = self.embedding(text)
        encoder_output = self.encoder(embedded, mask)
        output = encoder_output.max(dim=1)[0]
        output = output.squeeze(-1)
        output = self.out(output)
        output = self.sigmoid(output)
        return output

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

embed_dim = 128
num_heads = 2
dense_dim = 32

transformer = Transformer(embed_dim, dense_dim, num_heads, 20000).to(device)

rmsprop = optim.RMSprop(params=transformer.parameters(), lr=0.0001)
criterion = nn.BCELoss()

  from .autonotebook import tqdm as notebook_tqdm


# Training

In [8]:
for epoch in range(10):
    transformer.train()
    correct_predictions = 0
    total_predictions = 0


    for batch in train_dataloader:
        text, label = batch

        rmsprop.zero_grad()

        output = transformer(text.to(device))

        loss = criterion(output[:, 0], label.to(device).float())

        correct_predictions += (output[:, 0] > 0.5).eq(label.to(device)).sum().item()
        total_predictions += len(label)


        loss.backward()
        rmsprop.step()


    print(f"Epoch: {epoch+1}, Loss: {loss.item()}, Accuracy: {correct_predictions / total_predictions * 100}")


Epoch: 1, Loss: 0.7373784780502319, Accuracy: 52.952
Epoch: 2, Loss: 0.63242107629776, Accuracy: 62.928
Epoch: 3, Loss: 0.44436052441596985, Accuracy: 69.34
Epoch: 4, Loss: 0.4980710446834564, Accuracy: 72.14
Epoch: 5, Loss: 0.29597508907318115, Accuracy: 74.856
Epoch: 6, Loss: 0.3648530840873718, Accuracy: 77.164
Epoch: 7, Loss: 0.5637338757514954, Accuracy: 78.928
Epoch: 8, Loss: 0.3411375880241394, Accuracy: 80.744
Epoch: 9, Loss: 0.2823878824710846, Accuracy: 82.216
Epoch: 10, Loss: 0.2987263798713684, Accuracy: 83.532


In [9]:
correct_predictions = 0
total_predictions = 0
for batch in test_dataloader:
    text, label = batch

    output = transformer(text.to(device))

    loss = criterion(output[:, 0], label.to(device).float())

    correct_predictions += (output[:, 0] > 0.5).eq(label.to(device)).sum().item()
    total_predictions += len(label)

print(f"Loss: {loss.item()}, Accuracy: {correct_predictions / total_predictions * 100}")

Loss: 1.269362211227417, Accuracy: 53.068000000000005


# AG News dataset

In [1]:
from utils import create_ag_dataloader


train_dataloader, vocab = create_ag_dataloader("ag_news/train.csv")

In [2]:
for batch in train_dataloader:
    label, text = batch
    print(label, text)
    break

tensor([[9691, 3373,    9,  ...,    0,    0,    0],
        [ 844,    6, 1568,  ...,    0,    0,    0],
        [ 189,   97, 8637,  ...,    0,    0,    0],
        ...,
        [   3, 2342,   11,  ...,    0,    0,    0],
        [2353, 2196,  240,  ...,    0,    0,    0],
        [8872, 6664,   17,  ...,    0,    0,    0]]) tensor([[0, 1, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 1, 0],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 1,