In [1]:
!pip install torchtext==0.13.0
!pip install torchdata==0.4.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchtext==0.13.0
  Downloading torchtext-0.13.0-cp38-cp38-manylinux1_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 34.0 MB/s 
Collecting torch==1.12.0
  Downloading torch-1.12.0-cp38-cp38-manylinux1_x86_64.whl (776.3 MB)
[K     |████████████████████████████████| 776.3 MB 11 kB/s 
Installing collected packages: torch, torchtext
  Attempting uninstall: torch
    Found existing installation: torch 1.13.0+cu116
    Uninstalling torch-1.13.0+cu116:
      Successfully uninstalled torch-1.13.0+cu116
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.14.0
    Uninstalling torchtext-0.14.0:
      Successfully uninstalled torchtext-0.14.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.14.0+

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
%cd /content/drive/MyDrive/DL

/content/drive/MyDrive/DL


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math

from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data.dataset import random_split


from tqdm import tqdm

import importlib

from datetime import datetime as dt
import time

import imdb_voc


root = "./"

# import sentences
importlib.reload(imdb_voc)

# set device
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

"""

You can implement any necessary methods.

"""


class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, d_Q, d_K, d_V, numhead, dropout):
        super().__init__()
        self.d_Qs = []
        self.d_Ks = []
        self.d_Vs = []

        for _ in range(numhead):
            self.d_Qs.append(nn.Linear(d_model, d_Q).to(dev))
            self.d_Ks.append(nn.Linear(d_model, d_K).to(dev))
            self.d_Vs.append(nn.Linear(d_model, d_V).to(dev))

        self.d_K = d_K

        self.join_layer = nn.Linear(d_model, d_model).to(dev)
        self.dropout = nn.Dropout(dropout)

    def scaled_dot_product(self, q, k, v, src_batch_lens):
        softmax = nn.Softmax(dim=0)
        a = torch.matmul(q, k.transpose(1, 2))
        a = a / math.sqrt(self.d_K)
        a = softmax(a)
        a = self.dropout(a)
        a = torch.matmul(a, v)
        return a

    def forward(self, x_Q, x_K, x_V, src_batch_lens=None):
        # Q2. Implement
        heads_list = []
        for lq, lk, lv in zip(
            self.d_Qs, self.d_Ks, self.d_Vs
        ):
            q = lq(x_Q)
            k = lk(x_K)
            v = lv(x_V)
            head_list = self.scaled_dot_product(q, k, v, src_batch_lens)
            heads_list.append(head_list)

        out_join = torch.concat(heads_list, dim=2)
        out = self.join_layer(out_join)
        out = self.dropout(out)
        return out


class TF_Encoder_Block(nn.Module):
    def __init__(self, d_model, d_ff, numhead, dropout):
        super().__init__()
        d = int(d_model / numhead)
        self.Multi_head_attention = MultiHeadAttention(d_model, d, d, d, numhead, dropout)
        self.feed_foward_layer = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
            nn.ReLU(),
        ).to(dev)

    def forward(self, x: torch.Tensor, src_batch_lens):
        
        x_Q = x.clone().to(dev)
        x_K = x.clone().to(dev)
        x_V = x.clone().to(dev)
        x = x + self.Multi_head_attention(x_Q, x_K, x_V, src_batch_lens)



        # Q4. Implment forward function for transformer encoder block
        self.norm_layer = nn.LayerNorm(x.shape[1:]).to(dev)
        x_normalize = self.norm_layer(x)

        x_feed_foward = self.feed_foward_layer(x_normalize)

        self.feed_and_norm_layer = nn.LayerNorm(x_feed_foward.shape[1:]).to(dev)
        out = self.feed_and_norm_layer(x_feed_foward)

       
        return out


""" 
Positional encoding
PE(pos,2i) = sin(pos/10000**(2i/dmodel))
PE(pos,2i+1) = cos(pos/10000**(2i/dmodel))
"""


def PosEncoding(t_len, d_model):
    i = torch.tensor(range(d_model))
    pos = torch.tensor(range(t_len))
    POS, I = torch.meshgrid(pos, i)
    PE = (1 - I % 2) * torch.sin(POS / 10 ** (4 * I / d_model)) + (
        I % 2
    ) * torch.cos(POS / 10 ** (4 * (I - 1) / d_model))
    return PE


class TF_Encoder(nn.Module):
    def __init__(self, vocab_size, d_model, d_ff, numlayer, numhead, dropout):
        super().__init__()

        self.numlayer = numlayer
        self.src_embed = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=d_model
        )
        self.dropout = nn.Dropout(dropout)

        # Q5. Implement a sequence of numlayer encoder blocks
        self.encoder_layer = []
        for _ in range(numlayer):
            self.encoder_layer.append(TF_Encoder_Block(d_model, d_ff, numhead, dropout))


    def forward(self, x, src_batch_lens):
        # x.shape = (B, T_S) where B is batch size, T_S is seq length of tokens
        x_embed = self.src_embed(x)  # x_embed.shape = (B, T_S, d_model)
        x = self.dropout(x_embed)  # x.shape = (B, T_S, d_model)
        p_enc = PosEncoding(x.shape[1], x.shape[2]).to(
            dev
        )  # p_enc.shape = (T_S, d_model)
        x = x + p_enc  # x.shape = (B, T_S, d_model)

        # Q6. Implement: forward over numlayer encoder blocks
        for i in self.encoder_layer:
            x = i(x, src_batch_lens)

        out = x
        # out should be (B, T_S, d_model)
        return out


"""

main model

"""


class sentiment_classifier(nn.Module):
    def __init__(
        self,
        enc_input_size,
        enc_d_model,
        enc_d_ff,
        enc_num_layer,
        enc_num_head,
        dropout,
    ):
        super().__init__()

        self.encoder = TF_Encoder(
            vocab_size=enc_input_size,
            d_model=enc_d_model,
            d_ff=enc_d_ff,
            numlayer=enc_num_layer,
            numhead=enc_num_head,
            dropout=dropout,
        )

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            nn.Dropout(dropout),
            nn.Linear(in_features=enc_d_model, out_features=enc_d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(in_features=enc_d_model, out_features=1),
        )

    def forward(self, x, x_lens):
        src_ctx = self.encoder(x, src_batch_lens=x_lens)
        # size should be (b,)
        out_logits = self.classifier(src_ctx).flatten()

        return out_logits


"""

datasets

"""

# Load IMDB dataset
# once you build the dataset, you can load it from file to save time
# to load from file, set this flag True
load_imdb_dataset = False

if load_imdb_dataset:
    imdb_dataset = torch.load("imdb_dataset.pt")
else:
    imdb_dataset = imdb_voc.IMDB_tensor_dataset()
    torch.save(imdb_dataset, "imdb_dataset.pt")

train_dataset, test_dataset = imdb_dataset.get_dataset()

split_ratio = 0.85
num_train = int(len(train_dataset) * split_ratio)
split_train, split_valid = random_split(
    train_dataset, [num_train, len(train_dataset) - num_train]
)

# Set hyperparam (batch size)
batch_size_trn = 256
batch_size_val = 256
batch_size_tst = 256

train_dataloader = DataLoader(
    split_train, batch_size=batch_size_trn, shuffle=True
)
val_dataloader = DataLoader(
    split_valid, batch_size=batch_size_val, shuffle=True
)
test_dataloader = DataLoader(
    test_dataset, batch_size=batch_size_tst, shuffle=True
)

# get character dictionary
src_word_dict = imdb_dataset.src_stoi
src_idx_dict = imdb_dataset.src_itos

SRC_PAD_IDX = src_word_dict["<PAD>"]

# show sample reviews with pos/neg sentiments

show_sample_reviews = True

if show_sample_reviews:

    sample_text, sample_lab = next(iter(train_dataloader))
    slist = []

    for stxt in sample_text[:4]:
        slist.append([src_idx_dict[j] for j in stxt])

    for j, s in enumerate(slist):
        print("positive" if sample_lab[j] == 1 else "negative")
        print(" ".join([i for i in s if i != "<PAD>"]) + "\n")


"""

model

"""

enc_vocab_size = len(src_word_dict)  # counting eof, one-hot vector goes in

# Set hyperparam (model size)
# examples: model & ff dim - 8, 16, 32, 64, 128, numhead, numlayer 1~4

enc_d_model = 64

enc_d_ff = 128

enc_num_head = 4

enc_num_layer = 4

DROPOUT = 0.1

model = sentiment_classifier(
    enc_input_size=enc_vocab_size,
    enc_d_model=enc_d_model,
    enc_d_ff=enc_d_ff,
    enc_num_head=enc_num_head,
    enc_num_layer=enc_num_layer,
    dropout=DROPOUT,
)

model = model.to(dev)

"""

optimizer

"""

# Set hyperparam (learning rate)
# examples: 1e-3 ~ 1e-5

lr = 1e-3

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

criterion = nn.BCEWithLogitsLoss()

"""

auxiliary functions

"""


# get length of reviews in batch
def get_lens_from_tensor(x):
    # lens (batch, t)
    lens = torch.ones_like(x).long()
    lens[x == SRC_PAD_IDX] = 0
    return torch.sum(lens, dim=-1)


def get_binary_metrics(y_pred, y):
    # find number of TP, TN, FP, FN
    TP = sum(((y_pred == 1) & (y == 1)).type(torch.int32))
    FP = sum(((y_pred == 1) & (y == 0)).type(torch.int32))
    TN = sum(((y_pred == 0) & (y == 0)).type(torch.int32))
    FN = sum(((y_pred == 0) & (y == 1)).type(torch.int32))
    accy = (TP + TN) / (TP + FP + TN + FN)

    recall = TP / (TP + FN) if TP + FN != 0 else 0
    prec = TP / (TP + FP) if TP + FP != 0 else 0
    f1 = 2 * recall * prec / (recall + prec) if recall + prec != 0 else 0

    return accy, recall, prec, f1


"""

train/validation

"""


def train(model, dataloader, optimizer, criterion, clip):

    model.train()

    epoch_loss = 0

    for i, batch in enumerate(dataloader):

        src = batch[0].to(dev)
        trg = batch[1].float().to(dev)

        # print('batch trg.shape', trg.shape)
        # print('batch src.shape', src.shape)

        optimizer.zero_grad()

        x_lens = get_lens_from_tensor(src).to(dev)

        output = model(x=src, x_lens=x_lens)

        output = output.contiguous().view(-1)
        trg = trg.contiguous().view(-1)

        loss = criterion(output, trg)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(dataloader)


def evaluate(model, dataloader, criterion):

    model.eval()

    epoch_loss = 0

    epoch_accy = 0
    epoch_recall = 0
    epoch_prec = 0
    epoch_f1 = 0

    with torch.no_grad():
        for i, batch in enumerate(dataloader):

            src = batch[0].to(dev)
            trg = batch[1].float().to(dev)

            x_lens = get_lens_from_tensor(src).to(dev)

            output = model(x=src, x_lens=x_lens)

            output = output.contiguous().view(-1)
            trg = trg.contiguous().view(-1)

            loss = criterion(output, trg)

            accy, recall, prec, f1 = get_binary_metrics(
                (output >= 0).long(), trg.long()
            )
            epoch_accy += accy
            epoch_recall += recall
            epoch_prec += prec
            epoch_f1 += f1

            epoch_loss += loss.item()

    # show accuracy
    print(f"\tAccuracy: {epoch_accy/(len(dataloader)):.3f}")

    return epoch_loss / len(dataloader)


def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


"""

Training loop

"""

N_EPOCHS = 30
CLIP = 1

best_valid_loss = float("inf")

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_dataloader, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, val_dataloader, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), "model.pt")

    print(f"Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s")
    print(f"\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}")

"""

Test loop

"""
print("*** Now test phase begins! ***")
model.load_state_dict(torch.load("model.pt"))

test_loss = evaluate(model, test_dataloader, criterion)

print(f"| Test Loss: {test_loss:.3f}")

negative
i think its time for seagal to go quietly into the night . what i have just seen makes all his direct to video releases in the last few years look like his early 90 ' s smash hits in comparison . a secret bio lab is making a new kind of drug that <UNK> up a human ' s adrenaline system to the point where they become psychopathic killers or something . somehow seagal is supposed to stop the infection or its the end of the world . . . or something . seagal also went through hit <UNK> like <UNK> , every time i look up he was commanding a new face so it kinda got hard to follow character development as well i know steven ' s <UNK> prevent him from yelling at the top of his lungs but even so why is he constantly being dubbed by people who sound nothing like him ? usually the films plot and action sequences can save it from being a total waste of time but this was not even close . like i said , it was more of a horror movie with a lot of blood and <UNK> stabbing rather than straight 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


	Accuracy: 0.517
Epoch: 01 | Time: 0m 31s
	Train Loss: 0.694 | Val. Loss: 0.693
	Accuracy: 0.523
Epoch: 02 | Time: 0m 31s
	Train Loss: 0.694 | Val. Loss: 0.692
	Accuracy: 0.532
Epoch: 03 | Time: 0m 31s
	Train Loss: 0.692 | Val. Loss: 0.690
	Accuracy: 0.576
Epoch: 04 | Time: 0m 31s
	Train Loss: 0.689 | Val. Loss: 0.684
	Accuracy: 0.611
Epoch: 05 | Time: 0m 31s
	Train Loss: 0.681 | Val. Loss: 0.668
	Accuracy: 0.686
Epoch: 06 | Time: 0m 32s
	Train Loss: 0.659 | Val. Loss: 0.628
	Accuracy: 0.753
Epoch: 07 | Time: 0m 32s
	Train Loss: 0.611 | Val. Loss: 0.554
	Accuracy: 0.771
Epoch: 08 | Time: 0m 32s
	Train Loss: 0.544 | Val. Loss: 0.489
	Accuracy: 0.800
Epoch: 09 | Time: 0m 32s
	Train Loss: 0.487 | Val. Loss: 0.436
	Accuracy: 0.824
Epoch: 10 | Time: 0m 32s
	Train Loss: 0.453 | Val. Loss: 0.399
	Accuracy: 0.830
Epoch: 11 | Time: 0m 32s
	Train Loss: 0.415 | Val. Loss: 0.381
	Accuracy: 0.841
Epoch: 12 | Time: 0m 32s
	Train Loss: 0.387 | Val. Loss: 0.370
	Accuracy: 0.842
Epoch: 13 | Time: 0m 32