In [4]:
import random
import numpy as np
from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchtext.datasets import IMDB

from transformers import AdamW, get_linear_schedule_with_warmup

In [5]:
SEED=1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
class IMDBDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = 512

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        (label, text) = self.data[index]
        conv_label = 1 if label == 2 else 0
        encoding = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(conv_label, dtype=torch.long)
        }


In [7]:
from transformers import BertTokenizer,BertForSequenceClassification

tokenizer=BertTokenizer.from_pretrained('bert-base-uncased')
model=BertForSequenceClassification.from_pretrained('bert-base-uncased',num_labels=2)
model.to(device)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [8]:
train_iter, test_iter = IMDB(split=('train', 'test'))
train_dataset=IMDBDataset(list(train_iter),tokenizer)
test_dataset=IMDBDataset(list(test_iter),tokenizer)
train_dataloader=DataLoader(train_dataset,
                            batch_size=8,
                            shuffle=True)
test_dataloader=DataLoader(test_dataset,
                            batch_size=8,
                            shuffle=False)

In [9]:
optimizer=AdamW(model.parameters(),lr=2e-5)
num_training_steps=len(train_dataloader)*3
scheduler=get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=num_training_steps)



In [10]:
num_epochs=3
for epoch in range(num_epochs):
    model.train()
    total_loss=0

    for batch in tqdm(train_dataloader):
        input_ids=batch['input_ids'].to(device)
        attention_mask=batch['attention_mask'].to(device)
        labels=batch['label'].to(device)

        optimizer.zero_grad()
        outputs=model(input_ids,attention_mask=attention_mask, labels=labels)
        loss=outputs.loss
        total_loss+=loss.item()

        loss.backward()
        optimizer.step()
        scheduler.step()

    average_loss=total_loss/len(train_dataloader)
    print(f"Epoch {epoch+1}, Average Loss: {average_loss}")


  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 3125/3125 [10:28<00:00,  4.97it/s]


Epoch 1, Average Loss: 0.2328582075767219


100%|██████████| 3125/3125 [10:24<00:00,  5.01it/s]


Epoch 2, Average Loss: 0.10221560388620943


100%|██████████| 3125/3125 [10:26<00:00,  4.99it/s]

Epoch 3, Average Loss: 0.03903465069940314





In [11]:
model.eval()
correct=0
total=0

with torch.no_grad():
    for batch in tqdm(test_dataloader):
        input_ids=batch['input_ids'].to(device)
        attention_mask=batch['attention_mask'].to(device)
        labels=batch['label'].to(device)

        outputs=model(input_ids,attention_mask=attention_mask)
        logits=outputs.logits
        predictions=F.softmax(logits,dim=1)
        predicted_labels=torch.argmax(predictions,dim=1)

        total+=labels.size(0)
        correct+=(predicted_labels==labels).sum().item()

accuracy=correct/total
print(f"Validation Accuracy: {accuracy*100:.2f}%")

100%|██████████| 3125/3125 [03:47<00:00, 13.71it/s]

Validation Accuracy: 94.19%



