In [None]:
!pip install transformer_lens
try:
    import google.colab # type: ignore
    IN_COLAB = True
except:
    IN_COLAB = False

import os, sys

if IN_COLAB:
    # Install packages
    %pip install einops
    %pip install jaxtyping
    %pip install transformer_lens
    %pip install git+https://github.com/callummcdougall/CircuitsVis.git#subdirectory=python

In [None]:
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [1]:
import os 
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import torch
from transformer_lens import HookedTransformer

if torch.cuda.is_available():
    device = "cuda" # Use NVIDIA GPU (if available)
elif torch.backends.mps.is_available():
    device = "mps" # Use Apple Silicon GPU (if available)
else:
    device = "cpu" # Default to CPU if no GPU is available

In [None]:
from transformers import GPT2Tokenizer
from torch.nn import CrossEntropyLoss
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from datasets import load_dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data_path = "/home/sohampoddar/HDD2/utsav/Dataset/IMDB Dataset.csv"
# Load IMDB dataset (3000 train, 1000 test)
dataset = load_dataset('csv', data_files= data_path)
train_subset = dataset["train"].shuffle(seed=42).select(range(7000))
test_subset = dataset["train"].shuffle(seed=42).select(range(2000))
dataset = {"train": train_subset, "test": test_subset}

# Load GPT-2 tokenizer and set pad token
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Load HookedTransformer GPT-2 model
base_model = HookedTransformer.from_pretrained("gpt2-small", device=device)
# Dont Freeze GPT-2 layers (optional, speeds up training) 
for param in base_model.parameters():
    param.requires_grad = True

# Save initial model state
# torch.save(model.state_dict(), "gpt2-small-imdb-sentiment.pt")

# Define label mapping
label_map = {"negative": 0, "positive": 1}


Loaded pretrained model gpt2-small into HookedTransformer


In [4]:
class SentimentClassifier(nn.Module):
    def __init__(self, transformer, hidden_dim=768, num_classes=2):
        super(SentimentClassifier, self).__init__()
        self.transformer = transformer
        self.classifier = nn.Linear(50257, num_classes)  # Maps hidden state → sentiment classes

    def forward(self, input_ids, attention_mask):
        outputs = self.transformer(input_ids)  # (batch, seq_len, hidden_dim)
        pooled_output = outputs.mean(dim=1)  # Mean pool across sequence length
        logits = self.classifier(pooled_output)  # Shape: (batch_size, 2)
        return logits

from torch.utils.data import DataLoader, Dataset

class IMDBDataset(Dataset):
    def __init__(self, dataset, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.texts = dataset["review"]
        self.labels = [label_map[label] for label in dataset["sentiment"]]
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encodings = self.tokenizer(
            self.texts[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        input_ids = encodings["input_ids"].squeeze()
        attention_mask = encodings["attention_mask"].squeeze()
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return input_ids, attention_mask, label

In [5]:
from torch.nn import CrossEntropyLoss
import torch.optim as optim
from tqdm import tqdm
from datasets import load_dataset

train_dataset = IMDBDataset(dataset["train"], tokenizer)
test_dataset = IMDBDataset(dataset["test"], tokenizer)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Define optimizer & loss function
optimizer = optim.AdamW(base_model.parameters(), lr=5e-5)
criterion = CrossEntropyLoss()

In [None]:
base_model = SentimentClassifier(base_model).to(device)
base_model

In [7]:
epochs = 3
base_model.train()
for epoch in range(epochs):
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
    for input_ids, attention_mask, labels in loop:
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

        # Forward pass
        logits = base_model(input_ids,attention_mask)
        loss = criterion(logits, labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loop.set_postfix(loss=loss.item())

# Save fine-tuned model
torch.save(base_model.state_dict(), "gpt2-imdb-finetuned.pt")

Epoch 1: 100%|██████████| 1750/1750 [06:52<00:00,  4.24it/s, loss=0.113]  
Epoch 2: 100%|██████████| 1750/1750 [06:53<00:00,  4.23it/s, loss=0.332]  
Epoch 3: 100%|██████████| 1750/1750 [06:53<00:00,  4.23it/s, loss=0.0257]  
