In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType
from sklearn.metrics import accuracy_score, classification_report
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
# --- Configuration -----------------------------------------------------------
# Checkpoint used for tokenization. It is the same checkpoint the models in
# this notebook are loaded from, so vocabulary and special tokens always match
# what the model expects.
MODEL_NAME = "distilbert-base-uncased"
MAX_LENGTH = 256   # reviews are truncated / padded to this many tokens
BATCH_SIZE = 16
SEED = 42

# Seed PyTorch so DataLoader shuffling and the randomly initialised
# classification head are repeatable under "Restart Kernel -> Run All".
torch.manual_seed(SEED)

# --- Data --------------------------------------------------------------------
dataset = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize(batch):
    """Tokenize a batch of examples, padding/truncating each text to MAX_LENGTH tokens."""
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=MAX_LENGTH)

tokenized = dataset.map(tokenize, batched=True)
# Expose only the columns the training/evaluation loops read, as torch tensors.
tokenized.set_format("torch", columns=["input_ids", "attention_mask", "label"])
train_loader = DataLoader(tokenized["train"], batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(tokenized["test"], batch_size=BATCH_SIZE)

# Device every batch (and every model) is moved to.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

In [4]:
def train(model, loader, epochs=3, lr=2e-5):
    """Fine-tune ``model`` on ``loader`` and print loss/accuracy after each epoch.

    Args:
        model: Classification model called as
            ``model(input_ids, attention_mask=...)`` whose output exposes a
            ``logits`` attribute. It is moved to the notebook-level ``device``
            in place before training starts.
        loader: Iterable of batches, each a mapping with ``"input_ids"``,
            ``"attention_mask"`` and ``"label"`` tensors.
        epochs: Number of full passes over ``loader``.
        lr: Learning rate passed to AdamW.

    Notes:
        The reported loss is the *sum* of the per-batch losses of the epoch,
        and the reported accuracy is computed from the predictions made during
        training (i.e. while the weights are still being updated).
    """
    # Batches are moved to `device` below; the model must live there too,
    # otherwise the forward pass fails with a device mismatch on GPU machines.
    model.to(device)
    model.train()

    # Only hand trainable parameters to the optimizer, so frozen weights
    # (e.g. the base model under a LoRA adapter) carry no optimizer bookkeeping.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(trainable_params, lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        total_loss = 0
        all_preds, all_labels = [], []

        for batch in loader:
            optimizer.zero_grad()
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs.logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            # Detach before leaving the graph so the bookkeeping keeps no
            # reference to autograd state.
            preds = outputs.logits.argmax(dim=-1).detach().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.detach().cpu().numpy())

        acc = accuracy_score(all_labels, all_preds)
        print(f"Epoch {epoch+1} - Loss: {total_loss:.2f} - Accuracy: {acc:.4f}")

def evaluate(model, loader):
    """Evaluate ``model`` on ``loader``; print accuracy and a classification report.

    Args:
        model: Classification model called as
            ``model(input_ids, attention_mask=...)`` whose output exposes a
            ``logits`` attribute. It is moved to the notebook-level ``device``
            in place and left in eval mode.
        loader: Iterable of batches, each a mapping with ``"input_ids"``,
            ``"attention_mask"`` and ``"label"`` tensors.

    Returns:
        The accuracy over all batches of ``loader``, so that runs can be
        compared programmatically in addition to the printed report.
    """
    # Batches are moved to `device` below; keep the model on the same device.
    model.to(device)
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=-1).detach().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.detach().cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    print("Test Accuracy:", acc)
    print(classification_report(all_labels, all_preds))
    return acc

In [5]:
print("FULL FINETUNING")
# Put the model on `device` before training: the training and evaluation loops
# move every batch there, so a model left on the CPU fails with a device
# mismatch as soon as CUDA is available.
model_full = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2).to(device)
train(model_full, train_loader)
evaluate(model_full, test_loader)

FULL FINETUNING


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1 - Loss: 443.41 - Accuracy: 0.8808
Epoch 2 - Loss: 261.30 - Accuracy: 0.9368
Epoch 3 - Loss: 135.86 - Accuracy: 0.9716
Test Accuracy: 0.9062
              precision    recall  f1-score   support

           0       0.90      0.91      0.91     12500
           1       0.91      0.90      0.91     12500

    accuracy                           0.91     25000
   macro avg       0.91      0.91      0.91     25000
weighted avg       0.91      0.91      0.91     25000



In [6]:
from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType

print("LORA FINETUNING")

# Load a fresh pretrained checkpoint under its own name. Reusing `model_full`
# here would overwrite the fully fine-tuned model from the previous cell, and
# get_peft_model() wraps the object it is given, so that name would end up
# pointing at the LoRA-wrapped network instead.
model_lora_base = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,                 # rank of the low-rank update matrices
    lora_alpha=16,       # scaling factor applied to the LoRA update
    lora_dropout=0.1,
    bias="none",         # bias terms are not trained
    target_modules=["q_lin", "v_lin"]  # DistilBERT attention query / value projections
)

# A single device move of the wrapped model is enough; it carries the base
# weights and the adapter weights along with it.
model_lora = get_peft_model(model_lora_base, lora_config).to(device)
model_lora.print_trainable_parameters()

train(model_lora, train_loader)
evaluate(model_lora, test_loader)

LORA FINETUNING


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 739,586 || all params: 67,694,596 || trainable%: 1.0925
Epoch 1 - Loss: 600.52 - Accuracy: 0.8287
Epoch 2 - Loss: 457.60 - Accuracy: 0.8770
Epoch 3 - Loss: 428.04 - Accuracy: 0.8868
Test Accuracy: 0.89132
              precision    recall  f1-score   support

           0       0.89      0.90      0.89     12500
           1       0.90      0.89      0.89     12500

    accuracy                           0.89     25000
   macro avg       0.89      0.89      0.89     25000
weighted avg       0.89      0.89      0.89     25000

