In [7]:
# %pip install -U transformers datasets evaluate accelerate peft



In [8]:
# Colab-only: mount the user's Google Drive into the runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
import os
# Directory under the Drive mount point; the training cell places each run's
# Trainer `output_dir` (checkpoints) beneath it.
SAVE_ROOT = "/content/drive/MyDrive/SST2"
os.makedirs(SAVE_ROOT, exist_ok=True)  # exist_ok: re-running the cell is harmless
print("Saving models to:", SAVE_ROOT)

Saving models to: /content/drive/MyDrive/SST2


In [10]:
import torch
# Sanity check of the runtime's accelerator before any training is started.
print("CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    # Only query the device name when a GPU is actually present.
    print("GPU name:", torch.cuda.get_device_name(0))

CUDA available: True
Device count: 1
GPU name: Tesla T4


In [13]:
import torch, time, csv
from transformers import (
    AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
import evaluate, numpy as np, random

# ----------------------
# Global settings
# ----------------------
# Checkpoint id passed to every `from_pretrained` call in this notebook.
MODEL_NAME = "roberta-base"
# GLUE subset name, used for both the dataset and the metric.
TASK = "sst2"
# Value handed to `num_train_epochs` for each run.
EPOCHS = 3
# One independent training run is launched per entry.
SEEDS = [42, 123, 2025]
# Device the model is moved to before training.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ----------------------
# Prepare dataset
# ----------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

def preprocess(batch):
    """Tokenize the "sentence" field of a batch, truncating to 128 tokens.

    No padding is applied here; sequences keep their own (truncated) length.
    """
    sentences = batch["sentence"]
    return tokenizer(sentences, truncation=True, max_length=128)

# Load -> tokenize -> rename the target column to "labels".
ds = (
    load_dataset("glue", TASK)
    .map(preprocess, batched=True)
    .rename_column("label", "labels")
)
# Expose only these three columns, as torch tensors.
ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# ----------------------
# Metric
# ----------------------
metric = evaluate.load("glue", TASK)

def compute_metrics(eval_pred):
    """Score an evaluation pass with the GLUE metric loaded above.

    `eval_pred` unpacks into (logits, labels); the predicted class is the
    index of the largest logit along the last axis.
    """
    scores, gold = eval_pred
    return metric.compute(
        predictions=np.argmax(scores, axis=-1),
        references=gold,
    )

# ----------------------
# Utility: Reproducibility
# ----------------------
def set_seed(seed):
    """Seed Python's `random`, NumPy, and PyTorch (CPU and every CUDA device)."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# ----------------------
# Utility: Parameter count
# ----------------------
def count_params(model):
    """Return (trainable, total, percent_trainable) parameter counts for `model`.

    A parameter counts as trainable when its `requires_grad` flag is set.
    """
    trainable = 0
    total = 0
    for param in model.parameters():
        size = param.numel()
        total += size
        if param.requires_grad:
            trainable += size
    return trainable, total, 100 * trainable / total

# ----------------------
# Experiment runner
# ----------------------
def _build_model(use_lora, r, alpha):
    """Load a fresh 2-label classifier from MODEL_NAME, optionally wrapped with LoRA adapters."""
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
    if not use_lora:
        return model
    lora_cfg = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=r, lora_alpha=alpha, lora_dropout=0.1,
        # Module names of RoBERTa's attention projections (previously
        # ["q_proj", "v_proj"], which matched nothing in this architecture).
        target_modules=["query", "value"],
        bias="none"
    )
    return get_peft_model(model, lora_cfg)


def run_experiment(use_lora=False, r=8, alpha=16, lr=1e-3):
    """Fine-tune MODEL_NAME once per seed in SEEDS and collect per-run statistics.

    Args:
        use_lora: If True, train LoRA adapters on the query/value projections;
            otherwise fine-tune the whole model.
        r: LoRA rank (only used when `use_lora` is True).
        alpha: LoRA `lora_alpha` scaling (only used when `use_lora` is True).
        lr: Learning rate for LoRA runs. Full fine-tuning always uses 2e-5.

    Returns:
        A list with one dict per seed containing the keys "mode", "seed",
        "accuracy", "trainable_params", "total_params", "pct_trainable",
        "gpu_mem_gb" and "train_time_s".

    Side effects:
        Writes Trainer checkpoints under SAVE_ROOT and, after training, saves
        the final model and tokenizer to ./final_{lora|full}_{seed}.
    """
    import gc  # local import: only needed for the per-run cleanup below

    tag = "lora" if use_lora else "full"
    results = []
    for seed in SEEDS:
        set_seed(seed)

        # Start the peak-memory counter from scratch so "gpu_mem_gb" reflects
        # this run only, not the high-water mark of an earlier (larger) run.
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()

        model = _build_model(use_lora, r, alpha)
        model.to(DEVICE)
        trainable, total, pct = count_params(model)
        print(f"{'LoRA' if use_lora else 'Full FT'} | Seed {seed}: {trainable}/{total} ({pct:.2f}%) trainable")

        args = TrainingArguments(
            output_dir=f"{SAVE_ROOT}/out_{tag}_{seed}",
            eval_strategy="epoch",
            save_strategy="epoch",
            num_train_epochs=EPOCHS,
            learning_rate=lr if use_lora else 2e-5,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=64,
            weight_decay=0.01,
            warmup_ratio=0.06,
            logging_steps=50,
            load_best_model_at_end=True,
            report_to="none",
            fp16=torch.cuda.is_available(),
            # Without this the Trainer re-seeds everything with its own default
            # seed, so every "seed" would share the same data order and dropout.
            seed=seed
        )

        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=ds["train"],
            eval_dataset=ds["validation"],
            # `tokenizer=` is deprecated in favour of `processing_class=`.
            processing_class=tokenizer,
            compute_metrics=compute_metrics
        )

        start = time.time()
        trainer.train()
        elapsed = time.time() - start
        res = trainer.evaluate()
        # Trainer.evaluate() prefixes metric names with "eval_".
        acc = res["eval_accuracy"]
        mem = torch.cuda.max_memory_allocated() / 1e9 if torch.cuda.is_available() else 0

        # Persist the model only after training; saving earlier would store
        # the untrained weights under the "final_*" name.
        trainer.save_model(f"final_{tag}_{seed}")
        tokenizer.save_pretrained(f"final_{tag}_{seed}")

        results.append({
            "mode": "LoRA" if use_lora else "Full",
            "seed": seed,
            "accuracy": acc,
            "trainable_params": trainable,
            "total_params": total,
            "pct_trainable": pct,
            "gpu_mem_gb": mem,
            "train_time_s": elapsed
        })

        # Drop the references first; empty_cache() cannot release memory that
        # live tensors still occupy.
        del trainer, model
        gc.collect()
        torch.cuda.empty_cache()

    return results

# ----------------------
# Run both conditions
# ----------------------
# Full fine-tuning baseline first, then the LoRA condition.
full_results = run_experiment(use_lora=False)
lora_results = run_experiment(use_lora=True)

# ----------------------
# Save results
# ----------------------
# Column order follows the key order of the first result record.
fields = list(full_results[0])
with open("results_lora_vs_full.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fields)
    writer.writeheader()
    # One CSV row per (mode, seed) run, full fine-tuning rows first.
    for record in full_results + lora_results:
        writer.writerow(record)

print("✅ Results saved to results_lora_vs_full.csv")

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LoRA | Seed 42: 887042/125534212 (0.71%) trainable


  trainer = Trainer(


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
import pandas as pd, matplotlib.pyplot as plt

# Per-mode averages of the metrics written by the training cell.
df = pd.read_csv("results_lora_vs_full.csv")
summary_cols = ["accuracy", "pct_trainable", "train_time_s"]
print(df.groupby("mode")[summary_cols].mean())

# One point per run, labelled with its mode.
fig, ax = plt.subplots()
ax.scatter(df["pct_trainable"], df["accuracy"])
for x_pos, y_pos, label in zip(df["pct_trainable"], df["accuracy"], df["mode"]):
    ax.text(x_pos, y_pos, label, ha="left", fontsize=8)
ax.set_xlabel("% trainable params")
ax.set_ylabel("Accuracy")
ax.set_title("LoRA vs Full Fine-Tuning (SST-2)")
plt.show()


In [None]:
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
# model = AutoModelForSequenceClassification.from_pretrained("final_lora_42")
# tokenizer = AutoTokenizer.from_pretrained("final_lora_42")

In [None]:
# from peft import PeftModel
# base = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
# model = PeftModel.from_pretrained(base, "final_lora_42")

In [None]:
# TODO: Understand where the hyperparameters come from and how they differ from the papers