In [None]:
!pip install -U bitsandbytes transformers peft datasets accelerate
!pip install -U bitsandbytes
!pip install --upgrade datasets fsspec
import os
import torch
import numpy as np
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category = UndefinedMetricWarning)
warnings.filterwarnings("ignore", message = "MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization")
warnings.filterwarnings("ignore", message="None of the inputs have requires_grad=True.*", category=UserWarning)
warnings.filterwarnings("ignore", category = FutureWarning)
from datasets import load_dataset
from transformers import (
    GPT2ForSequenceClassification,
    GPT2TokenizerFast,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig,
    DataCollatorWithPadding
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
# Switch off Weights & Biases reporting through its environment variable.
os.environ["WANDB_DISABLED"] = "true"
# Hugging Face access token read from the environment; falls back to an empty
# string when HUGGINGFACE_HUB_TOKEN is unset.
# NOTE(review): the earlier note asked for a token with 'Write' access; the
# code shown here only passes the token to from_pretrained downloads, so read
# access is presumably enough - confirm.
HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN", "") # Forwarded as `token=` to the from_pretrained calls
# Hub identifier of the checkpoint used for both the tokenizer and the model.
MODEL_ID = "gpt2-medium"
# GLUE task name -> number of leading training examples loaded for that task.
TASK_SIZES = {"sst2": 3500, "qnli": 5500}
def get_tokenizer():
    """Load the fast GPT-2 tokenizer for MODEL_ID, set up for padded batches.

    GPT-2 ships without a padding token, so the end-of-sequence token is
    reused for padding, and padding is applied on the left.

    Returns:
        The configured ``GPT2TokenizerFast`` instance.
    """
    # HF_TOKEN defaults to "" when the environment variable is unset.  Pass
    # None in that case so an empty string is never forwarded as an explicit
    # credential and the hub client can use its own default behaviour.
    tok = GPT2TokenizerFast.from_pretrained(MODEL_ID, token=HF_TOKEN or None)
    tok.pad_token = tok.eos_token
    tok.padding_side = "left"
    return tok
def load_model(quant_bits, tokenizer):
    """Load GPT-2 for sequence classification at the requested precision.

    Args:
        quant_bits: 4 or 8 selects bitsandbytes quantization of that width.
            Any other value (the script uses 16) loads the checkpoint
            unquantized in bfloat16.
        tokenizer: Tokenizer whose ``pad_token_id`` is copied into the model
            config so the classifier knows which token is padding.

    Returns:
        The loaded ``GPT2ForSequenceClassification`` model.
    """
    compute_dtype = torch.bfloat16
    model_kwargs = {
        # HF_TOKEN is "" when unset; send None instead so an empty string is
        # never forwarded as an explicit credential.
        "token": HF_TOKEN or None,
        "device_map": "auto",
        "torch_dtype": compute_dtype,
    }
    if quant_bits in (4, 8):
        # The bnb_4bit_* options are only relevant to the 4-bit path; they are
        # passed unconditionally so one config expression serves both widths.
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=(quant_bits == 4),
            load_in_8bit=(quant_bits == 8),
            bnb_4bit_compute_dtype=compute_dtype,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
    model = GPT2ForSequenceClassification.from_pretrained(MODEL_ID, **model_kwargs)
    model.config.pad_token_id = tokenizer.pad_token_id
    return model

def apply_lora(model):
    """Wrap ``model`` with LoRA adapters for sequence classification.

    Models loaded in 4-bit or 8-bit are first passed through PEFT's
    ``prepare_model_for_kbit_training``.  The training loop in this file uses
    gradient checkpointing; without this preparation step a quantized base
    model can end up with no input requiring gradients, which is what the
    "None of the inputs have requires_grad=True" warning reports.

    Args:
        model: The base classification model, quantized or not.

    Returns:
        The PEFT-wrapped model with trainable LoRA weights.
    """
    is_quantized = getattr(model, "is_loaded_in_8bit", False) or getattr(
        model, "is_loaded_in_4bit", False
    )
    if is_quantized:
        # Imported locally so the file-level import list stays untouched.
        from peft import prepare_model_for_kbit_training

        model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=True
        )

    config = LoraConfig(
        r=32,
        lora_alpha=16,
        # None defers to PEFT's default target modules for this architecture.
        target_modules=None,
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
    )
    return get_peft_model(model, config)

def prepare_raw_dataset(task, n):
    """Load the first ``n`` GLUE training examples of ``task`` as text/label pairs.

    For ``"sst2"`` the text is the sentence itself; for any other task the
    ``question`` and ``sentence`` fields are joined into one prompt string.

    Args:
        task: GLUE configuration name.
        n: Number of leading training examples to keep.

    Returns:
        A dataset whose only columns are ``text`` and ``label``.
    """
    subset = load_dataset("glue", task, split=f"train[:{n}]")
    single_sentence = task == "sst2"

    def to_text_label(row):
        # Collapse the task-specific input fields into a single string.
        text = (
            row["sentence"]
            if single_sentence
            else f"Question: {row['question']} Sentence: {row['sentence']}"
        )
        return {"text": text, "label": row["label"]}

    original_columns = subset.column_names
    return subset.map(to_text_label, remove_columns=original_columns)

def tokenize_and_add_labels(example):
    """Tokenize the ``text`` field and attach ``label`` under the key ``labels``.

    Uses the module-level ``tokenizer``.  Text is truncated and padded to a
    fixed length of 128 tokens.

    Args:
        example: Mapping with ``text`` and ``label`` entries.

    Returns:
        The tokenizer output with an added ``labels`` entry.
    """
    encode_options = {
        "truncation": True,
        "max_length": 128,
        "padding": "max_length",
        "add_special_tokens": True,
    }
    encoded = tokenizer(example["text"], **encode_options)
    encoded["labels"] = example["label"]
    return encoded

def compute_metrics(pred):
    """Compute accuracy, precision, recall and F1 for binary predictions.

    Args:
        pred: Object exposing ``predictions`` (per-class scores, argmaxed
            along axis 1) and ``label_ids`` (reference labels).

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predicted = np.argmax(pred.predictions, axis=1)
    scores = {"accuracy": accuracy_score(pred.label_ids, predicted)}
    binary_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for metric_name, scorer in binary_scorers:
        scores[metric_name] = scorer(pred.label_ids, predicted, average="binary")
    return scores

def run_pipeline(bits):
    """Fine-tune and evaluate a LoRA-adapted classifier on each task in TASK_SIZES.

    For every task a fresh model is loaded at the requested precision and
    wrapped with LoRA, so weights trained on one task never leak into the
    next.  The prepared examples are split 90/10: training uses the larger
    part and the reported metrics come from the held-out part rather than
    from the data the model was trained on.  Results are printed; nothing is
    returned.

    Args:
        bits: Precision forwarded to ``load_model`` (the script uses 16, 8
            and 4).
    """
    import gc
    import inspect

    print(f"\n=== {bits}-bit QLoRA ===")
    # tokenize_and_add_labels reads the tokenizer from module scope.
    global tokenizer
    tokenizer = get_tokenizer()
    print("Tokenization test:", tokenizer("Test input", return_tensors="pt"))

    # Newer transformers releases take the tokenizer as `processing_class`;
    # older ones only know `tokenizer`.  Use whichever this install accepts.
    trainer_params = inspect.signature(Trainer.__init__).parameters
    tokenizer_kwarg = (
        "processing_class" if "processing_class" in trainer_params else "tokenizer"
    )
    use_bf16 = torch.cuda.is_bf16_supported()

    for task, n in TASK_SIZES.items():
        print(f"\n--- Task: {task} ---")

        # Start every task from the pretrained checkpoint with new adapters.
        model = apply_lora(load_model(bits, tokenizer))
        print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

        raw = prepare_raw_dataset(task, n)
        print("Sample:", raw[0]['text'][:50] + "...")
        print("Label distribution:", np.bincount(raw['label']))
        tok_ds = raw.map(tokenize_and_add_labels, batched=True)

        # Fixed seed keeps the held-out split identical across precisions.
        splits = tok_ds.train_test_split(test_size=0.1, seed=42)

        args = TrainingArguments(
            output_dir=f"./tmp/{task}_{bits}",
            per_device_train_batch_size=8,
            per_device_eval_batch_size=16,
            num_train_epochs=4,
            learning_rate=3e-4,
            logging_steps=50,
            eval_strategy="epoch",
            save_strategy="epoch",
            report_to="none",
            fp16=not use_bf16,
            bf16=use_bf16,
            gradient_checkpointing=True,
        )
        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=splits["train"],
            eval_dataset=splits["test"],
            data_collator=DataCollatorWithPadding(tokenizer),
            compute_metrics=compute_metrics,
            **{tokenizer_kwarg: tokenizer},
        )
        trainer.train()
        print("Evaluation results:", trainer.evaluate())

        # Release this task's model before the next one is loaded.
        del trainer, model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
# Run the whole experiment once per precision, widest first.
for quant_bits in (16, 8, 4):
    run_pipeline(quant_bits)