## Code to fine-tune the DeBERTa-v3-base model on the GLUE benchmark datasets

In [None]:
# !pip install -q transformers datasets evaluate accelerate

import evaluate
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
import numpy as np

# --- Run configuration -------------------------------------------------------
MODEL_NAME = "microsoft/deberta-v3-base"  # checkpoint id handed to from_pretrained
OUTPUT_BASE = "deberta-v3-glue"           # prefix of each task's output directory

# Tokenisation / optimisation hyperparameters shared by every task.
MAX_LENGTH = 128
BATCH_SIZE = 16
EPOCHS = 1
LR = 5e-5
WEIGHT_DECAY = 0.01

# GLUE task name -> (first text column, second text column or None).
# Insertion order is also the order in which the tasks are processed.
TASK_TO_KEYS = dict(
    cola=("sentence", None),
    sst2=("sentence", None),
    mrpc=("sentence1", "sentence2"),
    qqp=("question1", "question2"),
    stsb=("sentence1", "sentence2"),
    mnli=("premise", "hypothesis"),
    qnli=("question", "sentence"),
    rte=("sentence1", "sentence2"),
    wnli=("sentence1", "sentence2"),
)

def make_training_args(**base_kwargs):
    """Build ``TrainingArguments`` that evaluate once per epoch.

    The per-epoch evaluation setting is passed as ``eval_strategy`` when the
    installed ``TrainingArguments`` accepts that keyword, and as
    ``evaluation_strategy`` otherwise.

    The keyword is chosen by inspecting the constructor signature rather than
    by catching ``TypeError``: a ``TypeError`` caused by something else (for
    example a misspelled entry in ``base_kwargs``) then surfaces directly
    instead of being replaced by a misleading error from the fallback call.

    Args:
        **base_kwargs: Forwarded unchanged to ``TrainingArguments``.

    Returns:
        The constructed ``TrainingArguments`` instance.
    """
    import inspect  # local import keeps this helper self-contained

    accepted = inspect.signature(TrainingArguments.__init__).parameters
    strategy_key = "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"
    return TrainingArguments(**{strategy_key: "epoch"}, **base_kwargs)

def build_compute_metrics(task_name):
    """Return a ``compute_metrics`` callback for the given GLUE task.

    For ``"stsb"`` (treated as regression) the callback returns the
    ``pearson`` and ``spearmanr`` scores from the GLUE metric. For every other
    task it returns the GLUE metric results for the arg-max predictions with
    an ``"accuracy"`` entry added (or overwritten) from the accuracy metric.

    Args:
        task_name: GLUE configuration name, e.g. ``"cola"`` or ``"stsb"``.

    Returns:
        A function taking a ``(predictions, labels)`` pair and returning a
        dict of metric values.
    """
    glue_metric = evaluate.load("glue", task_name)
    is_reg = task_name == "stsb"
    # Accuracy is only reported for classification, so skip loading the
    # metric for the regression task where it would never be used.
    acc_metric = None if is_reg else evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        preds, labels = eval_pred
        # When predictions arrive as a tuple of arrays, the first entry is
        # taken to be the logits.
        if isinstance(preds, tuple):
            preds = preds[0]

        if is_reg:
            # reshape(-1) flattens (N, 1) to (N,) and, unlike squeeze(),
            # keeps a 1-D array even when N == 1.
            preds = preds.reshape(-1)
            scores = glue_metric.compute(predictions=preds, references=labels)
            return {
                "pearson": scores["pearson"],
                "spearmanr": scores["spearmanr"],
            }

        preds = preds.argmax(axis=-1)
        results = glue_metric.compute(predictions=preds, references=labels)
        results["accuracy"] = acc_metric.compute(
            predictions=preds, references=labels
        )["accuracy"]
        return results

    return compute_metrics

def make_preprocess(tokenizer, s1, s2, is_reg):
    """Create a batch-level tokenization function for one task.

    Args:
        tokenizer: Callable applied to the text column(s) of a batch.
        s1: Name of the first text column.
        s2: Name of the second text column, or ``None`` for single-text tasks.
        is_reg: When true, labels are converted to ``float``.

    Returns:
        A function mapping a batch (column name -> list of values) to the
        tokenizer output with a ``"labels"`` entry added.
    """
    text_columns = (s1,) if s2 is None else (s1, s2)

    def fn(batch):
        texts = [batch[column] for column in text_columns]
        enc = tokenizer(*texts, truncation=True, max_length=MAX_LENGTH)

        labels = batch["label"]
        if is_reg:
            labels = [float(value) for value in labels]
        enc["labels"] = labels
        return enc

    return fn

# One tokenizer and one padding collator are created up front and reused for
# every task in the loop below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Caps on the number of examples used for the quick run. Set either one to
# None to train / evaluate during training on the complete split.
TRAIN_SUBSET = 1000
EVAL_SUBSET = 200

for task, (s1, s2) in TASK_TO_KEYS.items():
    print(f"\n================ GLUE Task: {task.upper()} ================\n")

    ds = load_dataset("glue", task)
    is_reg = (task == "stsb")
    # The regression task uses a single output; classification tasks take the
    # class count from the dataset's label feature.
    num_labels = 1 if is_reg else len(ds["train"].features["label"].names)

    # A fresh model is loaded per task so no weights carry over between tasks.
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_labels)

    preprocess = make_preprocess(tokenizer, s1, s2, is_reg)
    encoded = ds.map(preprocess, batched=True, remove_columns=ds["train"].column_names)

    args = make_training_args(
        output_dir=f"{OUTPUT_BASE}-{task}",
        save_strategy="epoch",
        learning_rate=LR,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        num_train_epochs=EPOCHS,
        weight_decay=WEIGHT_DECAY,
        logging_dir=f"{OUTPUT_BASE}-{task}/logs",
        report_to="none",
        load_best_model_at_end=False,
        logging_steps=50,
    )

    compute_metrics = build_compute_metrics(task)

    # MNLI has two validation splits: the matched one is the main eval set and
    # the mismatched one is evaluated separately after training.
    if task == "mnli":
        eval_main = encoded["validation_matched"]
        extra = [("validation_mismatched", encoded["validation_mismatched"])]
    else:
        eval_main = encoded["validation"]
        extra = []

    # Clamp the subset sizes to the split length: select() raises when asked
    # for indices past the end, which the fixed 1000 / 200 caps would do on
    # any split smaller than the cap.
    train_split = encoded["train"]
    if TRAIN_SUBSET is not None:
        train_split = train_split.shuffle(seed=42).select(
            range(min(TRAIN_SUBSET, len(train_split)))
        )
    eval_split = eval_main
    if EVAL_SUBSET is not None:
        eval_split = eval_main.select(range(min(EVAL_SUBSET, len(eval_main))))

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_split,
        eval_dataset=eval_split,
        # NOTE(review): newer transformers releases appear to prefer
        # `processing_class` over `tokenizer` here - confirm against the
        # installed version.
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    # The final report uses the full validation split, not the capped subset.
    print(f"\n--- {task.upper()} | Eval on validation ---")
    print(trainer.evaluate(eval_dataset=eval_main))

    for name, split in extra:
        print(f"\n--- {task.upper()} | Eval on {name} ---")
        print(trainer.evaluate(eval_dataset=split))

# Completion banner; the leading check mark replaces a mis-decoded UTF-8 sequence.
print("\n✅ Finished fine-tuning DeBERTa-v3-base on all 9 GLUE tasks (accuracy included).")
