In [None]:
%pip install -U datasets
%pip install evaluate bitsandbytes

In [None]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments
)
from datasets import load_dataset
import evaluate
import numpy as np
from peft import LoraConfig, get_peft_model, TaskType


In [None]:
# Load AG News and read the number of classes from the label feature.
dataset = load_dataset("ag_news")

model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
num_labels = len(dataset["train"].features["label"].names)

# device_map="auto" lets Hugging Face decide where (and how) to place the
# model's layers and tensors across the available hardware.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    device_map="auto"
)

# LoRA adapters are attached to the attention projections only; every other
# base-model weight is frozen by get_peft_model.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_proj", "key_proj", "value_proj"],
    lora_dropout=0.05,
    bias="none",
    # Loading the checkpoint reports both `classifier.*` and `pooler.dense.*`
    # as newly initialised. Without listing the pooler here it would stay
    # frozen at its random initial values, so the classifier would be trained
    # on top of an untrained projection. Listing both keeps them fully
    # trainable and saved alongside the adapter.
    modules_to_save=["classifier", "pooler"],
    task_type=TaskType.SEQ_CLS,
)
model = get_peft_model(model, lora_config)


def preprocess(ex):
    """Tokenize the ``text`` entry of a batch for the classifier.

    Args:
        ex: Mapping with a ``"text"`` entry, as supplied by ``Dataset.map``.

    Returns:
        The tokenizer output, with every sequence truncated and padded to
        exactly 128 tokens.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(ex["text"], **tokenizer_options)


def _encode_split(split_name):
    """Tokenize one split of ``dataset`` and expose it as torch tensors.

    The split is mapped through ``preprocess`` in batches, its ``label``
    column is renamed to ``labels``, and the output format is restricted to
    the ``input_ids``, ``attention_mask`` and ``labels`` columns.
    """
    encoded = dataset[split_name].map(preprocess, batched=True)
    encoded = encoded.rename_column("label", "labels")
    encoded.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return encoded


encoded_train = _encode_split("train")
encoded_test = _encode_split("test")

# Accuracy metric object shared by every evaluation pass.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score a batch of predictions with the accuracy metric.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max class of each
        row of logits compared against the reference labels.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted, references=gold)

# Training configuration: evaluate every 500 steps, checkpoint every 1000
# steps, and reload the checkpoint with the best accuracy when training ends.
training_args = TrainingArguments(
    output_dir="./deberta-ag-news-lora",
    eval_strategy="steps",
    eval_steps=500,
    logging_steps=100,
    save_strategy="steps",
    save_steps=1000,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    report_to="none",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # The PEFT wrapper hides the base model's forward signature, so the
    # Trainer cannot discover the label column on its own; name it explicitly.
    label_names=["labels"],
)

# NOTE(review): the test split is used both for periodic evaluation and for
# best-checkpoint selection, so the final score below is not measured on
# data that was held out from model selection. Consider a separate
# validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train,
    eval_dataset=encoded_test,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement and takes the same tokenizer object.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()
print(trainer.evaluate())

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Accuracy
500,1.3679,1.356997,0.397368
1000,0.6891,0.674039,0.696842
1500,0.6345,0.595483,0.701842
2000,0.5336,0.575323,0.704342
2500,0.563,0.557439,0.783158
3000,0.5816,0.540642,0.838947
3500,0.5604,0.53152,0.853816
4000,0.4567,0.426971,0.870132
4500,0.3611,0.376831,0.881447
5000,0.3299,0.36033,0.893553


{'eval_loss': 0.27855920791625977, 'eval_accuracy': 0.9140789473684211, 'eval_runtime': 11.2245, 'eval_samples_per_second': 677.091, 'eval_steps_per_second': 21.204, 'epoch': 3.0}
