In [2]:
!pip install -qU accelerate peft bitsandbytes transformers trl huggingface_hub scipy tensorboard datasets matplotlib sacrebleu evaluate

In [4]:
from datasets import load_dataset

# Fetch the AG News corpus from the Hugging Face Hub (cached locally after the first download).
ds = load_dataset(path="ag_news")

README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [4]:
# Show the DatasetDict summary (split names, columns and row counts).
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 120000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 7600
    })
})

In [9]:
import peft
from transformers import TrainingArguments
from peft import LoraConfig, PeftModel,get_peft_model
from trl import SFTTrainer

In [5]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import DatasetDict, Dataset
import evaluate
import numpy as np

In [6]:
# Tokenizer belonging to the RoBERTa-large checkpoint that is fine-tuned further down.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="FacebookAI/roberta-large"
)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [7]:
def preprocess(examples):
    """Tokenize a batch of examples for sequence classification.

    Args:
        examples: A batch as passed by ``Dataset.map(batched=True)``; only the
            ``"text"`` column (a list of strings) is read.

    Returns:
        The tokenizer output for the batch, with sequences truncated to the
        tokenizer's maximum length and left *unpadded*.
    """
    # No padding here: with batched map, ``padding=True`` pads every example to
    # the longest text in its whole map chunk and stores that padding in the
    # dataset. Padding is applied per training batch by DataCollatorWithPadding.
    return tokenizer(examples["text"], truncation=True)
# Tokenize every split in batches; the raw "text" column is dropped afterwards.
tokenized_dataset = ds.map(
    preprocess,
    batched=True,
    remove_columns=["text"],
)

# The train split is used as-is; the test split is cut into two shards:
# shard 0 is used for evaluation during training, shard 1 is kept as the final test set.
train_dataset = tokenized_dataset["train"]
eval_dataset, test_dataset = (
    tokenized_dataset["test"].shard(num_shards=2, index=shard_idx)
    for shard_idx in range(2)
)

# Label metadata is read from the dataset's label feature.
class_names = ds["train"].features["label"].names
num_labels = ds["train"].features["label"].num_classes
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Integer class id -> class name.
id2label = dict(enumerate(class_names))

# Collator that pads each batch and returns PyTorch tensors.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


In [17]:
# Inverse of id2label. Both mappings (and num_labels) are passed together so the
# model config stays self-consistent; passing id2label alone does not update label2id.
label2id = {label: idx for idx, label in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    "FacebookAI/roberta-large",
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# LoRA adapter configuration for sequence classification (rank 8, alpha 16, dropout 0.1).
peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)
peft_model = get_peft_model(model, peft_config)

print('PEFT Model')
peft_model.print_trainable_parameters()


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PEFT Model
trainable params: 1,840,132 || all params: 357,203,976 || trainable%: 0.5151


In [13]:
import numpy as np
import evaluate
from sklearn.metrics import f1_score, precision_score, recall_score

def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 / precision / recall for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds the
            class logits with the class dimension last (or a tuple whose first
            element is that array); ``labels`` holds the integer class ids.

    Returns:
        dict with the keys ``"accuracy"``, ``"f1"``, ``"precision"`` and ``"recall"``.
    """
    predictions, labels = eval_pred
    # Some models return extra outputs next to the logits; the logits come first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.argmax(predictions, axis=-1)
    labels = np.asarray(labels)

    # Accuracy is computed directly instead of calling evaluate.load("accuracy")
    # on every evaluation, which reloaded the metric script each time.
    accuracy_result = float(np.mean(predictions == labels))

    return {
        "accuracy": accuracy_result,
        "f1": f1_score(labels, predictions, average="weighted"),
        "precision": precision_score(labels, predictions, average="weighted"),
        "recall": recall_score(labels, predictions, average="weighted"),
    }

In [8]:
# Read the class count from the label feature's metadata instead of loading the
# whole label column into a Python set just to count distinct values.
num_labels = ds["train"].features["label"].num_classes

In [9]:
# Display the number of classes.
# NOTE(review): the saved output of this cell (20) does not match AG News, for which
# this notebook prints 4 labels earlier on; it looks stale from a previous run. Re-execute.
num_labels

20

In [11]:
# Display the tokenized splits and their columns.
# NOTE(review): the saved row counts of this cell (11314 / 7532) do not match the
# dataset loaded above (120000 / 7600); the output looks stale. Re-execute.
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 11314
    })
    test: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 7532
    })
})

In [18]:
training_args = TrainingArguments(
    output_dir="./roberta_classifier",
    # --- optimisation ---
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    # --- evaluation and checkpointing ---
    # load_best_model_at_end needs checkpoints saved on the same schedule as the
    # evaluations, so the save strategy is stated explicitly, not left to the default.
    eval_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    load_best_model_at_end=True,
    # "f1" is the key returned by compute_metrics; a higher value is better.
    metric_for_best_model="f1",
    greater_is_better=True,
    # --- logging / hub ---
    push_to_hub=False,
    logging_dir="./logs",
    logging_steps=100,
    report_to="tensorboard",
)

In [19]:
# Trainer for the LoRA-wrapped model. The data_collator created during dataset
# preparation is reused so training and the manual evaluation loop pad batches
# with the same collator instead of two separately constructed ones.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

In [20]:
# Run fine-tuning. As the last expression of the cell, the returned TrainOutput is displayed.
trainer.train()

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1000,0.2674,0.321892,0.901842,0.901505,0.901446,0.901842
2000,0.2282,0.303268,0.908684,0.90827,0.908335,0.908684
3000,0.2985,0.268969,0.910263,0.910052,0.910241,0.910263
4000,0.2414,0.265981,0.911316,0.910969,0.910806,0.911316
5000,0.2364,0.259376,0.914474,0.914257,0.914906,0.914474
6000,0.2319,0.254145,0.917632,0.917308,0.917342,0.917632
7000,0.1904,0.248881,0.916053,0.915737,0.915774,0.916053


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

TrainOutput(global_step=7500, training_loss=0.2757459762573242, metrics={'train_runtime': 9912.6266, 'train_samples_per_second': 12.106, 'train_steps_per_second': 0.757, 'total_flos': 7.2301650394603e+16, 'train_loss': 0.2757459762573242, 'epoch': 1.0})

In [25]:
# Write the trainer's current model to a local directory.
trainer.save_model(output_dir="./trained_model")

In [33]:
from huggingface_hub import login
# Interactive Hugging Face Hub login; the token is entered in the widget, not hard-coded here.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [34]:
# Upload the PEFT model and the tokenizer to the same Hub repository.
peft_model.push_to_hub(repo_id="peft_roberta")
tokenizer.push_to_hub(repo_id="peft_roberta")

adapter_model.safetensors:   0%|          | 0.00/7.37M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Ank110/peft_roberta/commit/5ed6921a0ddb9245d656e86ebaa4134143eb7b72', commit_message='Upload tokenizer', commit_description='', oid='5ed6921a0ddb9245d656e86ebaa4134143eb7b72', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Ank110/peft_roberta', endpoint='https://huggingface.co', repo_type='model', repo_id='Ank110/peft_roberta'), pr_revision=None, pr_num=None)

In [23]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

# Accuracy metric object shared by evaluate_model below.
metric = evaluate.load(path="accuracy")

def evaluate_model(inference_model, dataset, batch_size=8):
    """Run ``inference_model`` over ``dataset`` and report the module-level ``metric``.

    Args:
        inference_model: Model to evaluate. It is moved to CUDA when available
            (otherwise CPU) and switched to eval mode.
        dataset: Tokenized dataset with a ``"label"`` column.
        batch_size: Number of examples per batch (default 8).

    Returns:
        The dict produced by ``metric.compute()``; it is also printed.
    """
    eval_dataloader = DataLoader(
        # The loop below reads the targets from the "labels" key of each batch.
        dataset.rename_column("label", "labels"),
        batch_size=batch_size,
        collate_fn=data_collator,
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()
    for batch in tqdm(eval_dataloader):
        batch = batch.to(device)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = inference_model(**batch)
        metric.add_batch(
            predictions=outputs.logits.argmax(dim=-1),
            references=batch["labels"],
        )

    eval_metric = metric.compute()
    print(eval_metric)
    # Returned as well as printed so callers can store or compare the result.
    return eval_metric

In [24]:
# Score the fine-tuned PEFT model on test_dataset (shard 1 of the original test split).
evaluate_model(peft_model, test_dataset)

100%|██████████| 475/475 [01:23<00:00,  5.66it/s]

{'accuracy': 0.9315789473684211}



