In [1]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from datasets import DatasetDict, Dataset, load_from_disk
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, GenerationConfig

from sklearn.metrics import accuracy_score

from sklearn.preprocessing import LabelEncoder
from transformers import BitsAndBytesConfig
from accelerate import Accelerator
from peft import prepare_model_for_kbit_training, LoraConfig, TaskType, get_peft_model
from transformers import TrainingArguments, AutoConfig, \
    AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig, DataCollatorWithPadding
from peft import (
    PeftConfig,
    PeftModel,
)

In [2]:
# References:
#   PEFT quicktour: https://huggingface.co/docs/peft/quicktour
#   LoRA conceptual guide: https://huggingface.co/docs/peft/conceptual_guides/lora

In [3]:
# Location of the dataset previously saved to disk, relative to the notebook's
# working directory (the directory name suggests it is already tokenized).
path_to_retrieve = "../tokenized_dataset"


In [4]:
# Load the saved dataset; later cells index it with the "train" and "test" keys.
dataset_dict = load_from_disk(path_to_retrieve)

In [5]:
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable.

    Iterates ``model.named_parameters()`` and sums ``numel()`` over all
    parameters and over those with ``requires_grad`` set, then prints both
    totals and the trainable share as a percentage with two decimals.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs whose parameters provide ``numel()``
            and ``requires_grad``.

    Returns:
        None. The summary line is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without any parameters would otherwise raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct:.2f}"
    )

In [6]:
# Hugging Face checkpoint identifier of the model to fine-tune.
model_id = "bert-large-uncased"
# Number of classes in the classification task.
num_labels=5

In [7]:
# NOTE(review): `config` is not referenced by any later cell visible here; the
# adapter is built from `peft_config` instead, which uses r=8 rather than r=32.
# Confirm whether this cell is still needed.
config = LoraConfig(task_type=TaskType.SEQ_CLS, r=32, lora_alpha=32, lora_dropout=0.1, bias="none")


In [8]:

# Load the pretrained checkpoint with a sequence-classification head.
# `model_id` and `num_labels` come from the configuration cell so the
# checkpoint name and label count are defined in exactly one place.
# device_map={"": 0} places the whole model on device index 0.
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=num_labels, device_map={"":0})

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# LoRA settings for the adapter that is applied to the model via get_peft_model.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)


In [10]:
# Wrap the base model with the LoRA adapters described by `peft_config`.
# The guard keeps this cell idempotent: re-running it would otherwise wrap an
# already-wrapped model a second time. To apply a changed `peft_config`,
# re-run the model-loading cell first so `model` is a plain base model again.
if not isinstance(model, PeftModel):
    model = get_peft_model(model, peft_config)


In [11]:
# Report trainable vs. total parameter counts using the wrapped model's own method.
model.print_trainable_parameters()


trainable params: 796,682 || all params: 335,938,570 || trainable%: 0.23715109580897484


In [13]:
def compute_metrics(p):
    """Compute classification accuracy for one evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores; the highest
            score along the last axis is taken as the predicted class) and
            ``label_ids`` (the reference labels).

    Returns:
        dict: ``{"accuracy": <score>}`` as computed by ``accuracy_score``.
    """
    predicted_classes = p.predictions.argmax(axis=-1)
    return {"accuracy": accuracy_score(p.label_ids, predicted_classes)}

In [14]:
# Hyper-parameters for the fine-tuning run (one epoch, batch size 8 per device).
training_args = TrainingArguments(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Relative to the notebook's working directory; an absolute '/results'
    # would write checkpoints to the filesystem root.
    output_dir='./results',
    num_train_epochs=1,
    evaluation_strategy="steps",
    # Stated explicitly instead of inheriting from logging_steps:
    # load_best_model_at_end requires matching save/eval strategies and
    # save_steps to be a round multiple of eval_steps.
    eval_steps=10,
    save_strategy="steps",
    save_steps=10,
    save_total_limit=2,
    remove_unused_columns=False,
    run_name='run_name',
    # Relative for the same reason as output_dir.
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
)

In [15]:
# Wire the model, training arguments, data splits and metric function together.
# NOTE(review): the "test" split is used as the evaluation set, and with
# load_best_model_at_end=True it also drives checkpoint selection — confirm
# this is intended rather than a separate validation split.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=dataset_dict["test"],
    train_dataset=dataset_dict["train"],
)

In [16]:
# Run fine-tuning; as the cell's last expression, the returned TrainOutput is displayed.
trainer.train()


[34m[1mwandb[0m: Currently logged in as: [33mlukemonington3[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Accuracy
10,1.6993,1.652429,0.18
20,1.6738,1.618239,0.19
30,1.6484,1.604853,0.21
40,1.6324,1.594725,0.21
50,1.6108,1.587926,0.23
60,1.6412,1.582966,0.23
70,1.5738,1.578993,0.235
80,1.6273,1.576158,0.265
90,1.6243,1.574304,0.29
100,1.5954,1.573542,0.3


TrainOutput(global_step=100, training_loss=1.6326755046844483, metrics={'train_runtime': 121.8774, 'train_samples_per_second': 6.564, 'train_steps_per_second': 0.82, 'total_flos': 747497978265600.0, 'train_loss': 1.6326755046844483, 'epoch': 1.0})