# BERT fine-tuning (LoRA)

## 0. Setting up ClearML

In [1]:
from clearml import Task

task = Task.init(
    project_name="ai_text_classification",
    task_name="bert_with_lora_ft",
    reuse_last_task_id=True,
    task_type=Task.TaskTypes.training #by default
)

ClearML Task: created new task id=177849254c27481dba78dad9f2bd0572
ClearML results page: https://app.clear.ml/projects/78810acc2d5d484cb6d259425be12de4/experiments/177849254c27481dba78dad9f2bd0572/output/log


Could not fetch GPU stats: RM has detected an NVML/RM version mismatch.


ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring


In [None]:
params = {
    'lr': 2e-4,
    'batch_size': 8,
    'num_epochs': 10,
    'lora_r': 8,
    'lora_alpha': 16,
    'lora_dropout': .1,
}

params = task.connect(params)

In [None]:
task_logger = task.get_logger()

## 1. Setting env, loading dataset

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
os.environ["CLEARML_LOG_MODEL"] = "True"

from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
)
import evaluate
import peft

import pickle
import warnings
warnings.filterwarnings(action='ignore')


CUDA initialization: Unexpected error from cudaGetDeviceCount(). Did you run some cuda functions before calling NumCudaDevices() that might have already set an error? Error 804: forward compatibility was attempted on non supported HW (Triggered internally at /pytorch/c10/cuda/CUDAFunctions.cpp:119.)



In [8]:
dataset = pd.read_csv("../dataset/Dataset.csv")

dataset.sample(10)

Unnamed: 0,Scenario,Requirement,Requirement Type,Author
375,Travel Planning and Guidance Application,Software should appeal to a wide audience by p...,Nonfunctional,ChatGpt
47,Block chain -based secure voting system,Safety measures that prevent fraud and double ...,Functional,Human
277,Pet Maintenance Application,The system should be scaled and can be easily ...,Nonfunctional,ChatGpt
225,Online Education Platform Development and Stud...,Training materials should be presented in diff...,Functional,ChatGpt
357,Virtual Speech Practical Platform Supporting L...,The system should be scaled and can be easily ...,Nonfunctional,ChatGpt
156,Chatbot Support Customer Service Platform,That data storage and processing processes are...,Nonfunctional,Human
267,Pet Maintenance Application,Vaccination tracking of pets and keeping the h...,Functional,ChatGpt
315,Social Donation and Help Collection Mobile App...,Software should appeal to a wide audience by p...,Nonfunctional,ChatGpt
110,Smart Agriculture Application that gives farme...,Compliance with high safety standards by maint...,Nonfunctional,Human
216,Energy efficiency improvement application for ...,User account security should be provided with ...,Nonfunctional,ChatGpt


## 2. Loading model, splitting dataset

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
MODEL_NAME = 'google-bert/bert-base-uncased'

label2id = {'human': 0, 'chatgpt': 1}
id2label = {0: 'human', 1: 'chatgpt'}

In [11]:
tokenizer_bert = AutoTokenizer.from_pretrained(MODEL_NAME)
model_bert = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=2, id2label=id2label, label2id=label2id
)



Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

[1mBertForSequenceClassification LOAD REPORT[0m from: google-bert/bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m


In [12]:
train_df, test_df = train_test_split(dataset, test_size=.4, stratify=dataset['Author'], random_state=42)
test_df, val_df = train_test_split(test_df, test_size=.5, stratify=test_df['Author'], random_state=42)

train_df, test_df, val_df = [df.rename(columns={'Requirement': 'text', 'Author': 'labels'}) for df in [train_df, test_df, val_df]]

for df in [train_df, test_df, val_df]:
    df['labels'] = df['labels'].str.lower().map(label2id)

dataset_dict = DatasetDict({
    "train": Dataset.from_pandas(train_df[['text', 'labels']]),
    "test": Dataset.from_pandas(test_df[['text', 'labels']]),
    "validation": Dataset.from_pandas(val_df[['text', 'labels']]),
})

dataset_dict['train'][0]

{'text': 'User data should be backed regularly and stored safely',
 'labels': 1,
 '__index_level_0__': 218}

## 3. Tokenization, collating

In [13]:
from transformers import DataCollatorWithPadding

def preprocess_function(example):
    example['text'] = example['text'].lower()
    tokens = tokenizer_bert(
        example['text'],
        truncation=True,
        padding='max_length',
        max_length=128
    )
    return tokens

In [14]:
dataset_tokenized = dataset_dict.map(preprocess_function, batched=False)
collator = DataCollatorWithPadding(tokenizer=tokenizer_bert)

Map:   0%|          | 0/240 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

## 4. Defining Trainer & metrics computation

In [19]:
accuracy = evaluate.load('accuracy')
roc_auc = evaluate.load('roc_auc')

In [20]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    # softmax action
    probs = np.exp(predictions) / np.exp(predictions).sum(-1, keepdims=True)
    positive_probs = probs[:, 1]
    auc = np.round(roc_auc.compute(prediction_scores=positive_probs, references=labels)['roc_auc'], 3)

    predicted_classes = np.argmax(predictions, axis=1)
    acc = np.round(accuracy.compute(predictions=predicted_classes, references=labels)['accuracy'], 3)

    return {"accuracy": acc, "roc_auc": auc}

In [17]:
lora_config = peft.LoraConfig(
    task_type=peft.TaskType.SEQ_CLS,
    r=params['lora_r'],
    lora_alpha=params['lora_alpha'],
    lora_dropout=params['lora_dropout'],
    target_modules=['query', 'value'],
    
)

model_lora = peft.get_peft_model(model_bert, lora_config)
model_lora.print_trainable_parameters()

trainable params: 296,450 || all params: 109,780,228 || trainable%: 0.2700


In [None]:
training_args = TrainingArguments(
    output_dir='../dataset/results',
    per_device_train_batch_size=params['batch_size'],
    per_device_eval_batch_size=params['batch_size'],
    learning_rate=params['lr'],
    num_train_epochs=params['num_epochs'],
    logging_strategy='epoch',
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    fp16=True,
)

trainer = Trainer(
    args=training_args,
    model=model_lora,
    train_dataset=dataset_tokenized['train'],
    eval_dataset=dataset_tokenized['test'],
    data_collator=collator,
    compute_metrics=compute_metrics,
)

trainer.train()

## 5. Plotting results

In [None]:
log_history = trainer.state.log_history
train_logs = [x for x in log_history if 'loss' in x and 'eval_loss' not in x]
eval_logs  = [x for x in log_history if 'eval_loss' in x]

train_df = pd.DataFrame(train_logs)[['epoch', 'loss']].rename(columns={'loss': 'train_loss'})
eval_df  = pd.DataFrame(eval_logs)[['epoch', 'eval_loss', 'eval_accuracy', 'eval_roc_auc']]

results_df = pd.merge(train_df, eval_df, on='epoch')
results_df['epoch'] = results_df['epoch'].astype(int)
results_df

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('BERT fine-tuning results (LoRA)')

axes[0].plot(results_df['epoch'], results_df['train_loss'], marker='o', label='Train Loss')
axes[0].plot(results_df['epoch'], results_df['eval_loss'],  marker='o', label='Val Loss')
axes[0].set_title("Loss")
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Loss")
axes[0].legend()

axes[1].plot(results_df['epoch'], results_df['eval_accuracy'], marker='o', color='green')
axes[1].set_title("Validation Accuracy")
axes[1].set_xlabel("Epoch")
axes[1].set_ylabel("Accuracy")
axes[1].set_ylim(0.9, 1.0)

axes[2].plot(results_df['epoch'], results_df['eval_roc_auc'], marker='o', color='orange')
axes[2].set_title("Validation ROC-AUC")
axes[2].set_xlabel("Epoch")
axes[2].set_ylabel("ROC-AUC")
axes[2].set_ylim(0.9, 1.0)

plt.tight_layout()
plt.show()

## 6. Saving model