In [None]:
%pip install -q -U trl numpy torch peft transformers  datasets bitsandbytes wandb

# Imports

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import wandb
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from scipy.special import softmax
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, log_loss
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    set_seed,
)

# Load Quantized Model

In [None]:
# Hugging Face access token for the gated Llama-3 weights.
# It is read from the HF_TOKEN environment variable rather than hardcoded in the
# notebook; when the variable is unset this is None and transformers falls back to
# any credentials cached by `huggingface-cli login`.
token = os.environ.get("HF_TOKEN")

# Seed Python / NumPy / PyTorch so the randomly initialised classification head
# (and any later adapter initialisation) is reproducible between runs.
set_seed(42)

# 4-bit quantization settings applied while the weights are loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # NOTE: the keyword must be spelled `bnb_4bit_quant_type`; a misspelled keyword
    # is not applied and the quantization type stays at the library default.
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
# The tokenizer has no dedicated pad token, so EOS is reused for padding.
# BOS is avoided here because it is normally prepended to every input, which makes
# padding indistinguishable from real tokens for code that locates the last
# non-pad position by token id.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,  # Change according to your case, it is hate / non-hate in our work.
    quantization_config=bnb_config,
    device_map="auto",
    token=token,
)
# The classification head needs the pad id configured on the model as well.
model.config.pad_token_id = tokenizer.pad_token_id


def count_trainable_params(model):
    """Return the total number of elements in parameters with requires_grad=True."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


# Trainable parameters before any adapter is attached.
num_params = count_trainable_params(model)
formatted_num_params = "{:,}".format(num_params)
print(f"Number of trainable parameters: {formatted_num_params}")

In [None]:
#quantization configurations - so you quantize the model while inferencing
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_qunat_type = "nf4",
    bnb_4bit_compute_dtype = torch.float16,
)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Initial trainable parameters of our model.
def count_trainable_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


num_params = count_trainable_params(model)
formatted_num_params = "{:,}".format(num_params)
print(f"Number of trainable parameters: {formatted_num_params}")

## Quick Model Test

In [None]:
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))

## Loading Data

In [None]:
# Upper bound on tokens kept per example. `truncation=True` on its own only truncates
# to `tokenizer.model_max_length`, which may be far larger than is practical for
# training, so the limit is made explicit. Tune it to your data.
MAX_LENGTH = 512

dataset = load_dataset(
    'csv',
    data_files={
        'train': ['path-to-your-train-dataset'],
        'test': ['path-to-your-test-dataset'],
    },
)


def tokenize(examples):
    """Tokenize the `text` column of a batch, truncating each example to MAX_LENGTH tokens.

    Padding is intentionally not applied here; it is left to batch collation.
    """
    return tokenizer(examples["text"], truncation=True, max_length=MAX_LENGTH)


# NOTE(review): the CSV files must contain a `text` column; the Trainer presumably also
# needs a `label` column holding the class ids - confirm against your data.
train_dataset = dataset['train'].map(tokenize, batched=True)
test_dataset = dataset['test'].map(tokenize, batched=True)
dataset

## Configure LoRA Training

In [None]:
# ==========================================================================================
# Suggestions after extensive training runs:
#   * Targeting only the attention projections (k, q, v, o) is recommended; adding more
#     modules may overfit.
#   * For DoRA, as the paper suggests, a lower rank may be optimal; a good starting point
#     is r=8 with alpha=8 or 16.
#   * If you increase the rank (r=16) and alpha (above 16), try dropout (0.2).
# ==========================================================================================
# Ultimately it all depends on your specific use case: experiment with your own choices
# to find the hyperparameters that suit your task.
# ==========================================================================================
# The configuration below targets all seven projection layers with r=16, alpha=64 and
# dropout 0.2.

peft_config = LoraConfig(
    r=16,
    lora_alpha=64,
    lora_dropout=0.2,
    bias="none",
    task_type='SEQ_CLS',
    use_dora=True,  # Set to False to use plain LoRA instead of DoRA.
    target_modules=[
        "up_proj",
        "o_proj",
        "v_proj",
        "gate_proj",
        "q_proj",
        "down_proj",
        "k_proj",
    ],
)

In [None]:
# Change the parameters and hyper-parameters as per your use case.
epochs = 10
batch_size = 5
gradient_accumulation_steps = 4

# Output directory (also used as the run name).
model_version = "openchat_3.5_QLoRA"
model_dir = f"{model_version}"

training_args = TrainingArguments(
    run_name=model_version,
    logging_dir=f"{model_dir}/logs",
    output_dir=model_dir,
    logging_steps=100,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    # `evaluation_strategy` was renamed to `eval_strategy`; the old keyword is
    # deprecated and rejected by recent transformers releases.
    eval_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=epochs,
    lr_scheduler_type="constant",
    save_strategy="epoch",
    fp16=True,
    # Metrics are tracked with Weights & Biases; be explicit rather than relying on
    # auto-detection of installed integrations.
    report_to="wandb",
)

# Prepare the quantized model for training and attach the LoRA/DoRA adapters.
# NOTE: this rebinds `model`; reload the base model before re-running this cell,
# otherwise the adapters are stacked on an already-wrapped model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# Trainable parameters after applying LoRA.
num_params = count_trainable_params(model)
formatted_num_params = "{:,}".format(num_params)
print(f"Number of trainable parameters: {formatted_num_params}")

## Run Training

In [None]:
# Evaluation metrics for our use case (macro-F1 is essential; log-loss is optional).
def compute_metrics(p):
    """Compute accuracy and macro-F1 from a Trainer evaluation result.

    Args:
        p: Object exposing `predictions` (per-class scores, or a tuple whose first
            element holds them) and `label_ids` (reference class ids).

    Returns:
        dict with the keys "accuracy" and "macro_f1".
    """
    scores, labels = p.predictions, p.label_ids
    # Some models return several outputs; in that case the scores come first.
    if isinstance(scores, (tuple, list)):
        scores = scores[0]
    predicted_classes = np.argmax(scores, axis=1)

    return {
        "accuracy": accuracy_score(labels, predicted_classes),
        "macro_f1": f1_score(labels, predicted_classes, average='macro'),
    }

# Start the Weights & Biases run first so training logs go to this project/run
# instead of an automatically created one.
wandb.init(
    project="HOLD-Final",  # Project under which this run is stored
    name=model_version,  # Run name
)

# Configure the Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # `tokenizer=` is deprecated in favour of `processing_class=`; passing the
    # tokenizer here also lets the Trainer pad each batch dynamically.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Start training.
trainer.train()

In [None]:
# Persist the fine-tuned model under `<model_dir>/model`.
final_model_path = f'{model_dir}/model'
trainer.save_model(final_model_path)