# Fine-Tuning of LLMs with Hugging Face

## Step 1: Installing and importing the libraries for Hugging Face

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [None]:
!pip install huggingface_hub



In [None]:
import os
import torch
from trl import SFTTrainer
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging)

## Step 2: Setting up links to Hugging Face datasets and models

In [None]:
# Hugging Face Hub identifiers used by the rest of the notebook.
formatted_dataset = "aboonaji/wiki_medical_terms_llam2_format"  # loaded as the training set
source_dataset = "gamino/wiki_medical_terms"  # not referenced by any other cell visible in this notebook
model_identifier = "aboonaji/llama2finetune-v2"  # used for both the model and the tokenizer

## Step 3: Setting up the LoRA hyperparameters for QLoRA fine-tuning

In [None]:
# LoRA adapter settings; each one is forwarded to LoraConfig later in the notebook.
lora_hyper_dropout = 0.1  # -> lora_dropout
lora_hyper_alpha = 16     # -> lora_alpha
lora_hyper_r = 64         # -> r

## Step 4: Setting up all the bitsandbytes hyperparameters for fine-tuning

In [None]:
# bitsandbytes settings; each one is forwarded to BitsAndBytesConfig later in the notebook.
quant_type_bnb = "nf4"         # -> bnb_4bit_quant_type
compute_dtype_bnb = "float16"  # name of a torch attribute, resolved with getattr(torch, ...) before use
double_quant_flag = False      # -> bnb_4bit_use_double_quant
enable_4bit = True             # -> load_in_4bit

## Step 5: Setting up the training-argument hyperparameters for fine-tuning

In [None]:
# Values forwarded to TrainingArguments later in the notebook (target keyword shown per line).

# --- Output, checkpointing and logging ---
results_dir = "./results"    # -> output_dir
checkpoint_interval = 0      # -> save_steps
log_interval = 25            # -> logging_steps

# --- Run length ---
# NOTE(review): the recorded run ended at global_step=100 (epoch 0.06), i.e. it was
# bounded by steps_limit rather than by epochs_count.
steps_limit = 100            # -> max_steps
epochs_count = 10            # -> num_train_epochs

# --- Batching and memory ---
train_batch_size = 4         # -> per_device_train_batch_size
eval_batch_size = 4          # -> per_device_eval_batch_size
accumulation_steps = 1       # -> gradient_accumulation_steps
checkpointing_flag = True    # -> gradient_checkpointing
length_grouping = True       # -> group_by_length

# --- Precision ---
enable_fp16 = False          # -> fp16
enable_bf16 = False          # -> bf16

# --- Optimisation ---
optimizer_type = "paged_adamw_32bit"  # -> optim
train_learning_rate = 2e-4            # -> learning_rate
decay_rate = 0.001                    # -> weight_decay
grad_norm_limit = 0.3                 # -> max_grad_norm
scheduler_type = "cosine"             # -> lr_scheduler_type
warmup_percentage = 0.03              # -> warmup_ratio

## Step 6: Setting up the supervised fine-tuning (SFT) trainer hyperparameters

In [None]:
# Settings consumed when the model is loaded and when the SFTTrainer is built.
device_assignment = {"": 0}  # -> device_map in from_pretrained
sequence_length_max = None   # -> max_seq_length in SFTTrainer
enable_packing = False       # -> packing in SFTTrainer

## Step 7: Loading the dataset

In [None]:
# Load only the "train" split of the pre-formatted dataset.
training_data = load_dataset(path=formatted_dataset, split="train")

In [None]:
# Bare last expression: the notebook displays the dataset summary (feature names and row count).
training_data

Dataset({
    features: ['text'],
    num_rows: 6861
})

## Step 8: Defining the bitsandbytes 4-bit quantization configuration (the "Q" in QLoRA)

In [None]:
# Turn the dtype name held in compute_dtype_bnb into the matching torch attribute.
dtype_computation = getattr(torch, compute_dtype_bnb)

# Quantization settings that are handed to from_pretrained when the model is loaded.
bnb_setup = BitsAndBytesConfig(
    load_in_4bit=enable_4bit,
    bnb_4bit_compute_dtype=dtype_computation,
    bnb_4bit_quant_type=quant_type_bnb,
    bnb_4bit_use_double_quant=double_quant_flag,
)

## Step 9: Loading the pre-trained LLaMA 2 model

In [None]:
# Load the checkpoint with the quantization settings, placed according to device_assignment.
llama_model = AutoModelForCausalLM.from_pretrained(
    model_identifier,
    quantization_config=bnb_setup,
    device_map=device_assignment,
)

# The config flag is `use_cache` (not `use_case`). It is switched off because the
# key/value cache is incompatible with the gradient checkpointing used during training.
llama_model.config.use_cache = False

# NOTE(review): presumably selects the standard (non-sliced) linear-layer path — confirm
# against the LlamaConfig documentation.
llama_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Step 10: Loading the pre-trained tokenizer for the LLaMA 2 model

In [None]:
# NOTE(review): trust_remote_code=True permits executing code shipped with the Hub repo —
# confirm it is actually required for this checkpoint.
llama_tokenizer = AutoTokenizer.from_pretrained(
    model_identifier,
    trust_remote_code=True,
)

# Pad on the right, reusing the end-of-sequence token as the padding token.
llama_tokenizer.padding_side = "right"
llama_tokenizer.pad_token = llama_tokenizer.eos_token

## Step 11: Setting up the configuration for the LoRA fine-tuning method

In [None]:
# LoRA adapter configuration for a causal language model; no bias terms are trained.
peft_setup = LoraConfig(
    r=lora_hyper_r,
    lora_alpha=lora_hyper_alpha,
    lora_dropout=lora_hyper_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

## Step 12: Creating a training configuration by setting the training parameters

In [None]:
# Bundle the training hyperparameters defined earlier into one TrainingArguments object.
train_args = TrainingArguments(
    # Output, checkpointing and logging
    output_dir=results_dir,
    save_steps=checkpoint_interval,
    logging_steps=log_interval,
    # Run length
    num_train_epochs=epochs_count,
    max_steps=steps_limit,
    # Batching and memory
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=eval_batch_size,
    gradient_accumulation_steps=accumulation_steps,
    gradient_checkpointing=checkpointing_flag,
    group_by_length=length_grouping,
    # Precision
    fp16=enable_fp16,
    bf16=enable_bf16,
    # Optimisation
    optim=optimizer_type,
    learning_rate=train_learning_rate,
    weight_decay=decay_rate,
    max_grad_norm=grad_norm_limit,
    lr_scheduler_type=scheduler_type,
    warmup_ratio=warmup_percentage,
)

## Step 13: Creating the Supervised Fine-Tuning Trainer

In [None]:
# Assemble the supervised fine-tuning trainer from the model, tokenizer, dataset and configs.
llama_sftt_trainer = SFTTrainer(
    model=llama_model,
    tokenizer=llama_tokenizer,
    train_dataset=training_data,
    dataset_text_field="text",  # the dataset's only feature column
    peft_config=peft_setup,
    args=train_args,
    max_seq_length=sequence_length_max,
    packing=enable_packing,
)



Map:   0%|          | 0/6861 [00:00<?, ? examples/s]

## Step 14: Training the model

In [None]:
# Run fine-tuning; the returned TrainOutput (step count, loss, runtime metrics) is shown as the cell result.
llama_sftt_trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
25,1.7077
50,0.8495
75,1.3928
100,0.7597


TrainOutput(global_step=100, training_loss=1.1774291801452637, metrics={'train_runtime': 1045.3591, 'train_samples_per_second': 0.383, 'train_steps_per_second': 0.096, 'total_flos': 5978369907425280.0, 'train_loss': 1.1774291801452637, 'epoch': 0.06})

## Step 15: Chatting with the model

In [None]:
user_prompt = "Please tell me about Bursitis"

# The model may still be in train mode after fine-tuning; switch to eval mode so that
# dropout layers are inactive while generating.
llama_model.eval()

# Text-generation pipeline around the fine-tuned model; max_length caps the sequence at 300.
text_generation_pipe = pipeline(
    task="text-generation",
    model=llama_model,
    tokenizer=llama_tokenizer,
    max_length=300,
)

# Wrap the question in the [INST] ... [/INST] instruction markers before generating.
generation_result = text_generation_pipe(f"<s>[INST] {user_prompt} [/INST]")
print(generation_result[0]['generated_text'])

<s>[INST] Please tell me about Bursitis [/INST]  Bursitis is a condition where the bursae, small fluid-filled sacs that cushion and reduce friction between tendons, muscles, and bones, become inflamed. everybody has bursae, but they are more common in the joints. Bursitis is a common condition that can affect any joint in the body. It is usually caused by repetitive motion or injury to the affected joint.

Bursitis can be caused by a number of factors, including:

* Overuse or repetitive motion of a joint, such as from repetitive jumping or running.
* Injury to a joint, such as from a fall or a blow.
* Infection of the bursa, which can occur from bacteria or viruses.
* Rheumatoid arthritis or other autoimmune disorders.
* Crystal-induced arthritis, which is caused by the buildup of crystals in the joint.
* Gout, which is caused by the buildup of uric acid in the joint.

Symptoms of bursitis may include:

* Pain or tenderness in the affected joint.
* Swelling or redness in the affected 