# Fine-tune Llama 3 with ORPO

In [1]:
#ORPO is a fine-tuning technique that combines the supervised fine-tuning and preference alignment stages into a single process. 
#Reduces the computational resources and time required for training. 
#Results demonstrate that ORPO outperforms other alignment methods on various model sizes and benchmarks.
#In this we will use Llama3 8B model to finetune
#Here I am using a A100 GPU to Finetune

## Install Libraries and Packages

In [2]:
%pip install --quiet --upgrade \
    pip \
    python-dotenv \
    datasets \
    accelerate \
    peft \
    bitsandbytes \
    transformers \
    trl \
    sentencepiece

Note: you may need to restart the kernel to use updated packages.


## Import Necessary Packages

In [3]:
import torch

from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
    TrainingArguments,
    logging,
)

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training

from trl import ORPOConfig, ORPOTrainer, setup_chat_format

  from .autonotebook import tqdm as notebook_tqdm


## Setup Model and Configurations for finetuning

In [4]:
#default configs
attn_implementation = "eager"
torch_dtype = torch.float16

In [5]:
# Model Name 
base_model = "meta-llama/Meta-Llama-3-8B"

In [6]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

In [7]:
# For loading model get access from the hugging face and load the token
import os
from dotenv import load_dotenv
# Load environment variables from the .env file
load_dotenv(override=True)
hf_access_token = os.environ["hf_access_token"]

from huggingface_hub import login
login(token=hf_access_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /home/ubuntu/.cache/huggingface/token
Login successful


In [8]:
# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation,
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.02s/it]


In [9]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [10]:
#Find target modules to finetune
def find_target_modules(model):
    # Initialize a Set to Store Unique Layers
    unique_layers = set()
    
    # Iterate Over All Named Modules in the Model
    for name, module in model.named_modules():
        # Check if the Module Type Contains 'Linear4bit'
        if "Linear4bit" in str(type(module)):
            # Extract the Type of the Layer
            layer_type = name.split('.')[-1]
            
            # Add the Layer Type to the Set of Unique Layers
            unique_layers.add(layer_type)

    # Return the Set of Unique Layers Converted to a List
    return list(unique_layers)

#Identify target modules to finetune
find_target_modules(model)

['gate_proj', 'q_proj', 'v_proj', 'down_proj', 'k_proj', 'o_proj', 'up_proj']

In [11]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

In [12]:
#Prepare model for finetuning
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)

## Prepare and analyze the data for finetune

In [13]:
# Load Dataset
dataset_name = "mlabonne/orpo-dpo-mix-40k"
dataset = load_dataset(dataset_name, split="all")
dataset

Dataset({
    features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
    num_rows: 44245
})

In [14]:
#Filter the dataset to 1000 examples
dataset = dataset.shuffle(seed=42).select(range(100))
dataset

Dataset({
    features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
    num_rows: 100
})

In [15]:
#setup chat template
def format_chat_template(row):
    row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False)
    row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False)
    return row

In [16]:
#Map the chat tempalte to the dataset
dataset = dataset.map(
    format_chat_template,
    num_proc= os.cpu_count(),
)

In [17]:
#Split the data for train and test
dataset = dataset.train_test_split(test_size=0.10)
dataset

DatasetDict({
    train: Dataset({
        features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
        num_rows: 90
    })
    test: Dataset({
        features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
        num_rows: 10
    })
})

## Setup Config for ORPO and Finetune the model

In [18]:
#Setup ORPO Configuration
orpo_args = ORPOConfig(
    learning_rate=8e-6,
    beta=0.1,
    lr_scheduler_type="linear",
    max_length=1024,
    max_prompt_length=512,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    optim="paged_adamw_8bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    save_steps=5,
    output_dir="./llama-results",
)



In [19]:
#Setup ORPO Trainer
trainer = ORPOTrainer(
    model=model,
    args=orpo_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    tokenizer=tokenizer,
)

Map: 100%|██████████| 90/90 [00:00<00:00, 323.77 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 203.89 examples/s]


In [20]:
#Train the model
trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
5,4.4611,3.657716,14.8464,0.674,0.337,-0.324876,-0.31916,0.5,-0.005715,-3.191603,-3.248756,-1.423845,-1.663832,3.580327,-0.773894,-0.063276
10,4.1267,3.555529,14.9106,0.671,0.335,-0.315476,-0.310061,0.4,-0.005416,-3.100605,-3.154764,-1.509871,-1.735576,3.478447,-0.770818,-0.060678
15,2.6494,3.425721,14.9026,0.671,0.336,-0.30361,-0.29885,0.4,-0.00476,-2.988505,-3.036102,-1.622714,-1.826672,3.349164,-0.765576,-0.054431
20,3.5484,3.358943,14.8756,0.672,0.336,-0.297227,-0.292766,0.4,-0.004461,-2.927659,-2.972268,-1.658632,-1.854209,3.282602,-0.763417,-0.051575




TrainOutput(global_step=22, training_loss=3.687498298558322, metrics={'train_runtime': 495.4333, 'train_samples_per_second': 0.182, 'train_steps_per_second': 0.044, 'total_flos': 0.0, 'train_loss': 3.687498298558322, 'epoch': 0.9777777777777777})

In [21]:
#Save the model
new_model_name = "llama3_orpo_finetuned"
trainer.save_model(new_model_name)



## Infernce the Model

In [22]:
#Flush the memory
# Flush memory
import gc
del trainer, model
gc.collect()
torch.cuda.empty_cache()

In [23]:
# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model, token=hf_access_token)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    token=hf_access_token,
)

model, tokenizer = setup_chat_format(model, tokenizer)

Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.16s/it]


In [25]:
# Merge adapter with base model
model = PeftModel.from_pretrained(model, new_model_name)
model = model.merge_and_unload()