In [None]:
! pip install mlflow

In [None]:
import gc
import os

import torch

from datasets import load_dataset

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

In [2]:
# Checkpoint to fine-tune and the name used for the resulting artifacts
base_model = "NousResearch/Meta-Llama-3-8B"
new_model = "OrpoLlama-3-8B"

# Compute dtype used by the quantized layers
torch_dtype = torch.float16

# QLoRA: 4-bit NF4 weights with double quantization enabled
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch_dtype,
)

# LoRA: rank-16 adapters attached to the listed projection modules
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "up_proj",
        "down_proj",
        "gate_proj",
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
    ],
)

# Tokenizer (cached on the artifacts volume)
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    cache_dir="/mnt/artifacts/llama3-tokenizer-cache/",
)

# Quantized base model, placed on the available devices automatically.
# NOTE: attn_implementation is not passed here (an earlier override is disabled).
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    cache_dir="/mnt/artifacts/llama3-model-cache/",
    device_map="auto",
    quantization_config=bnb_config,
)

# setup_chat_format returns an updated (model, tokenizer) pair; the model is
# then prepared for k-bit (quantized) training before adapters are attached.
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Downloading shards: 100%|██████████| 4/4 [01:39<00:00, 24.86s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00,  2.29s/it]


In [3]:
# Preference dataset pulled from the Hugging Face Hub (all splits combined)
dataset_name = "mlabonne/orpo-dpo-mix-40k"
dataset = load_dataset(dataset_name, split="all")

# Seeded shuffle, then keep only the first 1000 rows
dataset = dataset.shuffle(seed=42)
dataset = dataset.select(range(1000))

def format_chat_template(row):
    """Render the ``chosen`` and ``rejected`` fields of ``row`` as chat text.

    Each field is passed through the notebook-level ``tokenizer``'s
    ``apply_chat_template`` with ``tokenize=False`` and written back in place.

    Returns:
        The same ``row`` object, with both fields replaced.
    """
    for field in ("chosen", "rejected"):
        row[field] = tokenizer.apply_chat_template(row[field], tokenize=False)
    return row

# Apply the chat template to every row, using one worker per CPU core
dataset = dataset.map(
    format_chat_template,
    num_proc=os.cpu_count(),
)

# Hold out 1% of the rows for evaluation. The split is seeded (same seed as
# the shuffle) so that the train/test partition is reproducible across runs.
dataset = dataset.train_test_split(test_size=0.01, seed=42)


Downloading readme: 100%|██████████| 2.92k/2.92k [00:00<00:00, 24.9MB/s]
Downloading data: 100%|██████████| 115M/115M [00:00<00:00, 157MB/s]  
Generating train split: 100%|██████████| 44245/44245 [00:00<00:00, 79088.74 examples/s]
Map (num_proc=8): 100%|██████████| 1000/1000 [00:00<00:00, 3601.67 examples/s]


In [5]:
# MLflow experiment that the run is logged under
os.environ["MLFLOW_EXPERIMENT_NAME"] = "llama3_orpo"
# Pinning a specific run id is currently disabled:
# os.environ["MLFLOW_RUN_ID"] = "orpo_dpo_mix_40k_1"


orpo_args = ORPOConfig(
    # --- outputs and tracking ---
    output_dir="/mnt/artifacts/results/",
    report_to="mlflow",
    logging_steps=1,
    # --- ORPO objective and sequence limits ---
    beta=0.1,
    max_length=1024,
    max_prompt_length=512,
    # --- optimisation ---
    optim="paged_adamw_8bit",
    learning_rate=8e-6,
    lr_scheduler_type="linear",
    warmup_steps=10,
    num_train_epochs=1,
    # 2 samples x 4 accumulation steps = 8 samples per optimizer step per device
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # --- evaluation ---
    evaluation_strategy="steps",
    # NOTE(review): a float below 1 is presumably read as a fraction of the
    # total training steps — confirm against the installed transformers version.
    eval_steps=0.2,
    per_device_eval_batch_size=2,
)

trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    args=orpo_args,
    peft_config=peft_config,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

# Run training, then write the trained model to the artifacts volume
trainer.train()
trainer.save_model(f"/mnt/artifacts/llama3_sft/{new_model}")


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
25,3.2139,3.384099,9.6927,1.032,0.516,-0.389522,-0.383038,0.3,-0.006484,-3.830378,-3.895217,-2.058413,-2.042024,3.29496,-0.891384,-0.074043
50,2.4823,2.733676,9.6918,1.032,0.516,-0.295852,-0.300485,0.3,0.004634,-3.004853,-2.958516,-2.071503,-2.052168,2.652839,-0.808365,0.031752
75,2.2862,2.373255,9.6962,1.031,0.516,-0.245485,-0.254076,0.5,0.008591,-2.54076,-2.454846,-1.935581,-1.889193,2.295473,-0.777829,0.06898
100,2.3516,2.210991,9.6933,1.032,0.516,-0.225141,-0.236281,0.4,0.01114,-2.362808,-2.25141,-1.886242,-1.803448,2.13542,-0.755712,0.095553




In [9]:
# Flush memory: drop the training objects, then release cached CUDA memory
del trainer, model
gc.collect()
torch.cuda.empty_cache()

# Reload the tokenizer and the base model in fp16 (no quantization config)
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    cache_dir="/mnt/artifacts/llama3-tokenizer-cache/",
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    # Same model cache directory as the training-time load, so the weights
    # are reused instead of being stored again under the tokenizer cache.
    cache_dir="/mnt/artifacts/llama3-model-cache/",
)
# Apply the same chat-format setup as before training so the reloaded
# model/tokenizer pair matches what the adapter was trained against.
model, tokenizer = setup_chat_format(model, tokenizer)

# Merge adapter with base model
model = PeftModel.from_pretrained(model, f"/mnt/artifacts/llama3_sft/{new_model}")
model = model.merge_and_unload()

# Save the merged weights together with the tokenizer returned by
# setup_chat_format, so the merged directory can be loaded on its own.
merged_dir = f"/mnt/artifacts/llama3_sft/merged/{new_model}"
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:12<00:00,  3.13s/it]
