In [None]:
! pip install mlflow

In [1]:
import gc
import os

import torch

from datasets import load_dataset

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Model identifiers --------------------------------------------------------
# Checkpoint to fine-tune, and the name the fine-tuned artefacts are stored under.
base_model = "NousResearch/Meta-Llama-3-8B"
new_model = "OrpoLlama-3-8B"

# Compute dtype handed to the 4-bit quantisation config below.
torch_dtype = torch.float16

# --- QLoRA: 4-bit NF4 quantisation with double quantisation ---------------------
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
)

# --- LoRA adapter settings -------------------------------------------------------
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "up_proj",
        "down_proj",
        "gate_proj",
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
    ],
)

# --- Tokenizer (downloaded into its own cache directory) -------------------------
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    cache_dir="/mnt/artifacts/llama3-tokenizer-cache/",
)

# --- Quantised base model --------------------------------------------------------
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    cache_dir="/mnt/artifacts/llama3-model-cache/",
    device_map="auto",
    quantization_config=bnb_config,
)

# Apply TRL's chat format to the (model, tokenizer) pair, then prepare the
# quantised model for k-bit training before adapters are attached.
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00,  2.33s/it]


In [3]:
# Preference dataset: load every split, shuffle with a fixed seed and keep the
# first 1000 rows so the run stays small.
dataset_name = "mlabonne/orpo-dpo-mix-40k"
dataset = (
    load_dataset(dataset_name, split="all")
    .shuffle(seed=42)
    .select(range(1000))
)

def format_chat_template(row):
    """Render the "chosen" and "rejected" conversations of one row as text.

    Both fields are passed through the notebook-level ``tokenizer``'s chat
    template with ``tokenize=False`` and written back in place.

    Args:
        row: A single dataset example with "chosen" and "rejected" entries.

    Returns:
        The same ``row`` object with both entries replaced by the templated
        output.
    """
    for column in ("chosen", "rejected"):
        row[column] = tokenizer.apply_chat_template(row[column], tokenize=False)
    return row

# Apply the chat template to every row, using one worker per CPU core.
dataset = dataset.map(
    format_chat_template,
    num_proc=os.cpu_count(),
)

# Hold out 1% of the rows for evaluation. The split is seeded (same seed as the
# shuffle above) so a Restart & Run All reproduces the same train/eval partition;
# without a seed the held-out rows change on every run.
dataset = dataset.train_test_split(test_size=0.01, seed=42)


In [None]:
orpo_args = ORPOConfig(
    # Optimisation
    optim="paged_adamw_8bit",
    learning_rate=8e-6,
    lr_scheduler_type="linear",
    warmup_steps=10,
    num_train_epochs=1,
    # ORPO-specific weighting
    beta=0.1,
    # Sequence length limits
    max_length=1024,
    max_prompt_length=512,
    # Batching: 2 samples x 4 accumulation steps = 8 samples per optimiser step
    # on each device
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    # Evaluation and logging. eval_steps is a float below 1 here; in the recorded
    # run evaluations landed at steps 25/50/75/100, i.e. every fifth of training.
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    report_to="mlflow",
    # Where checkpoints are written
    output_dir="/mnt/artifacts/results/",
)

# Build the ORPO trainer from the prepared model, the LoRA settings and the
# train/test halves of the split dataset.
trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=orpo_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

# Run training, then persist the result under the fine-tuned model's name.
trainer.train()
trainer.save_model(f"/mnt/artifacts/llama3_sft/{new_model}")


Map: 100%|██████████| 990/990 [00:03<00:00, 260.23 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 250.32 examples/s]
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
25,1.5957,1.561999,10.1324,0.987,0.493,-0.145552,-0.190834,0.6,0.045282,-1.90834,-1.455521,-1.567665,-1.325927,1.500058,-0.619404,0.448783
50,1.2809,1.168232,10.1328,0.987,0.493,-0.105035,-0.152447,0.7,0.047413,-1.524475,-1.050346,-1.649109,-1.289403,1.109664,-0.585684,0.508478
75,1.2137,1.080717,10.1389,0.986,0.493,-0.094836,-0.139743,0.7,0.044907,-1.397427,-0.948361,-1.754547,-1.387708,1.021519,-0.591975,0.49088
100,1.2523,1.026803,10.1337,0.987,0.493,-0.088153,-0.13185,0.7,0.043697,-1.318504,-0.881532,-1.777413,-1.423205,0.96672,-0.600825,0.479358


In [None]:
# Flush memory: drop the training objects and release cached CUDA blocks before
# loading a second, half-precision copy of the base model.
del trainer, model
gc.collect()
torch.cuda.empty_cache()

# Reload tokenizer and model (fp16 this time, not 4-bit, so the adapter can be
# merged into the weights).
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    cache_dir="/mnt/artifacts/llama3-tokenizer-cache/",
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    # Reuse the cache directory the base weights were downloaded into for
    # training; pointing at the tokenizer cache would trigger a second download.
    cache_dir="/mnt/artifacts/llama3-model-cache/",
)
# Same chat-format setup as before training so model and tokenizer line up with
# what the adapter was trained against.
model, tokenizer = setup_chat_format(model, tokenizer)

# The training cell saved its output to this directory; the bare `new_model`
# name is not a path that anything was written to.
adapter_dir = f"/mnt/artifacts/llama3_sft/{new_model}"
merged_dir = f"/mnt/artifacts/llama3_sft/merged/{new_model}"

# Merge adapter with base model
model = PeftModel.from_pretrained(model, adapter_dir)
model = model.merge_and_unload()
model.save_pretrained(merged_dir)
# Store the chat-formatted tokenizer next to the merged weights so the output
# directory can be loaded on its own.
tokenizer.save_pretrained(merged_dir)
