# Train Model with ORPO

### Imports

In [1]:
import json

import torch
from datasets import Dataset, DatasetDict, load_dataset, load_from_disk
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer

Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.14.0         Please see GitHub issue #2919 for more info


### Load model

In [2]:
# Local checkpoint directory used as the starting point for ORPO training.
model_name = "/home/ubuntu/projek_chatbot_galang/training_model/model/merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9"

# The tokenizer does not depend on the model object, so it is loaded first.
# Padding reuses the EOS token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; matmuls are computed in fp16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Load the quantized base model and let accelerate place it on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    attn_implementation="eager",
    device_map="auto",
    quantization_config=bnb_config,
)

# Make the quantized model trainable, then turn on gradient checkpointing.
model = prepare_model_for_kbit_training(model)
model.gradient_checkpointing_enable()

In [3]:
# LoRA adapter on the attention projections only; the MLP projections
# (up_proj / down_proj / gate_proj) are intentionally left out.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the quantized model with the adapter and report how many parameters will train.
# NOTE(review): re-running this cell wraps `model` again; restart the kernel instead.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 2,179,072 || all params: 1,545,893,376 || trainable%: 0.1410


In [4]:
# Preference dataset stored on disk; the trainer cell reads its "train" and "valid" splits.
dataset = load_from_disk(
    "/home/ubuntu/projek_chatbot_galang/rlhf/data_prep/dataset/preferences_chatbot_hf"
)

### Train model

In [None]:
# Run name; also used as the trainer's output directory.
ft_model_name = "taxbot-SeaLLMs-v3-1.5B_v9-ORPO-v1"

orpo_args = ORPOConfig(
    output_dir=ft_model_name,
    report_to='none',
    # --- ORPO objective and sequence limits ---
    beta=0.05,
    max_length=1024,
    max_prompt_length=512,
    # --- optimisation ---
    optim="adamw_torch",
    learning_rate=1e-5,
    lr_scheduler_type="linear",
    warmup_steps=100,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,  # 1 x 4 => effective batch of 4 per device
    # --- precision / memory ---
    fp16=True,
    bf16=False,
    gradient_checkpointing=True,
    # --- evaluation, logging, checkpoints ---
    eval_strategy="steps",
    eval_steps=0.1,  # a float below 1 is read as a fraction of total training steps
    logging_steps=25,
    save_strategy="epoch",
)

In [6]:
# Build the ORPO trainer on the preference splits.
#
# `model` has already been wrapped by `get_peft_model`, so it is a PeftModel and
# no `peft_config` is passed here. Supplying both makes TRL's ORPOTrainer merge
# and unload the existing adapter into the 4-bit base weights and then create a
# second adapter, which is redundant and subject to 4-bit rounding error.
trainer = ORPOTrainer(
    model=model,
    args=orpo_args,
    processing_class=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["valid"],
)

When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.


Map:   0%|          | 0/450 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [7]:
# Run ORPO fine-tuning with the settings in `orpo_args`.
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 151645, 'bos_token_id': None, 'pad_token_id': 151645}.


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
# Evaluate on the `eval_dataset` given to the trainer (the "valid" split).
trainer.evaluate()

In [None]:
# No path is given, so the model is saved to the trainer's output_dir (`ft_model_name`),
# which is where the merge cell loads the adapter from.
trainer.save_model()

In [None]:
# Merge the trained LoRA adapter into an unquantized fp16 copy of the base model
# and export a standalone checkpoint together with the tokenizer.
# `PeftModel` must be imported in the import cell; without it this cell raises NameError.
base = AutoModelForCausalLM.from_pretrained(model_name, dtype="float16")
lora = PeftModel.from_pretrained(base, ft_model_name)  # adapter saved under ft_model_name

# Fold the adapter weights into the base weights and drop the PEFT wrappers.
merged = lora.merge_and_unload()

# Model and tokenizer go to the same directory so the export loads as one checkpoint.
merged_dir = f"./merged-{ft_model_name}"
merged.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)