## Install Libraries and Packages

In [1]:
%pip install --quiet --upgrade \
    pip \
    python-dotenv \
    datasets \
    accelerate \
    peft \
    bitsandbytes \
    transformers \
    trl \
    sentencepiece

Note: you may need to restart the kernel to use updated packages.


## Import Necessary Packages

In [2]:
import os
import torch

from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
    TrainingArguments,
    logging,
)

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training

from trl import ORPOConfig, ORPOTrainer, setup_chat_format

## Load Model and Tokenizer

In [3]:
model_path = "./gpt2_tagalog"
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [4]:
# Set the pad token to be the same as the eos token
tokenizer.pad_token = tokenizer.eos_token

## Prepare and analyze the data for finetune

In [5]:
# Load Dataset
dataset_name = "mlabonne/orpo-dpo-mix-40k"
dataset = load_dataset(dataset_name, split="all")
dataset

Dataset({
    features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
    num_rows: 44245
})

In [6]:
#Filter the dataset to 1000 examples
dataset = dataset.shuffle(seed=42).select(range(100))
dataset

Dataset({
    features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
    num_rows: 100
})

In [7]:
dataset[0]

{'source': 'ultrachat',
 'chosen': [{'content': 'Can you provide examples of how different cultures approach grief and mourning, and how this has evolved over time?',
   'role': 'user'},
  {'content': 'Of course, I\'ll do my best to provide you with accurate and helpful information.\n\nGrief and mourning are universal human experiences, and different cultures have developed unique ways of coping with loss and honoring the deceased. These practices have evolved over time, reflecting changes in societal values, beliefs, and traditions. Here are some examples:\n\n1. Ancient Egypt: In ancient Egyptian culture, mourning was a complex and elaborate process that lasted for 70 days. During this time, the family of the deceased would observe a period of mourning, known as "the sorrows of the heart," which involved abstaining from certain foods, wearing plain clothes, and covering their faces with dust and ashes. After 70 days, the family would hold a feast to celebrate the deceased\'s transitio

In [8]:
def format_chat_template(row):
    # Formatting for the chosen response
    chosen_template = """
    <|user|>
    {user_message}
    <|assistant|>
    {assistant_message}
    """.strip()

    # Formatting for the rejected response
    rejected_template = """
    <|user|>
    {user_message}
    <|assistant|>
    {assistant_message}
    """.strip()

    formatted_chosen = chosen_template.format(
        user_message=row["chosen"][0]["content"], 
        assistant_message=row["chosen"][1]["content"]
    )

    formatted_rejected = rejected_template.format(
        user_message=row["rejected"][0]["content"], 
        assistant_message=row["rejected"][1]["content"]
    )

    return {
        "chosen": formatted_chosen,
        "rejected": formatted_rejected
    }


In [9]:
tokenizer.apply_chat_template

<bound method PreTrainedTokenizerBase.apply_chat_template of GPT2TokenizerFast(name_or_path='./gpt2_tagalog', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}>

In [10]:
# Apply the formatting function
formatted_dataset = dataset.map(format_chat_template)

In [11]:
formatted_dataset[0]

{'source': 'ultrachat',
 'chosen': '<|user|>\n    Can you provide examples of how different cultures approach grief and mourning, and how this has evolved over time?\n    <|assistant|>\n    Of course, I\'ll do my best to provide you with accurate and helpful information.\n\nGrief and mourning are universal human experiences, and different cultures have developed unique ways of coping with loss and honoring the deceased. These practices have evolved over time, reflecting changes in societal values, beliefs, and traditions. Here are some examples:\n\n1. Ancient Egypt: In ancient Egyptian culture, mourning was a complex and elaborate process that lasted for 70 days. During this time, the family of the deceased would observe a period of mourning, known as "the sorrows of the heart," which involved abstaining from certain foods, wearing plain clothes, and covering their faces with dust and ashes. After 70 days, the family would hold a feast to celebrate the deceased\'s transition to the aft

In [12]:
#Split the data for train and test
formatted_dataset = formatted_dataset.train_test_split(test_size=0.10)
formatted_dataset

DatasetDict({
    train: Dataset({
        features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
        num_rows: 90
    })
    test: Dataset({
        features: ['source', 'chosen', 'rejected', 'prompt', 'question'],
        num_rows: 10
    })
})

In [13]:
def find_linear_layers(model):
    # Initialize a list to store the names of all linear layers
    linear_layers = []
    
    # Iterate over all named modules in the model
    for name, module in model.named_modules():
        # Check if the module is of type Linear (from nn.Module)
        if isinstance(module, torch.nn.Linear):
            # Append the name of the layer to the list
            linear_layers.append(name)
    
    # Return the list of linear layer names
    return linear_layers

# Assuming 'model' is your GPT-2 model
linear_layer_names = find_linear_layers(model)
print(linear_layer_names)

['lm_head']


In [14]:
#default configs
attn_implementation = "eager"
torch_dtype = torch.float16

In [15]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['lm_head']
)

In [16]:
#Setup ORPO Configuration
orpo_args = ORPOConfig(
    learning_rate=8e-6,
    beta=0.1,
    lr_scheduler_type="linear",
    max_length=1024,
    max_prompt_length=512,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    optim="paged_adamw_8bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    save_steps=5,
    output_dir="./tagalog-results",
)



In [17]:
#Setup ORPO Trainer
trainer = ORPOTrainer(
    model=model,
    args=orpo_args,
    train_dataset=formatted_dataset["train"],
    eval_dataset=formatted_dataset["test"],
    peft_config=peft_config,
    tokenizer=tokenizer,
)



Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [18]:
#Train the model
trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
5,6.4851,8.00808,1.0108,9.894,4.947,-0.778745,-0.733009,0.5,-0.045736,-7.330093,-7.787448,-3.016872,-3.016319,7.897358,-1.107213,-0.458247
10,6.2683,8.007823,1.0077,9.923,4.962,-0.778718,-0.732982,0.5,-0.045736,-7.32982,-7.787179,-3.017571,-3.017027,7.897102,-1.10721,-0.45825
15,5.8418,8.007494,1.0101,9.9,4.95,-0.778683,-0.732948,0.5,-0.045735,-7.32948,-7.786832,-3.018563,-3.018042,7.896774,-1.107198,-0.458244
20,6.1523,8.00733,1.0083,9.918,4.959,-0.778666,-0.732931,0.5,-0.045735,-7.329309,-7.786659,-3.01915,-3.01864,7.89661,-1.107192,-0.458241




TrainOutput(global_step=22, training_loss=6.2801573059775615, metrics={'train_runtime': 19.1045, 'train_samples_per_second': 4.711, 'train_steps_per_second': 1.152, 'total_flos': 0.0, 'train_loss': 6.2801573059775615, 'epoch': 0.9777777777777777})