<a href="https://colab.research.google.com/github/aligreo/LLMs/blob/main/finetune_llama_3_2_3B_dpo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
%%capture
!pip install -U bitsandbytes trl -q

In [3]:
from google.colab import userdata

# Fetch the Hugging Face access token stored under this name in Colab's
# user data; it is passed as `token=` to the from_pretrained calls below.
HF_SECRET_NAME = "HF_TOKEN"
hf_token = userdata.get(HF_SECRET_NAME)

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_name = "meta-llama/Llama-3.2-3b-instruct"

# Load the tokenizer once, authenticated with the same token as the model.
# (Previously it was loaded twice, the first time without the token, and the
# first instance was simply overwritten.)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Quantized base model, placed on the available device(s) automatically.
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             quantization_config=bnb_config,
                                             device_map="auto",
                                             low_cpu_mem_usage=True,
                                             token=hf_token)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [5]:
# Turn off the key/value cache on the model config before training.
# NOTE(review): presumably because caching conflicts with the gradient
# checkpointing enabled in the DPOConfig further down — confirm.
model.config.use_cache = False

In [6]:
from peft import LoraConfig, prepare_model_for_kbit_training

# Names of the projection modules that receive a LoRA adapter
# (q/k/v/o plus up/down/gate).
lora_target_modules = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'up_proj', 'down_proj', 'gate_proj',
]

# LoRA hyper-parameters: rank 16, alpha 32, 5% dropout, no bias training.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Run PEFT's k-bit preparation helper on the quantized model loaded above.
model = prepare_model_for_kbit_training(model)

In [7]:
# Preference dataset: the formatting step below reads the 'prompt',
# 'chosen' and 'rejected' fields of every record.

from datasets import load_dataset

dataset_name = "Dahoas/synthetic-instruct-gptj-pairwise"
dataset = load_dataset(path=dataset_name, split="train")

dataset_infos.json: 0.00B [00:00, ?B/s]

(…)-00000-of-00001-1e5d57b93c448e7a.parquet:   0%|          | 0.00/18.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/33143 [00:00<?, ? examples/s]

In [8]:
## prepare the dataset

def chatml_format(example):
    """Turn one preference record into the prompt/chosen/rejected triple.

    Args:
        example: mapping with the keys 'prompt', 'chosen' and 'rejected'.

    Returns:
        dict with
          - "prompt": the user message rendered by the module-level
            ``tokenizer``'s chat template, with the generation prompt added;
          - "chosen" / "rejected": the original answers, each followed by the
            end-of-turn marker and a newline.

    NOTE(review): despite the name, the layout is whatever chat template the
    tokenizer carries. The trailing newline after the end-of-turn marker, and
    whether the trainer appends its own EOS on top of it, should be confirmed.
    """
    end_of_turn = "<|eot_id|>\n"
    user_turn = [{"role": "user", "content": example['prompt']}]

    return {
        "prompt": tokenizer.apply_chat_template(
            user_turn, tokenize=False, add_generation_prompt=True
        ),
        "chosen": example['chosen'] + end_of_turn,
        "rejected": example['rejected'] + end_of_turn,
    }

# Padding setup for the tokenizer handed to the trainer:
# reuse EOS as the pad token and pad on the left.
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

# Apply the formatter to every example, passing the pre-existing column
# names as `remove_columns`.
original_columns = dataset.column_names
dataset = dataset.map(chatml_format, remove_columns=original_columns)

Map:   0%|          | 0/33143 [00:00<?, ? examples/s]

In [9]:
# Inspect the first formatted example (prompt / chosen / rejected).
dataset[0]

{'prompt': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 03 Jul 2025\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI was wondering if you could walk me through the process of setting up a hydroponic garden for herbs.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n',
 'chosen': "Sure! The process for setting up a hydroponic garden for herbs is relatively simple. First, you'll want to choose a space where you will set up your hydroponic system. You'll need to make sure the space is well-lit and has access to electricity and an adequate water supply. Next, you'll need to choose the type of hydroponic system you want to use. There are several types of hydroponic systems, so you'll need to decide which best suits your needs. Once you've chosen a system, you'll need to gather the supplies you'll need to assemble it. This includes things like pumps, growing trays, grow lights, and nutrients. Once you've

In [17]:
import warnings

# Ignore every warning category from here on in this session.
warnings.filterwarnings(action="ignore")

In [20]:
from trl import DPOTrainer, DPOConfig

# Output location and run label, both derived from the base model name.
dpo_output_dir = f"{model_name}-dpo-based-dataset"
dpo_run_name = f"{model_name}-finetuning"

args = DPOConfig(
    # bookkeeping
    output_dir=dpo_output_dir,
    run_name=dpo_run_name,
    report_to="none",
    logging_steps=10,
    # schedule: 100 steps, batch size 1 with 4 gradient-accumulation steps
    max_steps=100,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # optimizer and learning-rate schedule
    optim="adamw_8bit",
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    # DPO-specific settings
    beta=0.1,
    max_prompt_length=1024,
    max_length=2048,
)

# No explicit reference model is supplied.
# NOTE(review): presumably TRL then uses the adapter-free base model as the
# reference when a peft_config is given — confirm against the TRL docs.
trainer = DPOTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    processing_class=tokenizer,
    peft_config=peft_config,
    ref_model=None,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [21]:
# Run the DPO fine-tuning configured above (capped at max_steps=100).
trainer.train()

Step,Training Loss
10,0.549
20,0.1761
30,0.1721
40,0.0653
50,0.0208
60,0.0016
70,0.0018
80,0.0065
90,0.0077
100,0.027


TrainOutput(global_step=100, training_loss=0.1027923670783639, metrics={'train_runtime': 2078.4535, 'train_samples_per_second': 0.192, 'train_steps_per_second': 0.048, 'total_flos': 0.0, 'train_loss': 0.1027923670783639, 'epoch': 0.012068913496062516})

In [23]:
# Write the trainer's model and its tokenizer into the same directory.
dpo_save_dir = f"{model_name}-dpo"
trainer.model.save_pretrained(dpo_save_dir)
trainer.processing_class.save_pretrained(dpo_save_dir)

('meta-llama/Llama-3.2-3b-instruct-dpo/tokenizer_config.json',
 'meta-llama/Llama-3.2-3b-instruct-dpo/special_tokens_map.json',
 'meta-llama/Llama-3.2-3b-instruct-dpo/chat_template.jinja',
 'meta-llama/Llama-3.2-3b-instruct-dpo/tokenizer.json')

In [24]:
import os

from transformers import pipeline

# Resolve the directory that was written with f"{model_name}-dpo" at save time
# instead of hard-coding "/content/...". With the working directory at
# /content this yields exactly the previous path, and it keeps working when
# the notebook runs from a different directory.
dpo_model_dir = os.path.abspath(f"{model_name}-dpo")

# Text-generation pipeline backed by the saved model and tokenizer.
pipe = pipeline(task='text-generation',
                model=dpo_model_dir,
                tokenizer=dpo_model_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [26]:
# Sanity check 1: a short arithmetic word problem, sent as raw text.
apples_prompt = "amira has a 3 apples. she give her sister 2. then her father give her 5. what the number of apples amira have now?"
out = pipe(apples_prompt)

In [27]:
# Print the text of the first returned sequence (prompt followed by the continuation).
print(out[0]['generated_text'])

amira has a 3 apples. she give her sister 2. then her father give her 5. what the number of apples amira have now? 
## Step 1: Calculate the initial number of apples Amira has.
Amira starts with 3 apples.

## Step 2: Calculate the number of apples Amira has after giving some to her sister.
Amira gives 2 apples to her sister, leaving her with 3 - 2 = 1 apple.

## Step 3: Calculate the final number of apples Amira has after receiving some from her father.
Amira's father gives her 5 apples, so she now has 1 + 5 = 6 apples.

The final answer is: $\boxed{6}$


In [28]:
# Sanity check 2: a one-variable linear equation.
algebra_prompt = "solve for 4x + 10 = 30"
out = pipe(algebra_prompt)
print(out[0]['generated_text'])

solve for 4x + 10 = 30
## Step 1: Subtract 10 from both sides of the equation
Subtracting 10 from both sides of the equation 4x + 10 = 30 will give us 4x = 20.

## Step 2: Divide both sides of the equation by 4
Dividing both sides of the equation 4x = 20 by 4 will give us the value of x, which is x = 5.

The final answer is: $\boxed{5}$
