<a href="https://colab.research.google.com/github/d-klotz/llama-fine-tunning/blob/main/fine_tune_llama3_original.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load and clean the product data in a single chain so the cell is idempotent
# and never assigns into a slice of another frame (the original column
# assignments on the head()/column-subset result are the pattern pandas flags
# with SettingWithCopyWarning).
df = (
    # lines=True because the file is JSON Lines (one JSON object per line)
    pd.read_json('/content/drive/MyDrive/Colab Notebooks/data/trn.json', lines=True)
    # Use only the first 10k rows
    .head(10000)
    # Select only the desired columns
    .loc[:, ['uid', 'title', 'content']]
    # Empty strings count as missing data: turn them into NA so dropna sees them.
    # assign() works on a new copy, so no other frame is modified.
    .assign(
        title=lambda frame: frame['title'].replace('', pd.NA),
        content=lambda frame: frame['content'].replace('', pd.NA),
    )
    # Remove rows with missing data in the 'title' or 'content' columns
    .dropna(subset=['title', 'content'])
)

In [None]:
# Plain-text preview of the first 10 rows of df

print(df.head(n=10))


           uid                                              title  \
0   0000031909                        Girls Ballet Tutu Neon Pink   
3   0001360000                                      Mog's Kittens   
7   0000031895                        Girls Ballet Tutu Neon Blue   
12  000100039X                                        The Prophet   
13  0001473905                          Rightly Dividing the Word   
15  0001516035                        Worship with Don Moen [VHS]   
16  0001837397                         Autumn Story Brambly Hedge   
17  0000791156  Spirit Led-Moving By Grace In The Holy Spirit'...   
18  0001714384               The Very Bad Bunny (Beginner Series)   
22  0001983415                                      Nice for Mice   

                                              content  
0   High quality 3 layer ballet tutu. 12 inches in...  
3   Judith Kerr&#8217;s best&#8211;selling adventu...  
7   Dance tutu for girls ages 2-8 years. Perfect f...  
12  In a distant

In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# %%capture (it has to stay on the first line of the cell) captures the cell's
# output, so pip's install log is not shown.
# Unsloth is installed straight from the GitHub repository with no tag or commit
# given, so the version that gets installed depends on when this cell is run.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps installs only the packages listed here, not their dependencies.
# xformers and trl are capped below the stated versions; peft, accelerate and
# bitsandbytes are unpinned.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [None]:
from unsloth import FastLanguageModel
import torch
# --- Loading configuration -------------------------------------------------
# Maximum sequence length; any value may be chosen (RoPE scaling is supported
# internally, per the Unsloth note this setting came with).
max_seq_length = 2048
# Weight dtype: None would auto-detect; float16 suits Tesla T4 / V100,
# bfloat16 suits Ampere and newer GPUs.
dtype = torch.float16
# 4-bit quantization lowers memory usage; may be switched to False.
load_in_4bit = True

# Reference list of pre-quantized 4-bit checkpoints (faster download, no OOMs).
# Nothing in this cell reads the list; the checkpoint is chosen by name below.
# More models at https://huggingface.co/unsloth
fourbit_models = [
    # Mistral
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # Llama-3
    "unsloth/llama-3-8b-bnb-4bit",
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    # Phi-3
    "unsloth/Phi-3-mini-4k-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    # Older Mistral and Gemma
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
]

# Load the 4-bit Llama-3 8B base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token="hf_...",  # only for gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
# Wrap the base model with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    # LoRA rank: any number > 0 (suggested 8, 16, 32, 64, 128)
    r=16,
    # Projection layers that receive adapters
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    # Any dropout value is supported, but 0 is the optimized setting
    lora_dropout=0,
    # Any bias mode is supported, but "none" is the optimized setting
    bias="none",
    # True or "unsloth" for very long context; per the Unsloth note,
    # "unsloth" uses 30% less VRAM and fits 2x larger batch sizes
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    # Rank-stabilized LoRA is supported but left off
    use_rslora=False,
    # LoftQ is supported but not configured
    loftq_config=None,
)

In [None]:
import pandas as pd

def format_data(row):
    """Build one instruction-tuning record from a product row.

    The row's "title" becomes the record's "input" and its "content" becomes
    the "output"; every record carries the same fixed "instruction" text.
    """
    # NOTE(review): the instruction text contains typos ("the your",
    # "decription"). It is reproduced verbatim because the inference prompt in
    # this notebook uses the identical string; fix both together or not at all.
    instruction = "You're a virtual seller at Amazon.com that knows all about the available amazon products, if the your asks for a specific product, you'll answer with this product's decription."
    record = dict(
        instruction=instruction,
        input=row["title"],
        output=row["content"],
    )
    return record

# Turn every DataFrame row into a record dictionary via format_data
list_ds = df.apply(format_data, axis=1).tolist()

# Show up to the first five records as a quick sanity check
for record in list_ds[:5]:
    print(record)

{'instruction': "You're a virtual seller at Amazon.com that knows all about the available amazon products, if the your asks for a specific product, you'll answer with this product's decription.", 'input': 'Girls Ballet Tutu Neon Pink', 'output': 'High quality 3 layer ballet tutu. 12 inches in length'}
{'instruction': "You're a virtual seller at Amazon.com that knows all about the available amazon products, if the your asks for a specific product, you'll answer with this product's decription.", 'input': "Mog's Kittens", 'output': 'Judith Kerr&#8217;s best&#8211;selling adventures of that endearing (and exasperating) cat Mog have entertained children for more than 30 years. Now, even infants and toddlers can enjoy meeting this loveable feline. These sturdy little board books&#8212;with their bright, simple pictures, easy text, and hand&#8211;friendly formats&#8212;are just the thing to delight the very young. Ages 6 months&#8211;2 years.'}
{'instruction': "You're a virtual seller at Amaz

In [None]:
# Alpaca-style prompt template with three positional "{}" slots, filled in
# order by str.format: instruction, input, response. The same template is used
# for training texts and for the inference prompt (where the response slot is
# filled with an empty string).
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token taken from the loaded tokenizer; it is appended to
# every training text by formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Render a batch of records into complete training prompts.

    `examples` is a batch mapping with parallel "instruction", "input" and
    "output" sequences. Returns a dict with one "text" list holding, for each
    record, the filled-in alpaca_prompt followed by EOS_TOKEN.
    """
    triples = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN must be appended, otherwise generation will go on forever.
    rendered = [
        alpaca_prompt.format(instruction, context, response) + EOS_TOKEN
        for instruction, context, response in triples
    ]
    return {"text": rendered}

import datasets
import pandas as pd
# Create a Hugging Face dataset from the list of record dictionaries.
# The intermediate frame is deliberately NOT bound to `df`: the cell that
# builds list_ds reads df["title"] / df["content"], so overwriting `df` with
# the instruction/input/output frame would make that cell fail on a re-run.
dataset = datasets.Dataset.from_pandas(pd.DataFrame(list_ds))

# Print the dataset before and after the "text" prompt column is added
print(dataset)
dataset = dataset.map(formatting_prompts_func, batched=True)
print(dataset)

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 7529
})


Map:   0%|          | 0/7529 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'input', 'output', 'text'],
    num_rows: 7529
})


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning trainer over the rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    # Packing is off; the original note says it can make training 5x faster
    # for short sequences.
    packing=False,
    args=TrainingArguments(
        # Batching: 2 per device x 4 accumulation steps
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Schedule: short run of 50 steps with 5 warmup steps, linear decay
        warmup_steps=5,
        max_steps=50,
        learning_rate=2e-4,
        lr_scheduler_type="linear",
        # Mixed precision: bf16 when the GPU supports it, otherwise fp16
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        # Optimizer
        optim="adamw_8bit",
        weight_decay=0.01,
        # Logging, reproducibility and output location
        logging_steps=1,
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

Map (num_proc=2):   0%|          | 0/7529 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning with the trainer configured above in this notebook; whatever
# train() returns is kept in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 7,529 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 50
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,1.3203
2,1.491
3,1.4769
4,1.4926
5,1.3905
6,0.957
7,0.3868
8,0.1149
9,0.0709
10,0.0482


In [None]:

# alpaca_prompt is the template defined earlier in this notebook.
# Switch the model into Unsloth's inference mode (noted upstream as enabling
# native 2x faster inference).
FastLanguageModel.for_inference(model)

# Fill the template with the instruction and the product request; the response
# slot is left empty so the model generates it.
prompt = alpaca_prompt.format(
    "You're a virtual seller at Amazon.com that knows all about the available amazon products, if the your asks for a specific product, you'll answer with this product's decription.",
    "Describe this product: Girls Ballet Tutu Neon Pink",
    "",
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Generate up to 64 new tokens and decode the full sequences (prompt included).
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
tokenizer.batch_decode(outputs)

["<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nYou're a virtual seller at Amazon.com that knows all about the available amazon products, if the your asks for a specific product, you'll answer with this product's decription.\n\n### Input:\nDescribe this product: Girls Ballet Tutu Neon Pink\n\n### Response:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\nResponse:\n"]