<a href="https://colab.research.google.com/github/bsmsultani/ml-handson/blob/main/fine_tuning_llm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
from unsloth import FastModel
from unsloth.chat_templates import get_chat_template, standardize_data_formats
from datasets import load_dataset
from pprint import pprint
from trl import SFTTrainer, SFTConfig
import torch

In [5]:
# Checkpoint names that can be handed to FastModel.from_pretrained.
fourbit_models = [
    # Gemma 3 checkpoints carrying the "-unsloth-bnb-4bit" suffix, smallest first.
    *(
        f"unsloth/gemma-3-{size}-it-unsloth-bnb-4bit"
        for size in ("1b", "4b", "12b", "27b")
    ),

    # Other popular models!
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/Llama-3.3-70B",
    "unsloth/mistral-7b-instruct-v0.3",
    "unsloth/Phi-4",
]

In [7]:
# Load the first entry of `fourbit_models` together with its tokenizer.
model, tokenizer = FastModel.from_pretrained(
    model_name=fourbit_models[0],
    # 2048-token limit; the previous value of 2042 looked like a typo for 2048.
    max_seq_length=2048,
    load_in_4bit=True,  # 4-bit quantization (reduce memory)
)

==((====))==  Unsloth 2025.8.9: Fast Gemma3 patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.




generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

In [10]:
# Attach LoRA adapters to the loaded model.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network are fine-tuned.
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    # LoRA hyperparameters.
    r=8,
    lora_alpha=8,
    lora_dropout=0,
    bias="none",
    random_state=42,
)

Unsloth: Making `model.base_model.model.model` require gradients


In [13]:
# Rebind the tokenizer to a version that uses the 'gemma-3' chat template.
tokenizer = get_chat_template(
    tokenizer,
    chat_template='gemma-3',
)

In [19]:
# Fetch the 'train' split of the FineTome-100k conversation dataset.
dataset = load_dataset(
    path='mlabonne/FineTome-100k',
    split='train',
)

README.md:   0%|          | 0.00/982 [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/117M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [25]:
# Inspect one raw record (index 100) to see the conversation schema as loaded:
# each turn is a dict with 'from' and 'value' keys.
pprint(dataset[100])

{'conversations': [{'from': 'human',
                    'value': 'What is the modulus operator in programming and '
                             'how can I use it to calculate the modulus of two '
                             'given numbers?'},
                   {'from': 'gpt',
                    'value': 'In programming, the modulus operator is '
                             "represented by the '%' symbol. It calculates the "
                             'remainder when one number is divided by another. '
                             'To calculate the modulus of two given numbers, '
                             'you can use the modulus operator in the '
                             'following way:\n'
                             '\n'
                             '```python\n'
                             '# Calculate the modulus\n'
                             'Modulus = a % b\n'
                             '\n'
                             'print("Modulus of the given numbers is:

In [26]:
# Convert every conversation to the role/content message layout (compare the
# pprint output of record 100 before and after this cell).
# NOTE(review): this rebinds `dataset`, so the raw 'from'/'value' records are
# no longer reachable afterwards without reloading.
dataset = standardize_data_formats(dataset)

Unsloth: Standardizing formats (num_proc=2):   0%|          | 0/100000 [00:00<?, ? examples/s]

In [27]:
# Re-inspect record 100 after standardization: turns are now dicts with
# 'content' and 'role' keys.
pprint(dataset[100])

{'conversations': [{'content': 'What is the modulus operator in programming '
                               'and how can I use it to calculate the modulus '
                               'of two given numbers?',
                    'role': 'user'},
                   {'content': 'In programming, the modulus operator is '
                               "represented by the '%' symbol. It calculates "
                               'the remainder when one number is divided by '
                               'another. To calculate the modulus of two given '
                               'numbers, you can use the modulus operator in '
                               'the following way:\n'
                               '\n'
                               '```python\n'
                               '# Calculate the modulus\n'
                               'Modulus = a % b\n'
                               '\n'
                               'print("Modulus of the given numbers is: ", '


In [53]:
def formatting_prompt_func(examples):
    """Render a batch of conversations into chat-template strings.

    Args:
        examples: Mapping with a 'conversations' entry holding an iterable of
            conversations; each conversation is passed as-is to
            ``tokenizer.apply_chat_template``.

    Returns:
        A dict ``{'texts': [...]}`` with one rendered string per conversation,
        in input order, each with a leading ``'<bos>'`` removed if present.
    """
    rendered = []
    for chat in examples['conversations']:
        prompt = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=False,
        )
        # NOTE(review): presumably the BOS marker is dropped because a later
        # tokenization step adds its own -- confirm.
        rendered.append(prompt.removeprefix('<bos>'))

    return {'texts': rendered}

In [54]:
# Add the rendered 'texts' column by running formatting_prompt_func over the
# dataset in batches.
dataset = dataset.map(
    function=formatting_prompt_func,
    batched=True,
)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [69]:
# Build the supervised fine-tuning trainer for a short 30-step run.
#
# `dataset` already contains the rendered 'texts' column (added by the
# `dataset.map(formatting_prompt_func, batched=True)` cell), so the trainer
# only needs `dataset_text_field` to find it. No `formatting_func` is passed:
# `formatting_prompt_func` is a batched mapper that returns a dict, not a
# per-example text formatter, and the rows are already formatted.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None,

    args = SFTConfig(
        dataset_text_field = 'texts',  # column produced by formatting_prompt_func
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 30,
        learning_rate = 2e-4,
        optim = 'adamw_8bit',
        weight_decay = 0.01,
        lr_scheduler_type = 'linear',
        seed = 42,
        report_to = 'none',  # no external experiment tracker
    ),
)

Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["texts"] (num_proc=2):   0%|          | 0/100000 [00:00<?, ? examples/s]