In [None]:
%%capture
import os
# Environment detection: if no environment variable name contains "COLAB_",
# take the plain-install branch; otherwise take the pinned Colab branch.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
    !pip install llama-cpp-python
    # Pin numpy to 1.26.4 and install scipy. This runs after the two installs
    # above, so the numpy pin is the last thing applied in this branch.
    !pip install numpy==1.26.4 scipy
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # The first and last installs use --no-deps, so pip does not pull in their
    # dependency trees; the explicit pins below select the versions instead.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install transformers==4.51.3
    !pip install --no-deps unsloth

In [None]:
# Authenticate with the Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable so that the
# secret is never stored in the notebook file; when the variable is unset or
# empty, the user is prompted for it without echoing the input.
import os
from getpass import getpass

from huggingface_hub import login

huggingface_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token = huggingface_token)

### Import Unsloth and load the base model

In [None]:
import unsloth
from unsloth import FastModel
import torch

# Checkpoint to fine-tune.
model_name = "unsloth/gemma-3-1b-it-qat"

# Load the model together with its tokenizer. Full fine-tuning is switched off
# here; adapters are attached to the model in a later cell.
model, tokenizer = FastModel.from_pretrained(
    model_name,
    max_seq_length = 2048,
    full_finetuning = False,
)

NotImplementedError: Unsloth currently only works on NVIDIA GPUs and Intel GPUs.

#### Use LoRA for fine-tuning

In [None]:
# Wrap the loaded model with PEFT adapters via Unsloth.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters.
    finetune_language_layers = True,
    finetune_attention_modules = True,
    finetune_mlp_modules = True,
    # Adapter hyperparameters.
    r = 8,
    lora_alpha = 8,
    lora_dropout = 0.025,
    bias = "none",
    # Fixed seed so adapter initialisation is repeatable between runs.
    random_state = 3407,
)

# Prep Data

In [None]:
from unsloth.chat_templates import get_chat_template
# Attach the "gemma-3" chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template = "gemma-3")

In [None]:
from datasets import load_dataset

# Local JSON file holding the training conversations; use your actual file path.
json_file_path = "my_dataset.json"

# Read the whole file as the "train" split.
dataset = load_dataset(
    "json",
    data_files = json_file_path,
    split = "train",
)

# Show the record at index 100 as a quick sanity check of the loaded data.
dataset[100]

In [None]:
def apply_chat_template(examples):
    """Render each conversation in a batch into a single Gemma-style training string.

    Args:
        examples: A batch mapping with a "conversations" key. Its value is a list
            of conversations, each a list of messages with "role" and "content".

    Returns:
        A dict ``{"text": [...]}`` with one rendered string per conversation.

    Rendering rules:
        * "system" messages are not emitted as turns; the content of the last one
          is prepended (followed by a blank line) to the user message that is
          first among the remaining messages.
        * "<bos>" is emitted only in front of a user message at position 0 of the
          remaining messages.
        * "assistant" messages are written as "model" turns.
        * Messages with any other role are skipped.
    """

    def _render(conversation):
        # Separate the system prompt from the turns that will be rendered.
        system_prompt = ""
        turns = []
        for entry in conversation:
            if entry["role"] == "system":
                system_prompt = entry["content"]
            else:
                turns.append(entry)

        pieces = []
        for position, turn in enumerate(turns):
            role = turn["role"]
            is_first = position == 0
            if role == "user":
                body = turn["content"]
                # The system prompt rides along with the opening user turn only.
                if is_first and system_prompt:
                    body = f"{system_prompt}\n\n{body}"
                # A single BOS marker, only at the very start of the sequence.
                prefix = "<bos>" if is_first else ""
                pieces.append(f"{prefix}<start_of_turn>user\n{body}<end_of_turn>\n")
            elif role == "assistant":
                pieces.append(f"<start_of_turn>model\n{turn['content']}<end_of_turn>\n")
        return "".join(pieces)

    return {"text": [_render(conversation) for conversation in examples["conversations"]]}
# Render every conversation in batches; the mapped function returns a "text"
# field, which is added to the dataset.
dataset = dataset.map(
    apply_chat_template,
    batched = True,
)

In [None]:
# Inspect the rendered training string of the record at index 100.
dataset[100]['text']

In [None]:
from trl import SFTTrainer, SFTConfig
# Build the supervised fine-tuning trainer over the dataset's "text" field.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None, # No evaluation set is used; pass a dataset here to enable evaluation
    args = SFTConfig(
        dataset_text_field = "text", # Field holding the rendered conversation strings
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # 2 x 4 = 8 sequences per optimizer step on each device
        warmup_steps = 5,
        # NOTE(review): per the Hugging Face TrainingArguments docs, a positive
        # max_steps overrides num_train_epochs, so the run length is 30 optimizer
        # steps regardless of the epoch count below. Set max_steps = -1 to train
        # for the full num_train_epochs instead.
        num_train_epochs = 2,
        max_steps = 30,
        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none", # Logging integrations disabled; set e.g. "wandb" to enable tracking
    ),
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Wrap the trainer with Unsloth's train_on_responses_only, handing it the
# markers that open a user turn and a model turn in the rendered text.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)

In [None]:
# Decode the token ids of the training example at index 100 back to text to
# check what the trainer will actually see.
tokenizer.decode(trainer.train_dataset[100]["input_ids"])

In [None]:
# Run fine-tuning; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()


In [None]:
# Persist the fine-tuned model, tokenizer and model config side by side.
save_directory = "gemma-3"

model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)
model.config.save_pretrained(save_directory)

# Export to GGUF at f16 precision.
# The output directory must be the first positional argument. The previous call
# passed the tokenizer in that slot and also passed save_directory by keyword,
# which binds the same parameter twice and raises a TypeError.
# NOTE(review): this form matches the GGUF export signature that accepts
# `quantization_type`. Unsloth releases whose save_pretrained_gguf takes
# (save_directory, tokenizer, quantization_method=...) need the tokenizer as the
# second argument and the keyword renamed - confirm against the installed version.
# NOTE(review): the export may expect merged weights in this directory rather
# than only the saved adapter - verify, and save a merged model first if needed.
model.save_pretrained_gguf(save_directory, quantization_type = "f16")