In [1]:
%%capture
# Install Unsloth and its dependencies; %%capture keeps the pip output out of the notebook.
import os
# Colab is detected by looking for "COLAB_" in the names of the environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    # Outside Colab: a plain install of unsloth.
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # The first and last commands use --no-deps, so only the packages named here are installed by them.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth

In [2]:
%%capture
# Extra packages: an upgraded timm (flagged below as needed only for Gemma 3N) and the Comet ML SDK.
!pip install --no-deps --upgrade timm # Only for Gemma 3N
!pip install comet-ml

In [3]:
import os
import re
import io
import zipfile
from typing import Tuple, List, Dict, Any, Optional
from PIL import Image
import requests
import comet_ml
from unsloth import FastModel, get_chat_template
import torch
from datasets import load_dataset, Dataset
from transformers import TrainingArguments
from trl import SFTTrainer, SFTConfig

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


comet_ml is installed but the Comet API Key is not configured. Please set the `COMET_API_KEY` environment variable to enable Comet logging. Check out the documentation for other ways of configuring it: https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#set-the-api-key


🦥 Unsloth Zoo will now patch everything to make training faster!


In [18]:
from comet_ml import Artifact, Experiment
from comet_ml.artifacts import ArtifactAsset

## COMET CONFIG


In [4]:
from google.colab import userdata
# Copy the Comet ML API key, project and workspace from Colab secrets into environment variables
os.environ['COMET_API_KEY'] = userdata.get('COMET_API_KEY')
os.environ['COMET_PROJECT'] = userdata.get('COMET_PROJECT')
os.environ['COMET_WORKSPACE'] = userdata.get('COMET_WORKSPACE')

# Central Comet ML settings, read back from the environment variables set above.
# NOTE(review): this dict holds the API key, so avoid printing or logging it wholesale.
COMET_CONFIG = {
    # API Key - REQUIRED
    # Option 1: set an environment variable
    # export COMET_API_KEY="your-api-key-here"

    # Option 2: set it directly (not recommended for production)
    "api_key": os.getenv("COMET_API_KEY"),  # Or replace with your API key

    # Workspace - REQUIRED
    # Workspace name on Comet ML
    "workspace": os.getenv("COMET_WORKSPACE"),  # Replace with your workspace

    # Project Name - REQUIRED
    # Project name on Comet ML
    "project": os.getenv("COMET_PROJECT"),  # The project name can be changed

    # Experiment Name - OPTIONAL
    # Specific experiment name (auto-generated if not set)
    "experiment_name": "base_line",  # Or use a custom name such as "exp-001"

    # Tags - OPTIONAL
    # Tags for categorizing experiments
    "tags": [
        "gemma3n",
        "math-tutor",
        "vietnamese",
        "sixth-grade",
        "fine-tuning"
    ],

    # Additional Settings
    "auto_metric_logging": True,     # Automatically log metrics
    "auto_param_logging": True,      # Automatically log parameters
    "auto_histogram_weight_logging": True,   # Log weight histograms
    "auto_histogram_gradient_logging": True, # Log gradient histograms
    "auto_histogram_activation_logging": False,  # Disabled to save memory
    "auto_output_logging": "default",  # Log output (stdout/stderr)

    # Model Logging
    "log_model": True,              # Upload model artifacts
    "log_graph": False,             # Log model graph (can be slow)
    "log_code": True,               # Log source code
    "log_git_metadata": True,       # Log git information
}

In [5]:
def print_comet_info():
    """Show the active Comet ML settings; the API key is reported only as set / not set."""
    cfg = COMET_CONFIG

    print("🔧 Comet ML Configuration:")
    # Workspace and project are plain lookups, printed in a fixed order.
    for label, key in (("Workspace", "workspace"), ("Project", "project")):
        print(f"   {label}: {cfg[key]}")

    # Never echo the key itself, only whether one is present.
    key_status = '✅ Set' if cfg['api_key'] else '❌ Not set'
    print(f"   API Key: {key_status}")
    print(f"   Tags: {', '.join(cfg['tags'])}")

print_comet_info()

🔧 Comet ML Configuration:
   Workspace: mathpal
   Project: mathpal-gemma3n
   API Key: ✅ Set
   Tags: gemma3n, math-tutor, vietnamese, sixth-grade, fine-tuning


## TRAINING CONFIG

In [6]:
# All hyperparameters and names for the fine-tuning run; the setup/trainer functions
# in this notebook receive this dict as their `config` argument.
TRAINING_CONFIG = {
    # Model settings
    "model_name": "unsloth/gemma-3n-E4B-it",
    "max_seq_length": 2048,
    "load_in_4bit": True,
    "full_finetuning": False,

    # Dataset settings
    "dataset_name": "ngohongthai/exam-sixth_grade-instruct-dataset",
    "train_split": "train",
    "test_split": "test",

    # Training settings
    # Output directory is "<comet project>/<experiment name>", built from COMET_CONFIG.
    "output_dir": f"{COMET_CONFIG['project']}/{COMET_CONFIG['experiment_name']}",
    "max_steps": 100,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "learning_rate": 2e-4,
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "logging_steps": 5,
    "save_steps": 50,

    # LoRA settings
    "lora_r": 8,
    "lora_alpha": 8,
    "lora_dropout": 0.0,

    # System settings
    # NOTE(review): no function visible in this notebook reads "use_gradient_checkpointing",
    # and the recorded training log mentions gradient checkpointing - confirm whether this
    # flag actually takes effect.
    "use_gradient_checkpointing": False,  # Disabled to avoid CheckpointError
    "report_to": "comet_ml",  # Change to "tensorboard", "wandb" if needed
    "seed": 42,

    # Comet ML settings
    "comet_workspace": COMET_CONFIG['workspace'],  # Set your Comet workspace name
    "comet_project": COMET_CONFIG['project'],  # Set your Comet project name
}

## DATASET PROCESSING

In [7]:
def process_sample(sample: Dict[str, str]) -> Dict[str, List[Dict[str, Any]]]:
    """Turn one question/solution record into a two-turn chat conversation.

    Args:
        sample: Record with "question" and "solution" text fields.

    Returns:
        Dict with a single "conversations" key: a user turn carrying the
        question followed by an assistant turn carrying the solution.
    """
    def as_turn(role: str, text: str) -> Dict[str, Any]:
        # Each turn's content is a list holding one text part.
        return {"role": role, "content": [{"type": "text", "text": text}]}

    user_turn = as_turn("user", sample["question"])
    assistant_turn = as_turn("assistant", sample["solution"])
    return {"conversations": [user_turn, assistant_turn]}

def prepare_dataset(tokenizer, dataset_name, split) -> Dataset:
    """
    Load a dataset split and render every sample as chat-template text.

    Args:
        tokenizer: Object whose apply_chat_template renders a conversation to a string
        dataset_name: HuggingFace dataset name
        split: Dataset split to load

    Returns:
        Dataset with "conversations" and "text" columns added to the loaded split
    """
    print(f"Loading dataset: {dataset_name}, split: {split}")

    def render_batch(batch):
        rendered = []
        for conversation in batch["conversations"]:
            chat_text = tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
            # Drop a leading '<bos>' from the rendered text, if present.
            rendered.append(chat_text.removeprefix('<bos>'))
        return {"text": rendered}

    # Step 1: per-sample conversion to the conversation structure.
    with_conversations = load_dataset(dataset_name, split=split).map(process_sample)
    # Step 2: batched rendering of conversations into plain training text.
    return with_conversations.map(render_batch, batched=True)

## MODEL SETUP

In [8]:
def setup_model_and_processor(config: Dict[str, Any]):
    """
    Load the base model, attach LoRA adapters and apply the "gemma-3n" chat template.

    Args:
        config: Configuration dictionary. Keys read here: "model_name",
            "max_seq_length", "load_in_4bit", "full_finetuning", "lora_r",
            "lora_alpha", "lora_dropout" and "seed".

    Returns:
        Tuple of (model, processor)
    """
    # NOTE(review): config["use_gradient_checkpointing"] is not consumed here, so the
    # library defaults apply - confirm whether that is intended.
    print("Loading Gemma3N model and processor...")

    # Base model + processor
    load_options = dict(
        model_name=config["model_name"],
        max_seq_length=config["max_seq_length"],
        load_in_4bit=config["load_in_4bit"],
        full_finetuning=config["full_finetuning"],
    )
    model, processor = FastModel.from_pretrained(**load_options)

    # LoRA adapters: language layers, attention and MLP modules; vision layers excluded
    lora_options = dict(
        finetune_vision_layers=False,
        finetune_language_layers=True,
        finetune_attention_modules=True,
        finetune_mlp_modules=True,
        r=config["lora_r"],
        lora_alpha=config["lora_alpha"],
        lora_dropout=config["lora_dropout"],
        bias="none",
        random_state=config["seed"],
    )
    model = FastModel.get_peft_model(model, **lora_options)

    # Chat template
    processor = get_chat_template(processor, "gemma-3n")

    print("Model and processor setup complete!")
    return model, processor

In [9]:
from unsloth.chat_templates import train_on_responses_only

def create_trainer(model, processor, train_dataset, config: Dict[str, Any]):
    """
    Build an SFTTrainer restricted to learning from the model's responses.

    Args:
        model: Prepared model; training is enabled on it via FastModel.for_training
        processor: Model processor, handed to the trainer as its tokenizer
        train_dataset: Training dataset; its "text" column is used as the training text
        config: Configuration dictionary

    Returns:
        Configured SFTTrainer, as returned by train_on_responses_only
    """
    FastModel.for_training(model)

    sft_options = {
        # Basic training settings
        "dataset_text_field": "text",
        "output_dir": config["output_dir"],
        "max_steps": config["max_steps"],
        "per_device_train_batch_size": config["per_device_train_batch_size"],
        "gradient_accumulation_steps": config["gradient_accumulation_steps"],
        # Optimization settings
        "learning_rate": config["learning_rate"],
        "warmup_ratio": config["warmup_ratio"],
        "weight_decay": config["weight_decay"],
        "optim": "adamw_torch_fused",
        "lr_scheduler_type": "cosine",
        # Logging and saving
        "logging_steps": config["logging_steps"],
        "save_strategy": "steps",
        "save_steps": config["save_steps"],
        "report_to": config["report_to"],
        # Sequence length limit
        "max_length": config["max_seq_length"],
        # Reproducibility
        "seed": config["seed"],
    }
    training_args = SFTConfig(**sft_options)

    sft_trainer = SFTTrainer(
        model=model,
        tokenizer=processor,
        train_dataset=train_dataset,
        args=training_args,
    )

    # Only the assistant ("model") turns contribute to the loss; the user turns are
    # ignored. The two markers below delimit those turns in the rendered text.
    return train_on_responses_only(
        sft_trainer,
        instruction_part="<start_of_turn>user\n",
        response_part="<start_of_turn>model\n",
    )

## COMET ML SETUP

In [10]:
def _without_secrets(params: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``params`` minus entries whose key names a credential."""
    markers = ("api_key", "token", "secret", "password")

    def is_secret(key: Any) -> bool:
        name = str(key).lower()
        # Match "api_key" / "hf_token" style names, but not e.g. "max_new_tokens".
        return any(name == marker or name.endswith("_" + marker) for marker in markers)

    return {key: value for key, value in params.items() if not is_secret(key)}


def setup_comet_ml(config: Dict[str, Any]) -> Optional["comet_ml.Experiment"]:
    """
    Setup Comet ML experiment tracking with full configuration support.

    Nothing happens unless TRAINING_CONFIG["report_to"] is "comet_ml". Otherwise an
    experiment is created from COMET_CONFIG, named, tagged, and given the run
    metadata, and the environment variables read by the transformers integration
    are set.

    Args:
        config: Configuration dictionary logged as experiment parameters.
            Credential-like keys (e.g. "api_key") are dropped before logging.
            If initialization fails, its "report_to" entry is set to "tensorboard".

    Returns:
        The live Comet experiment, or None when Comet is not selected or could
        not be initialized.
    """
    if TRAINING_CONFIG["report_to"] != "comet_ml":
        return None

    def fall_back_to_tensorboard() -> None:
        print("Falling back to tensorboard logging...")
        config["report_to"] = "tensorboard"
        # This function's gate and the trainer both read TRAINING_CONFIG, so the
        # fallback must be recorded there too, whichever dict was passed as `config`.
        TRAINING_CONFIG["report_to"] = "tensorboard"

    try:
        workspace = COMET_CONFIG.get("workspace")
        # `or` instead of a .get() default: the key can exist with a None value, and
        # assigning None to os.environ further down would raise a TypeError.
        project = COMET_CONFIG.get("project") or "gemma3n-math-tutor"

        # Initialize Comet experiment
        experiment_kwargs = {
            "workspace": workspace,
            "project_name": project,
            "auto_metric_logging": COMET_CONFIG.get("auto_metric_logging", True),
            "auto_param_logging": COMET_CONFIG.get("auto_param_logging", True),
            "auto_histogram_weight_logging": COMET_CONFIG.get("auto_histogram_weight_logging", True),
            "auto_histogram_gradient_logging": COMET_CONFIG.get("auto_histogram_gradient_logging", True),
            "auto_histogram_activation_logging": COMET_CONFIG.get("auto_histogram_activation_logging", False),
        }

        # Remove None values
        experiment_kwargs = {k: v for k, v in experiment_kwargs.items() if v is not None}

        experiment = comet_ml.Experiment(**experiment_kwargs)

        # Apply the configured experiment name, if any
        experiment_name = COMET_CONFIG.get("experiment_name")
        if experiment_name:
            experiment.set_name(experiment_name)

        # Log configuration, never uploading credentials as parameters
        experiment.log_parameters(_without_secrets(config))

        # Add tags
        for tag in COMET_CONFIG.get("tags", ["gemma3n", "vision-language", "math-tutor"]):
            experiment.add_tag(tag)

        # Log additional metadata
        experiment.log_other("dataset", TRAINING_CONFIG['dataset_name'])
        experiment.log_other("model_base", TRAINING_CONFIG['model_name'])
        experiment.log_other("task", "sixth-grade-math-tutoring")
        experiment.log_other("language", "vietnamese")

        print(f"✅ Comet ML experiment initialized")
        print(f"🔗 Experiment URL: {experiment.url}")
        print(f"📊 Workspace: {workspace or 'default'}")
        print(f"📁 Project: {project}")

        # Set environment variables for transformers integration
        os.environ["COMET_PROJECT_NAME"] = project
        if workspace:
            os.environ["COMET_WORKSPACE"] = workspace

        return experiment

    except ImportError:
        print("❌ comet_ml not installed. Please install with: pip install comet-ml")
        fall_back_to_tensorboard()
        return None
    except Exception as e:
        print(f"❌ Failed to initialize Comet ML: {e}")
        print("Possible causes:")
        print("- Invalid API key or workspace/project names")
        print("- Network connection issues")
        print("- Missing permissions")
        fall_back_to_tensorboard()
        return None

## TRAINING

In [11]:
# Create the training output directory up front; exist_ok=True makes re-running this cell safe.
os.makedirs(TRAINING_CONFIG["output_dir"], exist_ok=True)

In [12]:
# Pass the training hyperparameters: setup_comet_ml logs its argument as experiment
# parameters and writes the "report_to" fallback into it, so it must be
# TRAINING_CONFIG rather than COMET_CONFIG (which also contains the API key).
comet_experiment = setup_comet_ml(TRAINING_CONFIG)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/mathpal/mathpal-gemma3n/09a139b404814913948d2b313f6de1ca

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


✅ Comet ML experiment initialized
🔗 Experiment URL: https://www.comet.com/mathpal/mathpal-gemma3n/09a139b404814913948d2b313f6de1ca
📊 Workspace: mathpal
📁 Project: mathpal-gemma3n


In [13]:
# 1. Setup model and processor (base model load, LoRA adapters, chat template)
model, processor = setup_model_and_processor(TRAINING_CONFIG)

Loading Gemma3N model and processor...
==((====))==  Unsloth 2025.8.1: Fast Gemma3N patching. Transformers: 4.54.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.72G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/1.15G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

Unsloth: Making `model.base_model.model.model.language_model` require gradients
Model and processor setup complete!


In [14]:
# 2. Prepare dataset: load the train split and render each sample to chat-template text.
# The processor is passed in the `tokenizer` position of prepare_dataset.
train_dataset = prepare_dataset(processor, TRAINING_CONFIG["dataset_name"], TRAINING_CONFIG["train_split"])
train_dataset

Loading dataset: ngohongthai/exam-sixth_grade-instruct-dataset, split: train


README.md:   0%|          | 0.00/408 [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/245k [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/29.4k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1010 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/113 [00:00<?, ? examples/s]

Map:   0%|          | 0/1010 [00:00<?, ? examples/s]

Map:   0%|          | 0/1010 [00:00<?, ? examples/s]

Dataset({
    features: ['question', 'solution', 'conversations', 'text'],
    num_rows: 1010
})

In [15]:
# Inspect the rendered training text of the first example.
train_dataset[0]['text']

'<start_of_turn>user\nTìm x biết $x + 3,8 = 3,5 \\times 1,5$<end_of_turn>\n<start_of_turn>model\nx + 3,8 = 3, 5 x 1, 5\nx + 3,8 = 5, 25\nx = 5, 25 - 3,8\nx = 1, 45<end_of_turn>\n'

In [16]:
# 3. Create trainer (SFT settings come from TRAINING_CONFIG)
trainer = create_trainer(model, processor, train_dataset, TRAINING_CONFIG)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/1010 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/1010 [00:00<?, ? examples/s]

In [17]:
 # 4. Start training
# Summarize the run settings in one print call, one item per line.
print(
    "\n🚀 Starting training...",
    f"- Output directory: {TRAINING_CONFIG['output_dir']}",
    f"- Max steps: {TRAINING_CONFIG['max_steps']}",
    f"- Batch size: {TRAINING_CONFIG['per_device_train_batch_size']}",
    f"- Gradient accumulation: {TRAINING_CONFIG['gradient_accumulation_steps']}",
    # Effective batch size = per-device batch size x gradient accumulation steps
    f"- Effective batch size: {TRAINING_CONFIG['per_device_train_batch_size'] * TRAINING_CONFIG['gradient_accumulation_steps']}",
    sep="\n",
)

# Run the fine-tuning loop and keep the returned training statistics
trainer_stats = trainer.train()


🚀 Starting training...
- Output directory: mathpal-gemma3n/base_line
- Max steps: 100
- Batch size: 1
- Gradient accumulation: 8
- Effective batch size: 8


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,010 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 19,210,240 of 7,869,188,432 (0.24% trained)
[1;38;5;39mCOMET INFO:[0m An experiment with the same configuration options is already running and will be reused.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,5.5706
10,5.6678
15,5.5325
20,3.5865
25,1.8914
30,1.8069
35,1.5086
40,1.0693
45,1.1613
50,1.0462


comet_ml is installed but the Comet API Key is not configured. Please set the `COMET_API_KEY` environment variable to enable Comet logging. Check out the documentation for other ways of configuring it: https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#set-the-api-key
comet_ml is installed but the Comet API Key is not configured. Please set the `COMET_API_KEY` environment variable to enable Comet logging. Check out the documentation for other ways of configuring it: https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#set-the-api-key


## INFERENCE

In [20]:
# Show the raw question of the first training example; the next cell uses it as the prompt.
train_dataset[0]['question']

'Tìm x biết $x + 3,8 = 3,5 \\times 1,5$'

In [21]:
# Apply the "gemma-3n" chat template to the processor and bind the result to `tokenizer`.
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    processor,
    chat_template = "gemma-3n",
)
# Single-turn prompt built from the first training question.
# NOTE(review): this sample comes from the training split, so the generation below is a
# sanity check rather than a held-out evaluation.
messages = [{
    "role": "user",
    "content": [{
        "type" : "text",
        "text" : train_dataset[0]['question'],
    }]
}]
# Render and tokenize the prompt into a dict of tensors, then move it to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
    tokenize = True,
    return_dict = True,
).to("cuda")
# Generate at most 64 new tokens with the sampling settings below.
outputs = model.generate(
    **inputs,
    max_new_tokens = 64, # Increase for longer outputs!
    # Recommended Gemma-3 settings!
    temperature = 1.0, top_p = 0.95, top_k = 64,
)
# Decode the generated ids back to text; the cell displays the decoded list.
tokenizer.batch_decode(outputs)

['<bos><start_of_turn>user\nTìm x biết $x + 3,8 = 3,5 \\times 1,5$<end_of_turn>\n<start_of_turn>model\n$x + 3,8 = 5,25$\n$x = 5,25 - 3,8$\n$x = 1,45$\n\nVậy $x = 1,45$<end_of_turn>']

## SAVE MODEL

In [27]:
# Repo name for the LoRA adapters: last path component of the base model name plus a project suffix.
model_name = f"{TRAINING_CONFIG['model_name'].split('/')[-1]}-mathpal-grade6-vi"
model_name

'gemma-3n-E4B-it-mathpal-grade6-vi'

### Save LoRA layer

To save the final model as LoRA adapters, either use Huggingface's push_to_hub for an online save or save_pretrained for a local save.

[NOTE] This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

In [28]:
# Upload the model and the processor to the same Hugging Face Hub repo,
# authenticating with the HF_TOKEN Colab secret.
model.push_to_hub(f"ngohongthai/{model_name}", token = userdata.get('HF_TOKEN')) # Online saving
processor.push_to_hub(f"ngohongthai/{model_name}", token = userdata.get('HF_TOKEN')) # Online saving

README.md:   0%|          | 0.00/608 [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...pfnow04gc/adapter_model.safetensors:  22%|##1       | 16.8MB / 76.9MB            

Saved model to https://huggingface.co/ngohongthai/gemma-3n-E4B-it-mathpal-grade6-vi


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmplr90jm_0/tokenizer.model      : 100%|##########| 4.70MB / 4.70MB            

  /tmp/tmplr90jm_0/tokenizer.json       :  50%|#####     | 16.8MB / 33.4MB            

### Saving to float16 for vLLM

In [29]:
# Repo name for the merged float16 upload. This rebinds `model_name`, replacing the adapter repo name.
model_name = f"{TRAINING_CONFIG['model_name'].split('/')[-1]}-F16-mathpal-grade6-vi"
model_name

'gemma-3n-E4B-it-F16-mathpal-grade6-vi'

In [None]:
# Upload the merged model together with the tokenizer to the Hub.
# `tokenizer` is the name assigned in the inference cell, so that cell must have run first.
model.push_to_hub_merged(
    f"ngohongthai/{model_name}", tokenizer,
    token = userdata.get('HF_TOKEN')
)

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...6-mathpal-grade6-vi/tokenizer.model: 100%|##########| 4.70MB / 4.70MB            

  ...16-mathpal-grade6-vi/tokenizer.json: 100%|##########| 33.4MB / 33.4MB            

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model-00001-of-00004.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Downloading safetensors index for unsloth/gemma-3n-e4b-it...


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Unsloth: Merging weights into 16bit:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.08G [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...vi/model-00001-of-00004.safetensors:   1%|          | 25.2MB / 3.08GB            

Unsloth: Merging weights into 16bit:  25%|██▌       | 1/4 [03:55<11:45, 235.08s/it]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...vi/model-00002-of-00004.safetensors:   0%|          | 96.5kB / 4.97GB            

Unsloth: Merging weights into 16bit:  50%|█████     | 2/4 [20:50<23:08, 694.21s/it]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

### GGUF / llama.cpp Conversion
To save to `GGUF` / `llama.cpp`, we support it natively now for all models! For now, you can convert easily to `Q8_0, F16 or BF16` precision. `Q4_K_M` for 4bit will come later!

In [1]:
# Name for the GGUF Q8_0 export. TRAINING_CONFIG must already be defined in the current
# kernel (run the config cell first), otherwise this raises NameError.
model_name = f"{TRAINING_CONFIG['model_name'].split('/')[-1]}-GGUF-Q8_0-mathpal-grade6-vi"
model_name

NameError: name 'TRAINING_CONFIG' is not defined

In [None]:
# Export the model to GGUF at Q8_0 precision, using `model_name` as the save target
# (presumably a local path - confirm).
model.save_pretrained_gguf(
    model_name,
    quantization_type = "Q8_0", # For now only Q8_0, BF16, F16 supported
)

In [None]:
# Upload the Q8_0 GGUF export to the Hub repo named after `model_name`,
# authenticating with the HF_TOKEN Colab secret.
model.push_to_hub_gguf(
    model_name,
    quantization_type = "Q8_0", # Only Q8_0, BF16, F16 supported
    repo_id =  f"ngohongthai/{model_name}",
    token = userdata.get('HF_TOKEN'),
)