# Full Fine-Tuning Google Gemma 3-270M-IT

This notebook provides a complete workflow for full-parameter fine-tuning (no LoRA, no quantization) of Google's instruction-tuned Gemma 3 270M model (`google/gemma-3-270m-it`) using the Hugging Face `transformers`, `datasets`, and `trl` libraries.

In [None]:
# 1. Install necessary libraries
!pip install -q transformers datasets trl accelerate torch

In [3]:
# 2. Log in to Hugging Face (needed only if the Gemma weights are gated for your account)
from huggingface_hub import login
# login()  # Disabled by default. Uncomment to authenticate; do not paste a token into the notebook source.

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from datasets import load_dataset
from trl import SFTTrainer

# Configuration
# Hub identifier of the checkpoint; used by both the tokenizer and model loading calls.
model_id = "google/gemma-3-270m-it"
# Directory used for training checkpoints and for the final model/tokenizer save.
output_dir = "./gemma-3-270m-full-finetuned"

W0111 05:34:39.966000 2488 torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [1]:
# 3. Load Tokenizer and Model
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Only fall back to EOS when the tokenizer ships without a pad token.
# Aliasing pad to EOS unconditionally makes padding indistinguishable from a
# genuine end-of-sequence token, which can cause EOS to be masked out of the
# training labels together with the padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load model for Full Fine-Tuning (no quantization/LoRA).
# Use bf16 weights only when a CUDA device is present and reports bf16 support.
# Otherwise keep fp32 weights: fp16 mixed precision relies on a gradient scaler
# that cannot unscale gradients belonging to fp16 parameters, so fp16 weights
# must not be combined with `fp16=True` training.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=(
        torch.bfloat16
        if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
        else torch.float32
    ),
    device_map="auto",
)

NameError: name 'AutoTokenizer' is not defined

In [None]:
# 4. Prepare Dataset
# Demonstration-sized slice: only the first 1000 rows of the Dolly 15k train split
dataset = load_dataset(
    path="databricks/databricks-dolly-15k",
    split="train[:1000]",
)

def formatting_prompts_func(example):
    """Render Dolly-style records as Gemma chat-formatted training text.

    Accepts either calling convention a formatting hook may receive:

    * a batch, where ``example['instruction']``, ``example['context']`` and
      ``example['response']`` are equal-length sequences -> returns a list
      with one formatted string per row;
    * a single record, where those fields are plain strings -> returns one
      formatted string.

    An empty (or missing-value) ``context`` is omitted entirely so the user
    turn does not end in a dangling newline before ``<end_of_turn>``.
    """

    def _render(instruction, context, response):
        # Join only the non-empty parts of the user turn.
        user_turn = "\n".join(part for part in (instruction, context) if part)
        return (
            f"<start_of_turn>user\n{user_turn}<end_of_turn>\n"
            f"<start_of_turn>model\n{response}<end_of_turn>"
        )

    instructions = example['instruction']
    # A bare string means a single record; indexing it would walk characters.
    if isinstance(instructions, str):
        return _render(instructions, example['context'], example['response'])

    return [
        _render(instruction, context, response)
        for instruction, context, response in zip(
            instructions, example['context'], example['response']
        )
    ]

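# SFTTrainer needs to know where the training text comes from: either a formatting function
# (this notebook passes `formatting_prompts_func`) or the name of a dataset column that already holds the text.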

In [None]:
# 5. Define Training Arguments
# Choose at most one mixed-precision mode, and none when no CUDA device exists:
# bf16 when the GPU supports it, otherwise fp16 on GPU, otherwise full precision.
cuda_ok = torch.cuda.is_available()
bf16_ok = cuda_ok and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch of 16 sequences per device
    learning_rate=2e-5,
    num_train_epochs=1,
    logging_steps=10,
    save_strategy="steps",
    save_steps=50,
    optim="adamw_torch",
    bf16=bf16_ok,
    fp16=cuda_ok and not bf16_ok,
    report_to="none",  # Can be set to "wandb" or "tensorboard"
    warmup_ratio=0.03,
    # The plain "constant" schedule has no warmup phase, so `warmup_ratio`
    # would be silently ignored; this variant warms up and then holds the LR.
    lr_scheduler_type="constant_with_warmup",
)

In [None]:
# 6. Initialize SFTTrainer
# Wires together the loaded model, the Dolly subset, the prompt formatter,
# the tokenizer and the training arguments defined in the earlier cells.
# NOTE(review): `max_seq_length` and `tokenizer` are accepted as SFTTrainer
# keyword arguments only by some TRL releases; newer releases appear to expect
# `processing_class` and a length setting on the config object instead.
# The install cell does not pin a TRL version — confirm against the installed one.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args
)

# 7. Start Training
trainer.train()

In [None]:
# 8. Save the Fine-Tuned Model
# Write the trained model through the trainer, then the tokenizer files,
# both into the same `output_dir` so the folder can be reloaded as one unit.
trainer.save_model(output_dir=output_dir)
tokenizer.save_pretrained(save_directory=output_dir)

In [None]:
# 9. Inference Test after Training
messages = [
    {"role": "user", "content": "What are the benefits of full fine-tuning an SLM?"},
]

# Training leaves the model in train mode; switch to eval mode before generating.
model.eval()

# Request a dict so the attention mask travels with the input ids, and move the
# tensors to wherever the model actually lives instead of assuming "cuda".
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=150)
# Decodes the full sequence (prompt followed by the generated reply).
print(tokenizer.decode(outputs[0], skip_special_tokens=True))