# Fine-tuning FunctionGemma pour Home Assistant

Ce notebook permet d'entraîner FunctionGemma sur Google Colab avec un GPU gratuit.

**Prérequis:**
- Un compte Hugging Face avec accès à FunctionGemma
- Vos fichiers `train.jsonl` et `val.jsonl` générés localement

**Instructions:**
1. Activez le GPU: Runtime → Change runtime type → T4 GPU (ou A100 avec Colab Pro)
2. Exécutez les cellules dans l'ordre

## 1. Installation des dépendances

In [None]:
!pip install -q transformers>=4.40.0 datasets accelerate peft bitsandbytes huggingface_hub trl

In [None]:
# Report whether a CUDA GPU is visible and, if so, its name and total memory
import torch

cuda_ready = torch.cuda.is_available()
print(f"GPU disponible: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; divide by 1024**3 to display it in GB
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"VRAM: {vram_gb:.1f} GB")

## 2. Configuration Hugging Face

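Créez un token d'accès sur https://huggingface.co/settings/tokens puis, de préférence, enregistrez-le dans les secrets Colab sous le nom `HF_TOKEN` ; sans ce secret, la cellule ci-dessous vous demandera de saisir le token manuellement.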

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Option 1 (recommended): read the token from the Colab secret named HF_TOKEN.
# Option 2: if the secret cannot be read or the login with it fails, fall back
# to the interactive prompt.
try:
    hf_token = userdata.get('HF_TOKEN')
    login(token=hf_token)
    print("Connecté via secret Colab")
except Exception as err:
    # Catch Exception instead of using a bare except so that interrupting the
    # cell (KeyboardInterrupt) is not swallowed, and show why we fall back.
    print(f"Secret Colab HF_TOKEN inutilisable ({type(err).__name__}: {err}); saisie manuelle.")
    login()

## 3. Upload du dataset

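Uploadez vos fichiers `train.jsonl` et `val.jsonl` générés avec `python scripts/generate_dataset.py`. Conservez exactement ces noms : les cellules suivantes lisent `data/train.jsonl` et `data/val.jsonl`.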

In [None]:
from google.colab import files
import os

# Files that the following cells read from the data/ folder
EXPECTED_FILES = ("train.jsonl", "val.jsonl")

# Create the data folder
os.makedirs("data", exist_ok=True)

print("Uploadez train.jsonl et val.jsonl")
uploaded = files.upload()

# Move every uploaded file into the data folder
for filename in uploaded:
    os.rename(filename, f"data/{filename}")
    print(f"  → data/{filename}")

# Warn right away when an expected file is absent (e.g. uploaded under a
# different name) instead of failing later with a less obvious error.
missing = [name for name in EXPECTED_FILES if not os.path.isfile(f"data/{name}")]
if missing:
    print(f"⚠️ Fichier(s) manquant(s) dans data/: {', '.join(missing)}")

In [None]:
# Vérifier le dataset
import json

def count_lines(filepath):
    """Return the number of non-blank lines in the text file at *filepath*.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default encoding. Whitespace-only lines are skipped so a
    trailing empty line is not reported as an extra example.

    Args:
        filepath: Path of the file to scan (one example per line).

    Returns:
        int: Number of lines containing at least one non-whitespace character.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return sum(1 for line in f if line.strip())

# Count the examples in each split
train_count = count_lines("data/train.jsonl")
val_count = count_lines("data/val.jsonl")

print("Dataset chargé:")
print(f"  Train: {train_count} exemples")
print(f"  Validation: {val_count} exemples")

# Preview the first training example. The file is opened explicitly as UTF-8
# so accented text decodes the same way whatever the platform default is.
with open("data/train.jsonl", 'r', encoding='utf-8') as f:
    example = json.loads(f.readline())

print("\nExemple:")
for msg in example['messages']:
    # A turn may carry no text (missing or null "content"); display it as
    # empty instead of failing on the slice.
    content = msg.get('content') or ""
    print(f"  [{msg['role']}]: {content[:80]}...")

## 4. Configuration

In [None]:
# Training configuration: one flat mapping read by the cells that follow
CONFIG = dict(
    # Model
    model_name="google/functiongemma-270m-it",
    max_length=1024,  # Reduced to save memory on a T4

    # LoRA
    lora_r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    lora_target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],

    # Training - tuned for a Colab T4 (16GB VRAM)
    batch_size=4,
    gradient_accumulation_steps=4,  # Effective batch = 16
    learning_rate=2e-4,
    num_epochs=3,
    warmup_ratio=0.1,
    weight_decay=0.01,

    # Checkpointing and logging
    output_dir="./output",
    save_steps=100,
    logging_steps=10,
)

# Echo every setting, one per line, in insertion order
print("Configuration:")
print("\n".join(f"  {key}: {value}" for key, value in CONFIG.items()))

## 5. Chargement du modèle

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

model_id = CONFIG["model_name"]
print(f"Chargement de {model_id}...")

# 4-bit NF4 quantization with double quantization to save memory;
# computations run in float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# NOTE(review): trust_remote_code=True allows code shipped in the model
# repository to run locally - confirm it is actually required for this model.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# When the tokenizer defines no padding token, reuse EOS for padding and
# record the same id in the model config
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

print("Modèle chargé!")
print(f"  Paramètres: {model.num_parameters():,}")

In [None]:
# Prepare the quantized model for training, then wrap it with LoRA adapters
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

# Adapter hyper-parameters come from CONFIG; bias terms are left untouched
lora_settings = {
    "r": CONFIG["lora_r"],
    "lora_alpha": CONFIG["lora_alpha"],
    "lora_dropout": CONFIG["lora_dropout"],
    "target_modules": CONFIG["lora_target_modules"],
    "bias": "none",
    "task_type": TaskType.CAUSAL_LM,
}
lora_config = LoraConfig(**lora_settings)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

## 6. Préparation du dataset

In [None]:
from datasets import load_dataset

# Map each split name to its JSONL file and load both with the "json" builder
split_files = {
    "train": "data/train.jsonl",
    "validation": "data/val.jsonl",
}
dataset = load_dataset("json", data_files=split_files)

# Report the size of each split
print("Dataset:")
for label, split_name in (("Train", "train"), ("Validation", "validation")):
    print(f"  {label}: {len(dataset[split_name])} exemples")

In [None]:
def format_example(example):
    """Render one dataset row as a single training string for FunctionGemma.

    The tokenizer's own chat template is tried first. If it raises, a warning
    is emitted and the conversation is rendered with a manual
    ``<start_of_turn>`` / ``<end_of_turn>`` layout instead.

    Args:
        example: Mapping with a "messages" list of ``{"role", "content"}``
            dicts and, optionally, a "tools" entry passed to the template.

    Returns:
        dict: ``{"text": rendered_conversation}``.
    """
    import warnings

    messages = example["messages"]
    tools = example.get("tools", [])

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tools=tools,
            tokenize=False,
            add_generation_prompt=False,
        )
    except Exception as err:
        # Catch Exception rather than a bare except (which would also swallow
        # KeyboardInterrupt), and make the fallback visible: it produces a
        # different layout than the native template.
        warnings.warn(
            f"apply_chat_template failed ({type(err).__name__}: {err}); "
            "using the manual turn format instead."
        )

        # Dataset role -> turn name used in the manual layout
        turn_names = {"developer": "developer", "user": "user", "assistant": "model"}
        parts = []
        for msg in messages:
            turn = turn_names.get(msg["role"])
            if turn is None:
                # Roles without a known turn name are left out, as before
                continue
            # A null or missing content becomes an empty turn instead of the
            # literal text "None"
            content = msg.get("content") or ""
            parts.append(f"<start_of_turn>{turn}\n{content}<end_of_turn>\n")
        text = "".join(parts)

    return {"text": text}

def tokenize_function(examples):
    """Tokenize a batch of formatted examples.

    Sequences are truncated to ``CONFIG["max_length"]`` but deliberately not
    padded here: padding every example to the maximum length makes each batch
    as long as the longest allowed sequence. Padding is left to the data
    collator, which pads each batch to its own longest sequence.

    Args:
        examples: Batch mapping with a "text" list of strings.

    Returns:
        The tokenizer output for the batch (variable-length sequences).
    """
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=CONFIG["max_length"],
    )

# Step 1: render every row to a single "text" string
print("Formatage du dataset...")
dataset = dataset.map(format_example)

# Step 2: tokenize in batches and drop every pre-existing column so that only
# the tokenizer outputs remain
print("Tokenization...")
columns_to_drop = dataset["train"].column_names
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=columns_to_drop,
)

print("Dataset prêt!")

## 7. Entraînement

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Training arguments; every tunable value is read from CONFIG
training_args = TrainingArguments(
    output_dir=CONFIG["output_dir"],
    num_train_epochs=CONFIG["num_epochs"],
    per_device_train_batch_size=CONFIG["batch_size"],
    per_device_eval_batch_size=CONFIG["batch_size"],
    gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
    learning_rate=CONFIG["learning_rate"],
    warmup_ratio=CONFIG["warmup_ratio"],
    weight_decay=CONFIG["weight_decay"],
    logging_steps=CONFIG["logging_steps"],
    save_steps=CONFIG["save_steps"],
    # Evaluate on the same step interval as checkpoint saves (both use
    # CONFIG["save_steps"]).
    # NOTE(review): older transformers releases spell this keyword
    # `evaluation_strategy` - confirm the installed version accepts
    # `eval_strategy`.
    eval_strategy="steps",
    eval_steps=CONFIG["save_steps"],
    # Keep at most two checkpoints on disk
    save_total_limit=2,
    # At the end of training, reload the checkpoint with the lowest eval_loss
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    fp16=True,
    report_to="none",
    remove_unused_columns=False,
    optim="paged_adamw_8bit",  # Memory-efficient optimizer
)

# Data collator configured with mlm=False (no masked-language-modelling objective)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Trainer wired to the tokenized train/validation splits
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
)

# Summary; the effective batch size printed here is the per-device batch size
# multiplied by the number of gradient accumulation steps
print("Trainer configuré!")
print(f"  Epochs: {CONFIG['num_epochs']}")
print(f"  Effective batch size: {CONFIG['batch_size'] * CONFIG['gradient_accumulation_steps']}")
print(f"  Learning rate: {CONFIG['learning_rate']}")

In [None]:
# Launch training. The notice printed first (in French) tells the user that
# the run can take from 30 minutes to 2 hours depending on the dataset size.
print("Démarrage de l'entraînement...")
print("(Cela peut prendre 30min à 2h selon la taille du dataset)\n")

trainer.train()

## 8. Sauvegarde du modèle

In [None]:
# Persist the fine-tuned model and its tokenizer in the same folder
final_path = "{}/final".format(CONFIG["output_dir"])

print(f"Sauvegarde vers {final_path}...")
# Model first, then tokenizer, both written to final_path
for save_to in (trainer.save_model, tokenizer.save_pretrained):
    save_to(final_path)

print("Modèle sauvegardé!")

In [None]:
# Zip the saved model folder, then hand the archive to the browser
import shutil

zip_path = "functiongemma-ha-finetuned"
archive_name = zip_path + ".zip"
shutil.make_archive(base_name=zip_path, format="zip", root_dir=final_path)

print(f"Archive créée: {archive_name}")

# Trigger the download of the archive
files.download(archive_name)

## 9. Test du modèle (optionnel)

In [None]:
# Try out the fine-tuned model
def test_model(query: str):
    """Generate the fine-tuned model's reply to a single user request.

    Args:
        query: User request in natural language.

    Returns:
        str: The text generated for the model turn, cut at the first
        ``<end_of_turn>`` marker and stripped of surrounding whitespace.
    """
    messages = [
        {
            "role": "developer",
            "content": "Tu es un assistant qui contrôle une maison intelligente avec Home Assistant."
        },
        {
            "role": "user",
            "content": query
        }
    ]

    # Build the prompt with the tokenizer's chat template; if that raises,
    # fall back to the manual turn layout.
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt still
        # stops the cell.
        text = "".join(
            f"<start_of_turn>{msg['role']}\n{msg['content']}<end_of_turn>\n"
            for msg in messages
        )
        text += "<start_of_turn>model\n"

    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # The model may still be in training mode after trainer.train(); switch
    # to eval mode so dropout is disabled while generating.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens: slicing off the prompt is more
    # reliable than searching the full decoded text for the model-turn marker.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
    # Keep what precedes the first end-of-turn marker, if any
    response = response.split("<end_of_turn>")[0]

    return response.strip()

# Sample requests (in French: a light, the heating, the shutters) used as a
# quick smoke test of the fine-tuned model
test_queries = [
    "Allume la lumière du salon",
    "Mets le chauffage à 21 degrés",
    "Ferme les volets de la chambre",
]

# Print each request, then the reply returned by test_model, then a blank line
print("Tests du modèle fine-tuné:\n")
for query in test_queries:
    print(f"User: {query}")
    response = test_model(query)
    print(f"Model: {response}")
    print()

## 10. Upload vers Hugging Face Hub (optionnel)

In [None]:
# Décommenter pour upload vers HuggingFace Hub
# REPO_NAME = "votre-username/functiongemma-ha"  # Changez ceci!

# model.push_to_hub(REPO_NAME, private=True)
# tokenizer.push_to_hub(REPO_NAME, private=True)
# print(f"Modèle uploadé vers: https://huggingface.co/{REPO_NAME}")

---

## Utilisation après téléchargement

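Pour utiliser le modèle sur votre machine locale, décompressez d'abord l'archive téléchargée `functiongemma-ha-finetuned.zip` dans `./output/final` (ou adaptez le chemin dans le code ci-dessous), puis chargez les poids LoRA sur le modèle de base :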

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Charger le modèle de base
base_model = AutoModelForCausalLM.from_pretrained("google/functiongemma-270m-it")
tokenizer = AutoTokenizer.from_pretrained("google/functiongemma-270m-it")

# Charger les poids LoRA
model = PeftModel.from_pretrained(base_model, "./output/final")
```