In [None]:
!pip install bitsandbytes accelerate
!pip install transformers peft datasets pypdf2

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
from datasets import Dataset
import json

In [None]:
# ========== STEP 1: Load QA Pairs from JSON ==========
def load_qa_dataset(json_path):
    """
    Read question/answer records from a JSON file and wrap each one in
    Gemma's chat-turn markup.

    Args:
        json_path: Path to a UTF-8 JSON file containing a list of objects,
            each with a "question" and an "answer" key.

    Returns:
        The result of ``Dataset.from_list`` over one ``{"text": ...}`` row
        per QA pair, in the same order as the file.

    Raises:
        KeyError: If a record is missing "question" or "answer".
    """
    print(f"Loading QA pairs from: {json_path}")

    with open(json_path, 'r', encoding='utf-8') as handle:
        records = json.load(handle)

    # Each row is a single user turn followed by a single model turn,
    # delimited by Gemma's <start_of_turn>/<end_of_turn> markers.
    rows = [
        {
            "text": (
                f"<start_of_turn>user\n{record['question']}<end_of_turn>\n"
                f"<start_of_turn>model\n{record['answer']}<end_of_turn>"
            )
        }
        for record in records
    ]

    print(f"Loaded {len(rows)} QA pairs")
    return Dataset.from_list(rows)

In [None]:
# ========== STEP 2: Setup Model and Training ==========
def setup_model_and_train(dataset, output_dir="./gemma-finetuned", max_length=512):
    """Fine-tune Gemma 3 1B (instruct) with 4-bit quantization and LoRA.

    Args:
        dataset: Dataset with a "text" column holding one fully formatted
            conversation per row; every original column is dropped after
            tokenization.
        output_dir: Directory that receives checkpoints, the final LoRA
            adapter and the tokenizer files.
        max_length: Number of tokens every example is truncated/padded to.

    Returns:
        Tuple ``(model, tokenizer)``: the trained PEFT-wrapped model and the
        tokenizer that was used for training.
    """

    model_name = "google/gemma-3-1b-it"  # Gemma 3 1B Instruct - Best for mobile

    # Quantization config for memory efficiency
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,  # Gemma uses bfloat16
        bnb_4bit_use_double_quant=True,
    )

    # Load model and tokenizer.
    # NOTE(review): if this checkpoint is gated on the Hub, credentials must
    # already be configured in the environment before this call - confirm.
    # `trust_remote_code` is intentionally not enabled: the model loads through
    # the stock Auto classes, so there is no reason to allow repo-supplied code.
    print("Loading Gemma 3 1B model and tokenizer...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Only fall back to EOS when the tokenizer defines no pad token of its own;
    # overwriting an existing pad token would also be persisted by
    # tokenizer.save_pretrained() below.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Prepare the quantized model for training
    model = prepare_model_for_kbit_training(model)

    # LoRA configuration: adapters on every attention and MLP projection
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    )

    model = get_peft_model(model, peft_config)

    # Tokenize the dataset
    print("Tokenizing dataset...")

    def tokenize_function(examples):
        """Tokenize a batch of texts and build loss labels that skip padding."""
        tokenized = tokenizer(
            examples["text"],
            truncation=True,
            padding="max_length",
            max_length=max_length,
            return_tensors=None
        )
        # Every row is padded to max_length, so copying input_ids verbatim would
        # make the loss mostly about predicting padding. -100 is the label value
        # the causal-LM loss ignores. Masking by attention_mask (not by token id)
        # keeps real EOS tokens in the loss even when pad and EOS share an id.
        tokenized["labels"] = [
            [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
            for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
        ]
        return tokenized

    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names
    )

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=8,  # effective batch = 4 * 8 per device
        learning_rate=1e-4,
        bf16=True,  # NOTE(review): requires bf16-capable hardware - confirm for the target GPU
        save_steps=100,
        logging_steps=10,
        save_total_limit=2,
        warmup_steps=50,
        optim="paged_adamw_8bit",
        remove_unused_columns=False,
        report_to="none",
    )

    # Use the standard Trainer. `processing_class` is the current name for the
    # deprecated `tokenizer` argument.
    trainer = Trainer(
        model=model,
        processing_class=tokenizer,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    # Train
    print("Starting training...")
    trainer.train()

    # Save the final adapter weights and the tokenizer side by side so the
    # directory can be loaded on its own later.
    print(f"Saving LoRA model to {output_dir}")
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    return model, tokenizer

In [None]:
# ========== STEP 3: Merge LoRA Weights with Base Model ==========
def merge_and_save_model(lora_model_path, output_merged_path):
    """
    Merge LoRA weights with the base Gemma model and save a standalone model.

    The base model is loaded in bfloat16 (not quantized), the adapter found at
    ``lora_model_path`` is applied and folded into the base weights, and the
    result is written to ``output_merged_path`` together with the tokenizer
    and a small ``model_info.json`` metadata file.

    Args:
        lora_model_path: Directory containing the saved LoRA adapter and the
            tokenizer files.
        output_merged_path: Directory the merged model is written to.

    Returns:
        Tuple ``(merged_model, tokenizer)``.
    """
    base_model_name = "google/gemma-3-1b-it"

    print("\n" + "="*70)
    print("MERGING LORA WEIGHTS INTO BASE GEMMA MODEL")
    print("="*70)

    print(f"\n📥 Loading base Gemma model: {base_model_name}")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )

    print(f"📥 Loading LoRA adapters from: {lora_model_path}")
    model = PeftModel.from_pretrained(base_model, lora_model_path)

    print("🔄 Merging LoRA weights into base model...")
    merged_model = model.merge_and_unload()

    print(f"💾 Saving merged model to: {output_merged_path}")
    merged_model.save_pretrained(output_merged_path, max_shard_size="2GB")

    # Save the tokenizer that was stored next to the adapter
    tokenizer = AutoTokenizer.from_pretrained(lora_model_path)
    tokenizer.save_pretrained(output_merged_path)

    # Save metadata for AI Edge
    metadata = {
        "base_model": base_model_name,
        "lora_checkpoint": lora_model_path,
        "model_type": "Gemma-3-1B-Instruct",
        "merged": True,
        "optimized_for": "Google AI Edge Gallery",
        "format": "Ready for .litertlm conversion"
    }

    # The output directory already exists at this point (save_pretrained
    # created it); write with an explicit encoding so the file does not depend
    # on the platform default.
    with open(f"{output_merged_path}/model_info.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print("\n✅ Merged model saved successfully!")
    print(f"   Location: {output_merged_path}")
    print("\n📱 Next steps for Google AI Edge Gallery:")
    print("   1. Upload this folder to Google Colab")
    print("   2. Convert to .litertlm format using AI Edge Torch")
    print("   3. Import via '+' button in AI Edge Gallery app")

    return merged_model, tokenizer

In [None]:
# ========== MAIN EXECUTION ==========
# Driver: load the QA pairs, fine-tune with LoRA, then merge the adapter into
# the base model and print a summary of where the artifacts were written.
if __name__ == "__main__":

    # ============================================
    # CONFIGURATION
    # ============================================
    # `book` is part of the input file name and of both output directories,
    # so it must match the dataset file on disk exactly.
    book = "anxity"  # Change this to your book name
    QA_JSON_PATH = f"/kaggle/input/dataset-qa/{book}_pairs.json"
    LORA_OUTPUT_DIR = f"/kaggle/working/gemma-lora-{book}"
    MERGED_OUTPUT_DIR = f"/kaggle/working/gemma-{book}-merged"

    print("=" * 70)
    print("FINE-TUNING GEMMA 3 1B WITH QA PAIRS")
    print("Optimized for Google AI Edge Gallery")
    print("=" * 70)

    # ============================================
    # PHASE 1: TRAINING
    # ============================================
    print("\n📚 PHASE 1: Loading and Training")
    print("-" * 70)

    print("\nStep 1: Loading QA dataset...")
    dataset = load_qa_dataset(QA_JSON_PATH)
    print(f"✅ Loaded {len(dataset)} training examples")

    print("\nStep 2: Fine-tuning Gemma model with LoRA...")
    model, tokenizer = setup_model_and_train(dataset, LORA_OUTPUT_DIR)

    print("\n✅ Training complete!")
    print(f"   LoRA weights saved to: {LORA_OUTPUT_DIR}")

    # ============================================
    # PHASE 2: MERGING
    # ============================================
    print("\n\n🔗 PHASE 2: Merging LoRA with Base Model")
    print("-" * 70)

    merged_model, merged_tokenizer = merge_and_save_model(
        LORA_OUTPUT_DIR,
        MERGED_OUTPUT_DIR
    )

    # ============================================
    # SUMMARY
    # ============================================
    print("\n\n" + "=" * 70)
    print("🎉 SUCCESS! GEMMA MODEL READY FOR MOBILE")
    print("=" * 70)

    # The merged checkpoint is stored in bfloat16 (2 bytes per parameter), so a
    # ~1B-parameter model is roughly 2GB on disk before any on-device conversion.
    print(f"""
📦 OUTPUT 1 - LoRA Weights:
   Location: {LORA_OUTPUT_DIR}/
   Size: ~50-100MB
   
📦 OUTPUT 2 - Merged Model (Ready for AI Edge):
   Location: {MERGED_OUTPUT_DIR}/
   Size: ~2GB (bfloat16 weights)
   Format: PyTorch (ready for .litertlm conversion)
   
📱 To use in Google AI Edge Gallery:
   1. Download the merged model folder
   2. Convert to .litertlm using AI Edge Torch (see conversion script below)
   3. Transfer to phone and import via '+' button
    """)

    print("=" * 70)
    print("✅ Training and merging complete!")
    print("=" * 70)