In [None]:
"""
SCRIPT 1: LOCAL LORA MERGER
Run this on your local computer (CPU is fine)
This merges your LoRA weights with the base model
"""

import json
import os
from datetime import datetime, timezone

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Opening banner: rule, title, rule, each on its own line.
print(
    "\n" + "=" * 70,
    "SCRIPT 1: MERGING LORA WEIGHTS LOCALLY",
    "=" * 70 + "\n",
    sep="\n",
)

# ============================================
# USER SETTINGS - EDIT BEFORE RUNNING
# ============================================
# Directory holding the fine-tuning run; the checkpoint folder is looked up inside it.
FINETUNED_MODEL_PATH = "./tinyllama-anxity-chat"
# Name of the checkpoint sub-folder whose LoRA adapters get merged.
CHECKPOINT = "checkpoint-1155"
# Identifier of the base model the adapters are applied to.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Destination folder for the merged model, tokenizer and metadata.
OUTPUT_DIR = "merged_model_for_mobile"

# ============================================
# STEP 1: MERGE LORA WEIGHTS
# ============================================

# Folder expected to hold the LoRA adapter files for the selected checkpoint.
checkpoint_path = os.path.join(FINETUNED_MODEL_PATH, CHECKPOINT)

print("üìã Configuration:")
print(f"   Fine-tuned model: {FINETUNED_MODEL_PATH}")
print(f"   Checkpoint: {CHECKPOINT}")
print(f"   Output directory: {OUTPUT_DIR}")
print()

# Validate both locations before any model is loaded, so a typo in the
# configuration fails fast with a readable message.
# `isdir` (rather than `exists`) also rejects a regular file sitting at the
# path, which would otherwise break the directory listing below.
if not os.path.isdir(FINETUNED_MODEL_PATH):
    print(f"‚ùå Error: Model path not found: {FINETUNED_MODEL_PATH}")
    print("   Please update FINETUNED_MODEL_PATH in the script")
    # Raise SystemExit explicitly: the bare `exit` helper is installed by the
    # `site` module for interactive use and is not guaranteed to be present
    # or to abort execution in every host (e.g. notebook kernels).
    raise SystemExit(1)

if not os.path.isdir(checkpoint_path):
    print(f"‚ùå Error: Checkpoint not found: {checkpoint_path}")
    print(f"   Available checkpoints in {FINETUNED_MODEL_PATH}:")
    # os.listdir order is arbitrary; sort so the suggestions are stable.
    available_checkpoints = sorted(
        entry
        for entry in os.listdir(FINETUNED_MODEL_PATH)
        if entry.startswith("checkpoint-")
    )
    for entry in available_checkpoints:
        print(f"   - {entry}")
    if not available_checkpoints:
        print("   (none found)")
    raise SystemExit(1)

print("‚úÖ Paths validated\n")

# Merge pipeline: load the base model, attach the LoRA adapters, fold them
# into the base weights, then write the merged model, tokenizer and a small
# metadata file to OUTPUT_DIR. Any failure prints troubleshooting hints and
# terminates with exit status 1.
try:
    print("=" * 70)
    print("STEP 1/4: Loading Base Model")
    print("=" * 70)
    print(f"\nüì• Loading base model: {BASE_MODEL}")
    print("   (This may take 2-5 minutes...)\n")

    # Half-precision weights, kept on the CPU.
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="cpu",
        low_cpu_mem_usage=True
    )

    print("‚úÖ Base model loaded successfully!")
    print(f"   Model size: {sum(p.numel() for p in base_model.parameters()) / 1e6:.1f}M parameters")

    print("\n" + "=" * 70)
    print("STEP 2/4: Loading Your LoRA Weights")
    print("=" * 70)
    print(f"\nüì• Loading LoRA adapters from: {checkpoint_path}\n")

    model = PeftModel.from_pretrained(base_model, checkpoint_path)

    print("‚úÖ LoRA weights loaded successfully!")

    print("\n" + "=" * 70)
    print("STEP 3/4: Merging LoRA into Base Model")
    print("=" * 70)
    print("\nüîÑ Merging weights...")
    print("   (This may take 5-10 minutes...)\n")

    merged_model = model.merge_and_unload()

    print("‚úÖ Models merged successfully!")

    print("\n" + "=" * 70)
    print("STEP 4/4: Saving Merged Model")
    print("=" * 70)

    # Create output directory
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print(f"\nüíæ Saving merged model to: {OUTPUT_DIR}")
    print("   (This may take 5-10 minutes...)\n")

    # Save model
    merged_model.save_pretrained(OUTPUT_DIR, max_shard_size="2GB")

    # The tokenizer is read from the checkpoint folder and copied next to the
    # merged weights so OUTPUT_DIR is self-contained.
    print("üì• Loading and saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
    tokenizer.save_pretrained(OUTPUT_DIR)

    print("‚úÖ Tokenizer saved!")

    # Save metadata for Script 2
    metadata = {
        "base_model": BASE_MODEL,
        "original_checkpoint": checkpoint_path,
        # Time of the merge as a timezone-aware UTC timestamp in ISO 8601 form.
        "merged_date": datetime.now(timezone.utc).isoformat(),
        "model_type": "TinyLlama-1.1B",
        # None when the tokenizer object has no chat_template attribute.
        "chat_template": getattr(tokenizer, "chat_template", None),
        "bos_token": tokenizer.bos_token,
        "eos_token": tokenizer.eos_token,
    }

    with open(os.path.join(OUTPUT_DIR, "conversion_metadata.json"), "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print("‚úÖ Metadata saved!")

    # ============================================
    # SUCCESS - SHOW NEXT STEPS
    # ============================================

    print("\n" + "=" * 70)
    print("üéâ SUCCESS! MERGED MODEL READY")
    print("=" * 70)

    print(f"\n‚úÖ Your merged model is saved in: {OUTPUT_DIR}/")
    print("\nüìÅ Files created:")
    # List the directory once and accumulate the total while printing, so the
    # per-file lines and the total are computed from the same snapshot.
    total_size = 0
    for file in os.listdir(OUTPUT_DIR):
        file_path = os.path.join(OUTPUT_DIR, file)
        size_bytes = os.path.getsize(file_path)
        total_size += size_bytes
        size_mb = size_bytes / (1024 * 1024)
        print(f"   - {file} ({size_mb:.1f} MB)")

    print(f"\nüìä Total size: {total_size / (1024 * 1024):.1f} MB")

    print("\n" + "=" * 70)
    print("üì± NEXT STEPS:")
    print("=" * 70)

    print(f"""
1. üì§ Upload the '{OUTPUT_DIR}' folder to Google Drive
   (You can zip it first to make upload faster)

2. üöÄ Open Google Colab: https://colab.research.google.com

3. üìã Copy and run SCRIPT 2 (the GPU conversion script)

4. üì• Download the .litertlm file from Colab

5. üì± Transfer to your phone and import via the '+' button!
""")

    print("=" * 70)
    print("‚úÖ Script 1 Complete! Ready for Script 2 on Colab")
    print("=" * 70 + "\n")

except Exception as e:
    print(f"\n‚ùå ERROR: {e}")
    print("\nüí° Troubleshooting:")
    print("   - Make sure you have enough RAM (at least 8GB free)")
    print("   - Check that all required packages are installed:")
    print("     pip install transformers peft torch")
    print("   - Verify your checkpoint path is correct")
    # Explicit SystemExit instead of the interactive-only `exit` helper;
    # `from e` keeps the original failure attached as the cause.
    raise SystemExit(1) from e

In [None]:
# Install packages with the %pip magic, which targets the running kernel's
# environment. Each command runs separately, in the order listed.
# NOTE(review): no versions are pinned, so the installed set can differ
# between runs — consider pinning once a working combination is known.
# NOTE(review): presumably these are the dependencies for the follow-up GPU
# conversion step ("SCRIPT 2") rather than for the merge cell, which also
# needs `peft` — confirm, and move this cell to the top if so.
%pip install ai-edge-torch
%pip install ai-edge-torch-generative
%pip install torch transformers
%pip install mediapipe