In [None]:
# Install required packages
!pip install transformers==4.41.0
!pip install peft==0.11.0
!pip install datasets==2.19.0
!pip install trl==0.8.6
!pip install bitsandbytes==0.43.1
!pip install accelerate==0.30.1
!pip install torch==2.3.0

In [None]:
# Authenticate with the Hugging Face Hub using a token kept out of the notebook.
#
# Google Colab alternative (left disabled; this notebook reads Kaggle secrets):
# from google.colab import userdata
# hf_token = userdata.get('HF_TOKEN')

# Kaggle: read the token stored under the secret label "hf-token".
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("hf-token")


# Log in so later Hub calls in this session are authenticated.
from huggingface_hub import login
login(token=secret_value_0)

In [None]:
import os
import torch
import gc
from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
)
from transformers.pipelines import pipeline
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training, PeftModel
from trl import SFTTrainer
import pandas as pd
import json

# GPU / allocator environment for this session.
# Both values are plain environment variables: the first restricts the process
# to GPU index 0, the second sets the CUDA allocator option
# "max_split_size_mb" to 128.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)

# Release cached CUDA memory, then run a Python garbage-collection pass.
torch.cuda.empty_cache()
gc.collect()

print(f"PyTorch detected {torch.cuda.device_count()} GPU(s)")

## Dataset Preparation

In [None]:
def load_financial_datasets(max_samples=50000):
    """Load the Finance Alpaca dataset, capped in size for faster training.

    Args:
        max_samples: Upper bound on the number of rows kept from the
            downloaded dataset. The default (50000) is the cap this function
            has always applied. Pass ``None`` to keep every row.

    Returns:
        The (possibly truncated) ``train`` split of ``gbharti/finance-alpaca``.
        If loading fails for any reason, a built-in fallback dataset is
        returned instead: four hand-written Q&A records (keys ``instruction``,
        ``input``, ``output``) repeated 1000 times.
    """

    # Load Finance Alpaca dataset
    try:
        finance_alpaca = load_dataset("gbharti/finance-alpaca", split="train")
        print(f"Loaded Finance Alpaca: {len(finance_alpaca)} samples")

        # Keep only the first `max_samples` rows; the message reports the real
        # cap so the log can no longer disagree with the code.
        if max_samples is not None and len(finance_alpaca) > max_samples:
            finance_alpaca = finance_alpaca.select(range(max_samples))
            print(f"Limited dataset to {max_samples} samples for faster training")

    except Exception as exc:
        # Deliberate best-effort fallback, but say *why* the real dataset was
        # skipped. `Exception` (not a bare except) lets Ctrl-C / kernel
        # interrupts propagate instead of silently switching datasets.
        print(
            f"Finance Alpaca not available ({type(exc).__name__}: {exc}), "
            "creating sample dataset"
        )
        # Create sample financial data if dataset not accessible
        sample_data = [
            {
                "instruction": "What is the P/E ratio and how is it calculated?",
                "input": "",
                "output": "The P/E ratio (Price-to-Earnings ratio) is calculated by dividing the market price per share by the earnings per share (EPS). It indicates how much investors are willing to pay for each dollar of earnings."
            },
            {
                "instruction": "Explain the importance of SEC 10-K filings for investors.",
                "input": "",
                "output": "SEC 10-K filings are annual reports that provide a comprehensive overview of a company's business, financial condition, and results of operations. They are crucial for investors as they contain audited financial statements, risk factors, and management discussion."
            },
            {
                "instruction": "What are the key financial ratios for stock analysis?",
                "input": "",
                "output": "Key financial ratios include P/E ratio, P/B ratio, ROE, ROA, debt-to-equity ratio, current ratio, and profit margins. These help evaluate profitability, liquidity, and financial health."
            },
            {
                "instruction": "How do interest rates affect stock valuations?",
                "input": "",
                "output": "Higher interest rates increase the discount rate for future cash flows, reducing present value of stocks. They also increase borrowing costs and may slow economic growth, negatively impacting stock prices."
            }
        ] * 1000  # Replicate for training
        finance_alpaca = Dataset.from_list(sample_data)

    return finance_alpaca

def format_financial_dataset(dataset):
    """Add a ``text`` field holding each record as one chat-formatted prompt.

    Every record is rendered as a system turn, a user turn (the instruction,
    followed by the optional ``input`` on its own line) and an assistant turn
    containing the expected output.

    Args:
        dataset: Object exposing ``map(fn)``; each record must provide
            ``instruction`` and ``output`` and may provide ``input``.

    Returns:
        Whatever ``dataset.map`` returns for the per-record formatter.

    NOTE(review): no end-of-sequence token is appended to the text, and the
    trainer cell passes ``add_special_tokens=False`` /
    ``append_concat_token=False``. Confirm that the model still learns where
    an answer ends.
    """
    system_turn = "<|system|>\nYou are a helpful financial analyst assistant.\n"

    def format_prompt(example):
        # A missing key and an explicit None are both treated as "no extra
        # input"; previously a None value crashed on `.strip()`.
        extra_input = example.get("input") or ""

        user_turn = example["instruction"]
        if extra_input.strip():
            # The extra input is inserted unstripped, exactly as stored.
            user_turn = f"{user_turn}\n{extra_input}"

        prompt = f"{system_turn}<|user|>\n{user_turn}\n<|assistant|>\n{example['output']}"
        return {"text": prompt}

    return dataset.map(format_prompt)

# Load and format dataset
# `formatted_dataset` carries the extra "text" field produced by
# format_financial_dataset; it is the dataset handed to the trainer later on.
financial_dataset = load_financial_datasets()
formatted_dataset = format_financial_dataset(financial_dataset)
print(f"Formatted dataset size: {len(formatted_dataset)}")

## Model and Tokenizer Configuration

In [None]:
# Model configuration for TinyLlama - optimized for speed
# BASE_MODEL_NAME: identifier passed to `from_pretrained` for model and tokenizer.
# NEW_MODEL_NAME: name of the fine-tuned adapter; used to build the
# "./results/<name>" output directory of the training run.
BASE_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
NEW_MODEL_NAME = "TinyLlama-1.1B-Financial-LoRA"

def setup_model_and_tokenizer(use_gradient_checkpointing=True):
    """Load the 4-bit quantized base model and its tokenizer for QLoRA training.

    Args:
        use_gradient_checkpointing: Forwarded to
            ``prepare_model_for_kbit_training``. ``True`` is that helper's own
            default, so the default here reproduces what this function has
            always done. Earlier comments in this function said checkpointing
            was disabled, but the helper was called with its defaults, which
            turn it on. Pass ``False`` to actually trade memory for speed.

    Returns:
        tuple: ``(model, tokenizer)`` where the model has been passed through
        ``prepare_model_for_kbit_training`` and has ``config.use_cache`` set
        to ``False``, and the tokenizer pads on the right with a pad token
        guaranteed to be set.
    """

    # 4-bit NF4 quantization with double quantization; FP16 compute dtype
    # (the notebook targets T4 GPUs).
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_storage=torch.uint8,
    )

    # NOTE(review): trust_remote_code=True allows repository-provided Python to
    # run at load time; confirm it is actually needed for this checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(
        BASE_MODEL_NAME,
        trust_remote_code=True,
        use_fast=True
    )

    # Fall back to EOS as the padding token when the tokenizer defines none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_NAME,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.float16,  # Use FP16 for T4 GPUs
        low_cpu_mem_usage=True,
    )

    # Prepare model for k-bit training - essential for QLoRA.
    # The checkpointing choice is passed explicitly so the code and the
    # documentation above cannot drift apart again.
    model = prepare_model_for_kbit_training(
        model,
        use_gradient_checkpointing=use_gradient_checkpointing,
    )

    # Disable caching for training
    model.config.use_cache = False

    return model, tokenizer

# Module-level `model` / `tokenizer` are reused by the LoRA, trainer and
# inference cells below.
model, tokenizer = setup_model_and_tokenizer()
print("TinyLlama model and tokenizer loaded successfully")

## LoRA Configuration

In [None]:
def create_tinyllama_lora_config():
    """Build the LoRA adapter settings used for the TinyLlama fine-tune.

    Returns:
        LoraConfig: causal-LM adapter config with rank 8, alpha 16, dropout
        0.05, no bias training, rank-stabilised scaling switched on, and
        adapters attached to the ``q_proj`` and ``v_proj`` modules only.
    """
    # Only the query and value projections receive adapters, to keep the
    # number of trainable weights small.
    attention_targets = ["q_proj", "v_proj"]

    return LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        target_modules=attention_targets,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        use_rslora=True,
    )

# Apply LoRA to TinyLlama model
# `model` is rebound to the wrapped model returned by get_peft_model, so
# re-running this cell without reloading the base model wraps it again.
lora_config = create_tinyllama_lora_config()
model = get_peft_model(model, lora_config)

# Print trainable parameters
def print_trainable_parameters(model):
    """Print trainable vs. total parameter counts and the trainable share.

    Args:
        model: Object whose ``named_parameters()`` yields ``(name, param)``
            pairs; each ``param`` needs ``numel()`` and ``requires_grad``.
    """
    # One pass over the parameters: (element count, is-trainable) per tensor.
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]

    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, trainable in sizes if trainable)

    print(f"Trainable params: {trainable_params:,}")
    print(f"All params: {all_param:,}")
    print(f"Trainable%: {100 * trainable_params / all_param:.2f}%")

# Report how small the trainable (adapter) share of the wrapped model is.
print_trainable_parameters(model)

## Training Configuration

In [None]:
def create_fast_training_arguments():
    """Assemble the ``TrainingArguments`` for the short fine-tuning run.

    Returns:
        TrainingArguments: configuration writing to ``./results/<NEW_MODEL_NAME>``
        with FP16 training, batch size 8 and 2 gradient-accumulation steps.
    """
    # Batch shape and data loading.
    batching = dict(
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=2,
        group_by_length=True,
        dataloader_pin_memory=True,
        dataloader_num_workers=2,
        remove_unused_columns=False,
    )

    # Run length.
    # NOTE(review): both an epoch count and a step limit are set; confirm which
    # one is meant to bound the run.
    duration = dict(
        num_train_epochs=2,
        max_steps=500,
    )

    # Optimiser and learning-rate schedule.
    optimisation = dict(
        learning_rate=2e-4,
        warmup_steps=25,
        lr_scheduler_type="linear",
        optim="adamw_torch",
        weight_decay=0.01,
        max_grad_norm=0.3,
    )

    # Numeric precision and memory: FP16 on, BF16 off (T4 GPUs).
    # NOTE(review): gradient_checkpointing=False here does not by itself undo
    # checkpointing enabled elsewhere on the model - verify the effective state.
    precision = dict(
        fp16=True,
        bf16=False,
        gradient_checkpointing=False,
    )

    # Logging, evaluation and checkpoint cadence.
    # NOTE(review): report_to=None may not disable experiment trackers; confirm
    # the intended reporting target.
    bookkeeping = dict(
        logging_steps=25,
        save_steps=50,
        evaluation_strategy="steps",
        eval_steps=50,
        report_to=None,
        save_total_limit=2,
        load_best_model_at_end=False,
    )

    return TrainingArguments(
        output_dir=f"./results/{NEW_MODEL_NAME}",
        **batching,
        **duration,
        **optimisation,
        **precision,
        **bookkeeping,
    )

# Built once here; passed to the trainer in the training cell below.
training_args = create_fast_training_arguments()

In [None]:
import os

import wandb

# SECURITY: never hardcode the Weights & Biases API key in the notebook.
# The key that used to be embedded in this cell has been exposed to everyone
# with access to the notebook and should be revoked in the W&B settings.
#
# Lookup order:
#   1. the WANDB_API_KEY environment variable, if set;
#   2. a Kaggle secret labelled "wandb-api-key" (add it under
#      Add-ons > Secrets), read the same way as the Hugging Face token.
wandb_api_key = os.environ.get("WANDB_API_KEY")
if not wandb_api_key:
    from kaggle_secrets import UserSecretsClient

    wandb_api_key = UserSecretsClient().get_secret("wandb-api-key")

wandb.login(key=wandb_api_key)


## Training Setup and Execution

In [None]:
def create_tinyllama_trainer(model, tokenizer, dataset, training_args, lora_config):
    """Create the SFT trainer with a small held-out evaluation split.

    The first ``eval_size`` rows of ``dataset`` are used for evaluation and
    the remaining rows for training. Previously the evaluation rows were also
    part of the training set, so the reported eval loss was measured on data
    the model had been trained on.

    Args:
        model: Model to train.
        tokenizer: Tokenizer handed to the trainer.
        dataset: Dataset with a ``text`` field; must support ``len()`` and
            ``select(indices)`` and contain at least two rows.
        training_args: ``TrainingArguments`` for the run.
        lora_config: LoRA configuration forwarded as ``peft_config``.

    Returns:
        SFTTrainer: trainer using packed sequences of up to 1024 tokens.

    Raises:
        ValueError: If ``dataset`` has fewer than two rows and therefore
            cannot be split into train and eval parts.
    """
    total_rows = len(dataset)
    if total_rows < 2:
        raise ValueError(
            f"Need at least 2 samples to build train/eval splits, got {total_rows}"
        )

    # 10% of the data, at most 100 rows and at least one row (tiny datasets
    # previously produced an empty eval set).
    eval_size = max(1, min(100, total_rows // 10))

    eval_split = dataset.select(range(eval_size))
    train_split = dataset.select(range(eval_size, total_rows))

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_split,
        eval_dataset=eval_split,
        peft_config=lora_config,
        dataset_text_field="text",
        tokenizer=tokenizer,
        args=training_args,
        max_seq_length=1024,  # Reduced from 2048 for speed
        packing=True,  # Enable packing for efficiency
        dataset_kwargs={
            "add_special_tokens": False,
            "append_concat_token": False,
        }
    )

    return trainer

# Create trainer
trainer = create_tinyllama_trainer(model, tokenizer, formatted_dataset, training_args, lora_config)

# Memory cleanup before training
torch.cuda.empty_cache()
gc.collect()

# Start training
print("Starting TinyLlama financial fine-tuning...")
# memory_allocated() is in bytes; dividing by 1024**3 reports GiB.
print(f"Memory allocated before training: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
trainer.train()

# Save the fine-tuned model
# save_model() is called without a path; the message below names
# ./results/<NEW_MODEL_NAME>, the output_dir set in create_fast_training_arguments.
trainer.save_model()
print(f"Model saved to ./results/{NEW_MODEL_NAME}")

## Model Testing and Inference

In [None]:
def test_tinyllama_financial_model(model, tokenizer):
    """Test the fine-tuned TinyLlama model on financial queries"""

    # Test prompts in TinyLlama format[1]
    test_prompts = [
        "<|system|>\nYou are a helpful financial analyst assistant.\n<|user|>\nExplain what investors should look for in a company's 10-K filing.\n<|assistant|>\n",
        "<|system|>\nYou are a helpful financial analyst assistant.\n<|user|>\nWhat are the key financial ratios for evaluating a stock?\n<|assistant|>\n",
        "<|system|>\nYou are a helpful financial analyst assistant.\n<|user|>\nHow do interest rate changes affect stock market valuations?\n<|assistant|>\n"
    ]

    # Create pipeline for inference with TinyLlama
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=150,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        torch_dtype=torch.float16
    )

    print("Testing fine-tuned TinyLlama financial model:")
    print("=" * 50)

    for i, prompt in enumerate(test_prompts, 1):
        print(f"\nTest {i}:")
        question = prompt.split("<|user|>\n")[1].split("\n<|assistant|>")[0]
        print("Input:", question)

        result = pipe(prompt)
        full_response = result[0]['generated_text']
        
        # Extract model response
        if "<|assistant|>\n" in full_response:
            response = full_response.split("<|assistant|>\n")[-1]
            if "<|" in response:
                response = response.split("<|")[0]
        else:
            response = full_response

        print("Output:", response.strip()[:200] + "..." if len(response.strip()) > 200 else response.strip())
        print("-" * 30)

# Test the model
# Uses the in-memory `model` / `tokenizer` from the cells above (nothing is
# reloaded from disk here).
test_tinyllama_financial_model(model, tokenizer)

## Model Saving and Loading

In [None]:
import gc
import os
import shutil
import subprocess
import sys

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

def setup_llamacpp_robust():
    """Fetch a fresh llama.cpp checkout and install what its converter needs.

    Steps, in order: delete any existing checkout, install ``build-essential``
    and ``cmake`` through apt-get, shallow-clone the repository, verify that
    ``convert_hf_to_gguf.py`` is present, and pip-install the repository's
    ``requirements.txt`` (when it exists) into the running interpreter.

    The clone target is passed to git explicitly, so the process working
    directory is left untouched and relative paths used elsewhere in the
    notebook keep resolving against the same directory.

    Returns:
        bool: ``True`` when every step succeeded, ``False`` otherwise (the
        failure, including any captured command stderr, is printed).
    """
    llama_cpp_dir = "/kaggle/working/llama.cpp"

    try:
        # Clean start - remove any existing directory (or stray file/symlink).
        if os.path.lexists(llama_cpp_dir):
            print("Removing existing llama.cpp directory...")
            if os.path.isdir(llama_cpp_dir) and not os.path.islink(llama_cpp_dir):
                shutil.rmtree(llama_cpp_dir)
            else:
                os.remove(llama_cpp_dir)

        # Make sure the parent of the clone target exists.
        os.makedirs(os.path.dirname(llama_cpp_dir), exist_ok=True)

        # Install system dependencies first
        print("Installing system dependencies...")
        subprocess.run(["apt-get", "update", "-qq"], check=True)
        subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True)

        # Shallow clone (history is not needed) straight into the target path.
        print("Cloning llama.cpp repository...")
        subprocess.run(
            [
                "git", "clone",
                "--depth", "1",
                "https://github.com/ggerganov/llama.cpp.git",
                llama_cpp_dir,
            ],
            check=True, capture_output=True, text=True,
        )

        print("✅ Repository cloned successfully")

        # Verify directory and key files exist
        if not os.path.exists(llama_cpp_dir):
            raise RuntimeError("Directory not created after cloning")

        convert_script = os.path.join(llama_cpp_dir, "convert_hf_to_gguf.py")
        if not os.path.exists(convert_script):
            raise RuntimeError("convert_hf_to_gguf.py not found in repository")

        print(f"✅ Convert script found at: {convert_script}")

        # Install requirements with the interpreter running this kernel, so the
        # packages land where the converter subprocess will look for them.
        requirements_path = os.path.join(llama_cpp_dir, "requirements.txt")
        if os.path.exists(requirements_path):
            print("Installing requirements...")
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-r", requirements_path],
                check=True,
            )

        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ Setup failed: {e}")
        # Only the clone captures output; show its tail when available.
        if e.stderr:
            print(f"Command stderr: {e.stderr[-500:]}")
        return False

    except Exception as e:
        print(f"❌ Setup failed: {e}")
        return False

def build_llamacpp():
    """Build the ``llama-quantize`` binary from the llama.cpp checkout.

    The build is skipped when the binary already exists. CMake is run with
    ``cwd`` set to the checkout instead of changing the working directory of
    the whole process.

    Returns:
        bool: ``True`` if ``build/bin/llama-quantize`` exists afterwards;
        ``False`` if the checkout is missing, a build command fails or cannot
        be started, or the binary is not produced.
    """
    llama_cpp_dir = "/kaggle/working/llama.cpp"
    build_dir = os.path.join(llama_cpp_dir, "build")
    quantize_binary = os.path.join(build_dir, "bin", "llama-quantize")

    if os.path.exists(quantize_binary):
        print("✅ llama-quantize already built")
        return True

    if not os.path.exists(llama_cpp_dir):
        print("❌ llama.cpp directory not found")
        return False

    print("Building llama.cpp...")

    # Linker/compiler flags kept from the original build recipe (added there
    # to work around a "DSO missing" link error).
    env = os.environ.copy()
    env['LDFLAGS'] = "-Wl,--copy-dt-needed-entries -lm -lpthread"
    env['CXXFLAGS'] = "-O2"

    try:
        # Start from an empty build directory.
        if os.path.exists(build_dir):
            shutil.rmtree(build_dir)
        os.makedirs(build_dir, exist_ok=True)

        # Configure with cmake with explicit linker flags
        print("Configuring build...")
        subprocess.run(
            [
                "cmake", "-B", "build",
                "-DCMAKE_BUILD_TYPE=Release",
                "-DLLAMA_CUBLAS=OFF",
                "-DLLAMA_METAL=OFF",
                "-DCMAKE_EXE_LINKER_FLAGS=-lm -lpthread",  # Explicitly link math and pthread libraries
                "-DCMAKE_SHARED_LINKER_FLAGS=-lm -lpthread",
            ],
            capture_output=True, text=True, check=True, env=env, cwd=llama_cpp_dir,
        )

        # Only the quantize target is built, single-threaded to limit memory use.
        print("Compiling (this may take a few minutes)...")
        subprocess.run(
            [
                "cmake", "--build", "build",
                "--config", "Release",
                "-j", "1",
                "--target", "llama-quantize",
            ],
            capture_output=True, text=True, check=True, env=env, cwd=llama_cpp_dir,
        )

        # Verify binary was created
        if os.path.exists(quantize_binary):
            print("✅ llama.cpp built successfully")
            return True

        print("❌ Build completed but binary not found")
        return False

    except subprocess.CalledProcessError as e:
        print(f"❌ Build failed: {e}")
        if e.stdout:
            print(f"Build output: {e.stdout[-500:]}")
        if e.stderr:
            print(f"Build errors: {e.stderr[-500:]}")
        return False

    except OSError as e:
        # e.g. cmake is not installed, or the build directory cannot be reset.
        # Report it as a failed build so callers can fall back to F16.
        print(f"❌ Build failed: {e}")
        return False

def convert_to_gguf_with_quantization(merged_model_dir, output_path):
    """Convert a merged Hugging Face model to GGUF, then try to quantize it.

    The model is first converted to an intermediate F16 GGUF file named after
    ``output_path`` with ``_f16`` inserted before the extension; that file is
    then passed to ``quantize_to_q4km``.

    Args:
        merged_model_dir: Directory containing the merged model.
        output_path: Destination of the quantized GGUF file. A bare filename
            (no directory component) is accepted.

    Returns:
        str | None: ``output_path`` if quantization succeeded, the path of the
        intermediate F16 file if only quantization failed, or ``None`` if the
        converter script is missing or the conversion itself failed.
    """
    # Create output directory (nothing to create for a bare filename).
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    convert_script = "/kaggle/working/llama.cpp/convert_hf_to_gguf.py"

    if not os.path.exists(convert_script):
        print(f"❌ Convert script not found: {convert_script}")
        return None

    # Derive the temporary name from the extension only, so directory names
    # containing ".gguf" are left alone and the temporary file can never
    # coincide with the final output.
    stem, extension = os.path.splitext(output_path)
    temp_f16_path = f"{stem}_f16{extension}"

    # Run the converter with the interpreter of this kernel, which is where the
    # converter's requirements were installed.
    conversion_cmd = [
        sys.executable, convert_script,
        merged_model_dir,
        "--outfile", temp_f16_path,
        "--outtype", "f16"
    ]

    print(f"Running conversion: {' '.join(conversion_cmd)}")

    try:
        subprocess.run(conversion_cmd, check=True)
        print(f"✅ F16 GGUF conversion completed: {temp_f16_path}")

        # Now try to quantize to q4_k_m; keep the F16 file if that fails.
        final_path = quantize_to_q4km(temp_f16_path, output_path)
        return final_path if final_path else temp_f16_path

    except subprocess.CalledProcessError as e:
        print(f"❌ Conversion failed: {e}")
        return None

def quantize_to_q4km(input_gguf, output_gguf):
    """Quantize a GGUF file to ``q4_k_m`` with the ``llama-quantize`` binary.

    Args:
        input_gguf: Path of the source (F16) GGUF file. It is deleted after a
            successful quantization, unless it is the same file as
            ``output_gguf``.
        output_gguf: Path the quantized model is written to.

    Returns:
        str | None: ``output_gguf`` on success; ``None`` if the binary could
        not be built or the quantization command failed (the input file is
        kept in that case).
    """
    # Build (or reuse) the quantize binary first.
    if not build_llamacpp():
        print("❌ Failed to build llama.cpp. Keeping F16 model.")
        return None

    quantize_binary = "/kaggle/working/llama.cpp/build/bin/llama-quantize"

    quantize_cmd = [
        quantize_binary,
        input_gguf,
        output_gguf,
        "q4_k_m"
    ]

    print(f"Running quantization: {' '.join(quantize_cmd)}")

    try:
        subprocess.run(quantize_cmd, check=True, capture_output=True, text=True)
        print("✅ Quantization to q4_k_m completed successfully")

        # Remove the temporary f16 file, but never the freshly written result.
        same_file = os.path.abspath(input_gguf) == os.path.abspath(output_gguf)
        if not same_file and os.path.exists(input_gguf):
            os.remove(input_gguf)
            print(f"Removed temporary file: {input_gguf}")

        return output_gguf

    except subprocess.CalledProcessError as e:
        print(f"❌ Quantization failed: {e}")
        # Output was captured above; show its tail so the failure can be
        # diagnosed (same style as the build step).
        if e.stderr:
            print(f"Quantization errors: {e.stderr[-500:]}")
        print(f"F16 model available at: {input_gguf}")
        return None

    except OSError as e:
        # The binary could not be started at all.
        print(f"❌ Quantization failed: {e}")
        print(f"F16 model available at: {input_gguf}")
        return None

def convert_to_gguf_simple(merged_model_dir, output_path):
    """Convert a merged Hugging Face model to an F16 GGUF file (no quantization).

    Used as the fallback when the quantizing conversion did not produce a file.

    Args:
        merged_model_dir: Directory containing the merged model.
        output_path: Destination GGUF path. A bare filename (no directory
            component) is accepted.

    Returns:
        str | None: ``output_path`` on success; ``None`` if the converter
        script is missing or the conversion command failed.
    """
    # Create output directory (nothing to create for a bare filename).
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    convert_script = "/kaggle/working/llama.cpp/convert_hf_to_gguf.py"

    if not os.path.exists(convert_script):
        print(f"❌ Convert script not found: {convert_script}")
        return None

    # Convert to f16 format, using the interpreter of this kernel so the
    # converter sees the packages installed for it.
    conversion_cmd = [
        sys.executable, convert_script,
        merged_model_dir,
        "--outfile", output_path,
        "--outtype", "f16"
    ]

    print(f"Running conversion: {' '.join(conversion_cmd)}")

    try:
        subprocess.run(conversion_cmd, check=True)
        print(f"✅ GGUF conversion completed: {output_path}")
        return output_path

    except subprocess.CalledProcessError as e:
        print(f"❌ Conversion failed: {e}")
        return None

def save_model_for_llamacpp_complete():
    """Merge the LoRA adapter into the base model and export it as GGUF.

    Pipeline: set up llama.cpp, merge adapter and base model (skipped when a
    merged model is already on disk), convert to GGUF with a quantization
    attempt, and fall back to a plain F16 conversion if that produced no file.

    The adapter is looked up in ``./results/<name>`` first, which is where the
    training cell saves it, and then in ``./saved_models/<name>``, the only
    location this function used to check.

    Returns:
        str | None: Path of the GGUF file, or ``None`` if llama.cpp setup
        failed, no adapter was found, or every conversion attempt failed.
    """
    # Constants
    BASE_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    NEW_MODEL_NAME = "TinyLlama-1.1B-Financial-LoRA"

    # Paths
    ADAPTER_DIR_CANDIDATES = [
        f"./results/{NEW_MODEL_NAME}",       # written by trainer.save_model()
        f"./saved_models/{NEW_MODEL_NAME}",  # legacy location
    ]
    MERGED_MODEL_DIR = f"./merged_models/{NEW_MODEL_NAME}"
    GGUF_OUTPUT_PATH = f"./gguf_models/{NEW_MODEL_NAME}.gguf"

    print("Starting complete GGUF conversion process...")

    # STEP 1: Setup llama.cpp
    print("Setting up llama.cpp...")
    if not setup_llamacpp_robust():
        print("❌ Failed to setup llama.cpp")
        return None

    # STEP 2: Load and merge model (if not already done).
    # The merged model's config.json is checked rather than the bare directory,
    # so an empty directory left by an interrupted run is not mistaken for a
    # finished merge.
    if not os.path.exists(os.path.join(MERGED_MODEL_DIR, "config.json")):
        lora_adapter_dir = next(
            (
                candidate
                for candidate in ADAPTER_DIR_CANDIDATES
                if os.path.exists(os.path.join(candidate, "adapter_config.json"))
            ),
            None,
        )
        if lora_adapter_dir is None:
            print(f"❌ LoRA adapter not found. Looked in: {', '.join(ADAPTER_DIR_CANDIDATES)}")
            return None

        print("Loading and merging model...")
        print(f"Using LoRA adapter from {lora_adapter_dir}")

        # Load base model
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            low_cpu_mem_usage=True
        )

        # Load LoRA adapter
        model_with_lora = PeftModel.from_pretrained(base_model, lora_adapter_dir)

        # Merge
        merged_model = model_with_lora.merge_and_unload()

        # Save merged model
        os.makedirs(MERGED_MODEL_DIR, exist_ok=True)
        merged_model.save_pretrained(MERGED_MODEL_DIR)

        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
        tokenizer.save_pretrained(MERGED_MODEL_DIR)

        print(f"✅ Merged model saved to {MERGED_MODEL_DIR}")

        # Clean up memory
        del base_model, model_with_lora, merged_model
        torch.cuda.empty_cache()
        gc.collect()
    else:
        print(f"✅ Using existing merged model at {MERGED_MODEL_DIR}")

    # STEP 3: Convert to GGUF with quantization attempt
    print("Converting to GGUF format with quantization...")
    gguf_path = convert_to_gguf_with_quantization(MERGED_MODEL_DIR, GGUF_OUTPUT_PATH)

    # STEP 4: Fallback to simple conversion if quantization failed
    if not gguf_path or not os.path.exists(gguf_path):
        print("Quantization failed, falling back to F16 conversion...")
        gguf_path = convert_to_gguf_simple(MERGED_MODEL_DIR, GGUF_OUTPUT_PATH)

    if gguf_path:
        print(f"✅ GGUF model ready at: {gguf_path}")
        create_usage_instructions(gguf_path)
        return gguf_path
    else:
        print("❌ All GGUF conversion methods failed")
        return None

def create_usage_instructions(gguf_path):
    """Print a llama.cpp usage guide for the model and save it to a text file.

    Args:
        gguf_path: Path of an existing GGUF file; its size on disk is reported
            in the guide and the path is embedded in the example commands.

    The guide is printed and also written to
    ``llamacpp_usage_instructions.txt`` in the current working directory.
    """
    # File size in MiB, computed up front and shown with one decimal place.
    size_mb = os.path.getsize(gguf_path) / (1024*1024)

    instructions = f"""
=== LLAMA.CPP USAGE INSTRUCTIONS ===

Your GGUF model is ready: {gguf_path}

1. DOWNLOAD MODEL:
   - Copy the GGUF file to your local machine
   - File size: ~{size_mb:.1f} MB

2. INSTALL LLAMA.CPP:
   Mac: brew install llama.cpp
   Linux: Build from source or use package manager
   Windows: Download pre-built binaries

3. RUN INFERENCE:
   llama-cli -m {gguf_path} -p "What is financial analysis?"

4. START SERVER:
   llama-server -m {gguf_path} --port 8080

5. PYTHON USAGE:
   pip install llama-cpp-python
   
   from llama_cpp import Llama
   llm = Llama(model_path="{gguf_path}")
   response = llm("Explain P/E ratios", max_tokens=100)

6. API EXAMPLE:
   curl http://localhost:8080/completion \\
     -H "Content-Type: application/json" \\
     -d '{{"prompt": "Financial metrics", "n_predict": 128}}'

=== MODEL INFO ===
- Base: TinyLlama-1.1B-Chat-v1.0
- Fine-tuned: Financial Analysis
- Format: GGUF (llama.cpp compatible)
- Quantization: Q4_K_M or F16
"""

    # Show the guide in the notebook output ...
    print(instructions)

    # ... and keep a copy next to the notebook.
    with open("llamacpp_usage_instructions.txt", "w") as guide_file:
        guide_file.write(instructions)

def test_llamacpp_setup():
    """Test if llama.cpp setup works"""
    print("Testing llama.cpp setup...")
    
    if setup_llamacpp_robust():
        # List files in the directory
        llama_dir = "/kaggle/working/llama.cpp"
        if os.path.exists(llama_dir):
            files = os.listdir(llama_dir)
            print(f"Files in llama.cpp directory: {files[:10]}")  # Show first 10 files
            
            convert_script = os.path.join(llama_dir, "convert_hf_to_gguf.py")
            print(f"Convert script exists: {os.path.exists(convert_script)}")
            
        return True
    return False

# MAIN EXECUTION
# Verify the llama.cpp setup first; only then run the merge + GGUF export.
if __name__ == "__main__":
    if not test_llamacpp_setup():
        print("❌ Setup test failed")
    else:
        print("\n" + "="*50)
        print("Setup successful! Starting conversion...")
        print("="*50)

        # Full pipeline: merge, convert, try to quantize, fall back to F16.
        result = save_model_for_llamacpp_complete()

        if not result:
            print("\n❌ Conversion failed")
        else:
            print(f"\n🎉 SUCCESS! GGUF model created at: {result}")
            print("Check 'llamacpp_usage_instructions.txt' for detailed usage guide.")

In [None]:
# Final memory cleanup: drop cached CUDA memory, then collect Python garbage.
torch.cuda.empty_cache()
gc.collect()

# NOTE(review): these lines are printed unconditionally - they do not check
# whether training or the export above actually succeeded.
print(
    "\n".join(
        [
            "Training completed successfully!",
            "Expected training time: 4-6 hours on Kaggle T4 GPU",
            "Model size: ~1.5GB (suitable for Mac deployment)",
        ]
    )
)