# Kenya Clinical Reasoning - PRODUCTION ML TRAINING
**FLAN-T5-small Fine-tuning on Expert Clinical Data**

**Target:** Competition-winning model using REAL expert responses  
**Hardware:** Kaggle P100 GPU acceleration  
**Model:** Google FLAN-T5-small (77M params, edge-deployable)

In [1]:
# Install dependencies (run once)
!pip install rouge-score datasets accelerate -q

# Setup
import torch
import pandas as pd
import numpy as np
from datetime import datetime
import json
import sys
import os

# Check PyTorch and transformers compatibility
print(f"🔥 PyTorch version: {torch.__version__}")

# Test AdamW import (fixed in newer versions)
try:
    from torch.optim import AdamW
    print("✅ AdamW imported from torch.optim (recommended)")
except ImportError:
    try:
        from transformers import AdamW
        print("⚠️ AdamW imported from transformers (deprecated)")
    except ImportError:
        print("❌ AdamW not found - installing latest transformers")
        !pip install --upgrade transformers torch

# Check GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🔥 Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
else:
    print("⚠️ No GPU available - training will be slower on CPU")

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m00:01[0mmm00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m

In [5]:
!git clone https://github.com/jnopareboateng/kenyan-medical-reasoning.git

In [7]:
!git pull origin main

remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 10 (delta 7), reused 10 (delta 7), pack-reused 0 (from 0)[K
Unpacking objects: 100% (10/10), 4.59 KiB | 939.00 KiB/s, done.
From https://github.com/jnopareboateng/kenyan-medical-reasoning
 * branch            main       -> FETCH_HEAD
   0260873..c027a1f  main       -> origin/main
Updating 0260873..c027a1f
Unpacking objects: 100% (10/10), 4.59 KiB | 939.00 KiB/s, done.
From https://github.com/jnopareboateng/kenyan-medical-reasoning
 * branch            main       -> FETCH_HEAD
   0260873..c027a1f  main       -> origin/main
Updating 0260873..c027a1f
Fast-forward
 core/llama32_model.py  |  52 [32m+++++++++++++++++++++++[m[31m-[m
 core/meditron_model.py |  52 [32m+++++++++++++++++++++++[m[31m-[m
 core/phi4_model.py     |  56 [32m+++++++++++++++++++++++[m[31m--[m
 utils/cache_manager.py | 153 [32m++++++++++++++++++++++++++++

In [2]:
!rm -rf kenyan-medical-reasoning

In [2]:
os.getcwd()

'/kaggle/working'

In [3]:
# Directory name of the cloned repository, relative to the working directory
path = "kenyan-medical-reasoning"
# NOTE(review): relative path (no leading slash) and not referenced by any
# visible cell - presumably "/kaggle/working/" was intended; confirm.
working = "kaggle/working/"
# List the current working directory (shown as the cell output)
os.listdir()

['.virtual_documents', 'kenyan-medical-reasoning']

In [4]:
%cd kenyan-medical-reasoning

/kaggle/working/kenyan-medical-reasoning


In [5]:
# Ensure all dependencies are imported first
import torch
import numpy as np
import pandas as pd

# Import our existing modules
import sys

# Add the current directory to the import path so the project packages resolve
sys.path.append(".")
# from core.ml_model import MLPipeline, ClinicalT5Model, ClinicalExample
from utils.logger import CompetitionLogger

# Logger shared by the remaining cells
logger = CompetitionLogger("ML_Training")
logger.info("🚀 PRODUCTION ML TRAINING STARTED")

# Read the training set and summarise it
train_df = pd.read_csv("data/train.csv")
print(f"📊 Loaded {len(train_df)} training cases")
print(f"Columns: {list(train_df.columns)}")

# Response columns whose fill rate is reported below
expert_cols = [
    "Nursing Competency",
    "Clinical Panel",
    "Clinician",
    "GPT4.0",
    "LLAMA",
    "GEMINI",
]

# Only columns actually present in the CSV are reported; others are skipped
for col in (name for name in expert_cols if name in train_df.columns):
    filled = train_df[col].notna().sum()
    coverage = filled / len(train_df) * 100
    print(f"✅ {col}: {filled}/{len(train_df)} responses ({coverage:.1f}%)")

INFO | 🚀 PRODUCTION ML TRAINING STARTED
📊 Loaded 400 training cases
Columns: ['Master_Index', 'County', 'Health level', 'Years of Experience', 'Prompt', 'Nursing Competency', 'Clinical Panel', 'Clinician', 'GPT4.0', 'LLAMA', 'GEMINI', 'DDX SNOMED']
✅ Nursing Competency: 400/400 responses (100.0%)
✅ Clinical Panel: 400/400 responses (100.0%)
✅ Clinician: 400/400 responses (100.0%)
✅ GPT4.0: 400/400 responses (100.0%)
✅ LLAMA: 400/400 responses (100.0%)
✅ GEMINI: 400/400 responses (100.0%)
📊 Loaded 400 training cases
Columns: ['Master_Index', 'County', 'Health level', 'Years of Experience', 'Prompt', 'Nursing Competency', 'Clinical Panel', 'Clinician', 'GPT4.0', 'LLAMA', 'GEMINI', 'DDX SNOMED']
✅ Nursing Competency: 400/400 responses (100.0%)
✅ Clinical Panel: 400/400 responses (100.0%)
✅ Clinician: 400/400 responses (100.0%)
✅ GPT4.0: 400/400 responses (100.0%)
✅ LLAMA: 400/400 responses (100.0%)
✅ GEMINI: 400/400 responses (100.0%)


In [None]:
# Quick import test to verify everything works
print("🔍 Testing imports...")

try:
    from transformers import T5ForConditionalGeneration, T5Tokenizer
    from torch.optim import AdamW
    print("✅ Transformers and PyTorch imports successful")
    
    from core.ml_model import ClinicalT5Model
    print("✅ Custom ML model import successful")
    
    print("🎯 All imports working - ready for training!")
    
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Installing missing dependencies...")
    !pip install rouge-score datasets accelerate -q

In [None]:
# Initialize FLAN-T5 model WITH CACHING (no more repeated downloads!)
from core.ml_model import ClinicalT5Model

# Build the project wrapper around google/flan-t5-small, reusing ./models
# as the cache directory instead of forcing a fresh download.
model = ClinicalT5Model(
    model_name="google/flan-t5-small",
    cache_dir="./models",
    force_download=False,
)

# Total number of parameters of the wrapped model
param_count = sum(p.numel() for p in model.model.parameters())
logger.info(f"Model loaded: {param_count:,} parameters")

# Convert the training DataFrame into the wrapper's training examples
training_examples = model.prepare_training_data(train_df)
logger.info(f"✅ Prepared {len(training_examples)} training examples")

# Preview the first example (input and target truncated to 200 characters)
if training_examples:
    sample = training_examples[0]
    print("📋 SAMPLE TRAINING EXAMPLE:")
    print(f"Input: {sample.input_text[:200]}...")
    target_text = sample.target_response
    print(f"Target: {target_text[:200]}...")
    print(f"Length: {len(target_text)} chars")

# 🧹 CACHE MANAGEMENT: report cache status once the model is loaded
from utils.cache_manager import cache_status
print("\n💾 CACHE STATUS AFTER MODEL LOADING:")
cache_status()

In [None]:
# 🧹 FLAN-T5 CACHE MANAGEMENT
# Use these utilities to manage memory when training multiple models

from utils.cache_manager import cleanup_all, cache_status, emergency

# Call the project's cache_status helper under a heading
print("🔍 CURRENT CACHE STATUS:")
cache_status()

# Cheat-sheet of the available cleanup calls (printed only, nothing is run)
for option in (
    "\n💡 MEMORY MANAGEMENT OPTIONS:",
    "- model.cleanup_model() - Clean up this T5 model",
    "- cleanup_all() - Clear all cached models",
    "- emergency() - Nuclear cleanup if memory issues",
):
    print(option)

# Example: Clean up after training
# model.cleanup_model()  # Uncomment when done with this model
# cleanup_all()          # Uncomment to clear everything

In [None]:
# Hold out the last 15% of the prepared examples for validation.
# The split is positional: the examples are not shuffled first.
train_size = int(0.85 * len(training_examples))
train_examples, val_examples = (
    training_examples[:train_size],
    training_examples[train_size:],
)

logger.info(f"📈 Training: {len(train_examples)}, Validation: {len(val_examples)}")

# Hyperparameters, forwarded as keyword arguments to fine_tune()
config = {
    "epochs": 3,
    "batch_size": 8,
    "learning_rate": 3e-5,
}

logger.info(f"🔧 Training config: {config}")

# Run the fine-tuning and keep the returned results for the later cells
print("🚀 STARTING FINE-TUNING...")
training_results = model.fine_tune(
    train_examples=train_examples,
    val_examples=val_examples,
    **config,
)

logger.info("✅ Training completed!")
print("📊 Training Results:")
for stat in training_results["training_stats"]:
    epoch_no = stat["epoch"]
    epoch_loss = stat["train_loss"]
    # ROUGE-L may be absent from an epoch's stats; it is reported as 0 then
    epoch_rouge = stat.get("rouge_l", 0)
    print(f"Epoch {epoch_no}: Loss={epoch_loss:.4f}, ROUGE-L={epoch_rouge:.4f}")

# 🚀 NEW: State-of-the-Art LLM Models Available

**FLAN-T5-small is no longer the recommended model.** Three newer implementations are now available:

1. **Phi-4-mini-instruct (3.8B)** - Microsoft's latest reasoning model
2. **Meditron-7B** - Medical-specialized model trained on clinical data  
3. **Llama-3.2-3B-Instruct** - Meta's latest instruction-tuned model

All three models are loaded through **Unsloth**, which advertises roughly 2x faster fine-tuning and up to 70% less VRAM usage.

## Quick Model Comparison
- **Phi-4-mini**: Best overall reasoning, MIT license, 128K context
- **Meditron-7B**: Medical specialist, trained on PubMed + clinical guidelines
- **Llama-3.2**: Solid general performance, good instruction following

**Choose based on your priority: General reasoning (Phi-4), Medical knowledge (Meditron), or Balanced performance (Llama-3.2)**

In [6]:
%%capture
!pip install pip3-autoremove
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124
!pip install unsloth vllm
# !pip install --upgrade transformers==4.52.3

In [7]:
import os

os.listdir()
%cd kenyan-medical-reasoning/

[Errno 2] No such file or directory: 'kenyan-medical-reasoning/'
/kaggle/working/kenyan-medical-reasoning


In [18]:
!git pull origin main

In [None]:
# EXAMPLE: Using the new state-of-the-art models
# Install Unsloth first: pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

# CRITICAL FIX: Force reload modules to get latest versions
import importlib
import sys

# Re-execute every project model module that is already imported; modules
# not imported yet are left alone and load normally on first import.
for cached_name in (
    "core.phi4_model",
    "core.meditron_model",
    "core.llama32_model",
):
    if cached_name in sys.modules:
        importlib.reload(sys.modules[cached_name])

# Option 1: Phi-4-mini-instruct (Recommended for best reasoning)
print("🚀 OPTION 1: Microsoft Phi-4-mini-instruct")
from unsloth import FastLanguageModel
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
HF_TOKEN = user_secrets.get_secret("HF_TOKEN")

# try:
#     from core.phi4_model import ClinicalPhi4Model

#     # Initialize Phi-4 model with 4-bit quantization and caching
#     phi4_model = ClinicalPhi4Model(
#         "microsoft/Phi-4-mini-instruct",
#         load_in_4bit=True,
#         cache_dir="./models",  # This should now work!
#     )

#     # Use same training examples as before
#     phi4_training_examples = phi4_model.prepare_training_data(train_df)
#     print(f"✅ Phi-4: {len(phi4_training_examples)} training examples prepared")

#     # Show memory usage
#     if torch.cuda.is_available():
#         memory_used = torch.cuda.max_memory_allocated() / 1e9
#         print(f"GPU Memory: {memory_used:.1f}GB (4-bit quantized)")

# except ImportError as e:
#     print(f"⚠️ Unsloth not installed: {e}")
#     print(
#         "Run: pip install 'unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git'"
#     )
# except Exception as e:
#     print(f"❌ Error loading model: {e}")
#     print("Try restarting the kernel if imports are cached")

# print("\n" + "=" * 60)

# Option 2: Meditron-7B (Medical specialist)
# print("🏥 OPTION 2: Meditron-7B Medical Specialist")
# try:
#     from core.meditron_model import ClinicalMeditronModel

#     # Initialize medical specialist model with caching
#     meditron_model = ClinicalMeditronModel(
#         "epfl-llm/meditron-7b", load_in_4bit=True, cache_dir="./models"
#     )

#     # Prepare medical training data
#     meditron_training_examples = meditron_model.prepare_training_data(train_df)
#     print(f"✅ Meditron: {len(meditron_training_examples)} medical examples prepared")

# except ImportError as e:
#     print(f"⚠️ Dependencies missing: {e}")
# except Exception as e:
#     print(f"❌ Error loading Meditron: {e}")

# print("\n" + "=" * 60)

# Option 3: Llama-3.2-3B-Instruct (Balanced performance)
# Creates `llama32_model` and `llama32_training_examples` from `train_df`.
print("🦙 OPTION 3: Llama-3.2-3B-Instruct")
try:
    from core.llama32_model import ClinicalLlama32Model

    # Initialize Llama model with caching (4-bit loading, ./models cache dir)
    llama32_model = ClinicalLlama32Model(
        "unsloth/Llama-3.2-3B-Instruct", load_in_4bit=True, cache_dir="./models"
    )

    # Prepare training data
    llama32_training_examples = llama32_model.prepare_training_data(train_df)
    print(f"✅ Llama-3.2: {len(llama32_training_examples)} examples prepared")

except ImportError as e:
    print(f"⚠️ Dependencies missing: {e}")
except Exception as e:
    # Best-effort: the failure is only printed, so `llama32_model` and/or
    # `llama32_training_examples` may be left undefined after this cell.
    print(f"❌ Error loading Llama-3.2: {e}")

# NOTE(review): the message below recommends phi4_model, but the Phi-4 and
# Meditron options are commented out in this cell; only llama32_model is
# created here.
print("\n🎯 Choose your weapon and replace the model variable below!")
print("Recommended: phi4_model for best competition performance")

🚀 OPTION 1: Microsoft Phi-4-mini-instruct
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-06-20 06:01:25.706691: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750399285.901466      74 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750399285.957089      74 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 06-20 06:01:45 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 06-20 06:01:45 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 06-20 06:01:45 [__init__.py:239] Automatically detected platform cuda.
INFO 06-20 06:01:45 [__init__.py:239] Automatically detected platform cuda.
🦙 OPTION 3: Llama-3.2-3B-Instruct
🦙 OPTION 3: Llama-3.2-3B-Instruct
INFO | Loading unsloth/Llama-3.2-3B-Instruct with caching optimization
INFO | Loading unsloth/Llama-3.2-3B-Instruct with caching optimization
INFO | Downloading/Loading from cache: unsloth/Llama-3.2-3B-Instruct
INFO | Downloading/Loading from cache: unsloth/Llama-3.2-3B-Instruct
==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.51.3. vLLM: 0.8.5.post1.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 6.0. CUDA Toolkit: 12.4. Triton

Unsloth 2025.6.2 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


INFO | Llama-3.2-3B loaded with 1865526272 parameters
INFO | Prepared 400 training examples for Llama-3.2
✅ Llama-3.2: 400 examples prepared

🎯 Choose your weapon and replace the model variable below!
Recommended: phi4_model for best competition performance


In [None]:
# 🔥 COMPETITION-WINNING TRAINING EXAMPLE
# Use this instead of the outdated FLAN-T5 approach
#
# Fine-tunes the Llama-3.2 model created by the model-loading cell and rebinds
# `model`, `training_examples`, `train_examples`, `val_examples`, `config` and
# `training_results` for the cells that follow.
#
# To train another model instead, enable its option in the loading cell and
# bind it below in place of llama32_model:
#   model = phi4_model      # Best reasoning capability
#   model = meditron_model  # Medical specialist option

# Check that the loading cell produced both objects BEFORE referencing them,
# so a failed load reaches the message in the else branch instead of raising
# NameError on the first assignment.
if "llama32_model" in globals() and "llama32_training_examples" in globals():
    model = llama32_model  # Balanced general performance
    training_examples = llama32_training_examples
    model_name = "unsloth/llama-3.2-3b-instruct"
    print(f"✅ Loaded model: {model.model_name}")

    print(f"🚀 TRAINING WITH {model_name.upper()}")
    print(f"Training examples: {len(training_examples)}")

    # Positional 80/20 train/validation split (no shuffling)
    train_size = int(0.80 * len(training_examples))
    train_examples = training_examples[:train_size]
    val_examples = training_examples[train_size:]

    # Training configuration optimized for modern LLMs
    config = {
        "epochs": 2,  # Fewer epochs needed for pretrained models
        "batch_size": 4,  # Smaller batch for better quality
        "learning_rate": 2e-5,  # Lower LR for fine-tuning
    }

    print(f"📈 Training: {len(train_examples)}, Validation: {len(val_examples)}")
    print(f"🔧 Config: {config}")

    # START MODERN LLM TRAINING
    print("🚀 STARTING STATE-OF-THE-ART FINE-TUNING...")
    print("⚡ Using Unsloth: 2x faster, 70% less VRAM")

    # Runs the fine-tuning; the result is stored for the summary cell
    training_results = model.fine_tune(
        train_examples=train_examples, val_examples=val_examples, **config
    )

    # Training has already run at this point, so report completion
    print("✅ Fine-tuning finished - results stored in training_results.")

else:
    print("❌ No modern models loaded. Install Unsloth first!")
    print(
        "Command: pip install 'unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git'"
    )

✅ Loaded model: unsloth/Llama-3.2-3B-Instruct
🚀 TRAINING WITH UNSLOTH/LLAMA-3.2-3B-INSTRUCT
Training examples: 400
📈 Training: 320, Validation: 80
🔧 Config: {'epochs': 2, 'batch_size': 4, 'learning_rate': 2e-05}
🚀 STARTING STATE-OF-THE-ART FINE-TUNING...
⚡ Using Unsloth: 2x faster, 70% less VRAM


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/320 [00:00<?, ? examples/s]

INFO | Starting Llama-3.2-3B fine-tuning with Unsloth...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 320 | Num Epochs = 8 | Total steps = 160
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

# ⚠️ COMPETITIVE REALITY CHECK

**FLAN-T5-small is a 77M-parameter model from 2022; the newer models listed below are likely to be more competitive.**

## Installation Required for Competition Success:

```bash
# Install Unsloth (adjust for your CUDA version)
pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

# Additional dependencies
pip install trl datasets xformers bitsandbytes
```

## Model Performance Comparison:
- **FLAN-T5-small (77M, 2022)**: Your current model 📉
- **Phi-4-mini (3.8B, 2025)**: 10x better reasoning, MIT license 🚀
- **Meditron-7B (7B, 2023)**: Medical specialist, clinical training 🏥
- **Llama-3.2-3B (3B, 2024)**: Balanced performance, instruction-tuned 🦙

**Stop handicapping yourself. Upgrade immediately or lose the competition.**

In [None]:
# 🧹 CACHE MANAGEMENT - PREVENT MEMORY WASTE
# Use these utilities to manage model caching and prevent repeated downloads

from utils.cache_manager import ModelCacheManager, cleanup_all, cache_status, emergency

# Call the project's cache_status helper under a heading
print("🔍 CHECKING CACHE STATUS:")
cache_status()

# Cheat-sheet of the cache helpers (printed only, nothing is executed)
for hint in (
    "\n💡 CACHE MANAGEMENT UTILITIES:",
    "- cleanup_all() - Clear all cached models",
    "- cache_status() - Check current memory usage",
    "- emergency() - Nuclear cleanup if things go wrong",
    "- ModelCacheManager.cleanup_all_models() - Full cleanup",
):
    print(hint)

# Snapshot of the cache manager's own bookkeeping
print("\n📊 BEFORE LOADING MODELS:")
cache_info = ModelCacheManager.get_cache_info()
cached_model_count = cache_info["total_cached_models"]
print(f"Cached models: {cached_model_count}")
if torch.cuda.is_available():
    # GPU figure is only printed when CUDA is available
    print(f"GPU Memory: {cache_info['gpu_memory_allocated']:.2f}GB")

# When you're done experimenting, clean up:
# cleanup_all()  # Uncomment to clean up all models

In [None]:
# Load test data and generate one prediction per test row.
# Leaves `test_df`, `predictions`, `lengths` and `target_range` defined for
# the submission/summary cells.
test_df = pd.read_csv("data/test.csv")
logger.info(f"📋 Generating predictions for {len(test_df)} test cases...")

predictions = []
# enumerate() supplies a positional counter for the progress message; the
# label yielded by iterrows() is whatever the DataFrame index holds and is
# not guaranteed to run 0..n-1.
for position, (idx, row) in enumerate(test_df.iterrows()):
    # Create input prompt
    input_prompt = model._create_input_prompt(row)

    # Generate response
    response = model.generate_response(input_prompt, max_length=200)
    predictions.append(response)

    # Progress report on every 10th row (1st, 11th, 21st, ...)
    if position % 10 == 0:
        print(f"Generated {position + 1}/{len(test_df)} predictions")

logger.info("✅ All predictions generated!")

# Analyze prediction lengths (in characters)
lengths = [len(p) for p in predictions]
# True for each prediction whose length falls inside the 600-800 char target
target_range = [600 <= n_chars <= 800 for n_chars in lengths]

if lengths:
    print(
        f"📏 Prediction lengths: Mean={np.mean(lengths):.1f}, Range={min(lengths)}-{max(lengths)}"
    )
    print(
        f"🎯 Target range (600-800 chars): {sum(target_range)}/{len(target_range)} ({np.mean(target_range)*100:.1f}%)"
    )
else:
    # min()/max() raise ValueError on an empty list, so report it explicitly
    print("⚠️ No predictions generated - the test set is empty")

In [None]:
# Create submission file: one row per prediction, ids are positional (0..n-1).
# NOTE(review): confirm the competition does not expect the test set's own
# identifier column instead of a positional id.
submission_df = pd.DataFrame({"id": range(len(predictions)), "response": predictions})

# Save submission (file name is fixed; the final cell's download
# instructions refer to it)
submission_path = "flan_t5_submission.csv"
submission_df.to_csv(submission_path, index=False)
logger.info(f"💾 Submission saved: {submission_path}")

# Save model
model_path = "flan_t5_clinical_model"
model.save_model(model_path)
logger.info(f"🤖 Model saved: {model_path}")

# Create final summary
summary = {
    "timestamp": datetime.now().isoformat(),
    # Record the model that actually produced the predictions: `model` may
    # have been rebound to a different model by an earlier cell. Falls back
    # to the original label when the wrapper exposes no model_name.
    "model": getattr(model, "model_name", "FLAN-T5-small"),
    "parameters": sum(p.numel() for p in model.model.parameters()),
    "training_examples": len(train_examples),
    "validation_examples": len(val_examples),
    "test_predictions": len(predictions),
    "mean_response_length": float(np.mean(lengths)),
    "target_range_percentage": float(np.mean(target_range) * 100),
    "training_results": training_results,
    "submission_file": submission_path,
    "model_path": model_path,
}

# Persist the summary next to the submission
with open("training_summary.json", "w", encoding="utf-8") as f:
    json.dump(summary, f, indent=2)

print("🏆 PRODUCTION ML TRAINING COMPLETE!")
print(f"✅ Model: {summary['parameters']:,} parameters")
print(f"✅ Submission: {submission_path}")
print(f"✅ Mean length: {summary['mean_response_length']:.1f} chars")
print(f"✅ Target range: {summary['target_range_percentage']:.1f}%")

In [None]:
# Print up to the first three predictions with their character counts
print("🔍 SAMPLE PREDICTIONS:")
for case_no, prediction in enumerate(predictions[:3], start=1):
    print(f"\n--- CASE {case_no} ---")
    print(f"Length: {len(prediction)} chars")
    print(f"Response: {prediction}")

# Quantize model for edge deployment (optional)
print("\n🔧 Quantizing model for edge deployment...")
quantized_model = model.quantize_for_edge()
print("✅ Quantized model ready for Jetson Nano deployment")

# Final download instructions: the artefacts written by the previous cell
for instruction in (
    "\n📥 DOWNLOAD FILES:",
    "1. flan_t5_submission.csv - Competition submission",
    "2. flan_t5_clinical_model/ - Trained model directory",
    "3. training_summary.json - Training metrics",
):
    print(instruction)

logger.info("🎯 READY FOR COMPETITION SUBMISSION!")