# Kenya Clinical Reasoning - PRODUCTION ML TRAINING
**FLAN-T5-small Fine-tuning on Expert Clinical Data**

**Target:** Competition-winning model using REAL expert responses  
**Hardware:** Kaggle P100 GPU acceleration  
**Model:** Google FLAN-T5-small (77M params, edge-deployable)

In [1]:
# Install dependencies (run once)
!pip install rouge-score datasets accelerate -q

# Setup
import torch
import pandas as pd
import numpy as np
from datetime import datetime
import json
import sys
import os

# Check PyTorch and transformers compatibility
print(f"🔥 PyTorch version: {torch.__version__}")

# Test AdamW import (fixed in newer versions)
try:
    from torch.optim import AdamW
    print("✅ AdamW imported from torch.optim (recommended)")
except ImportError:
    try:
        from transformers import AdamW
        print("⚠️ AdamW imported from transformers (deprecated)")
    except ImportError:
        print("❌ AdamW not found - installing latest transformers")
        !pip install --upgrade transformers torch

# Check GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🔥 Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
else:
    print("⚠️ No GPU available - training will be slower on CPU")

🔥 PyTorch version: 2.4.1+cu121
✅ AdamW imported from torch.optim (recommended)
🔥 Using device: cuda
GPU: Tesla P100-PCIE-16GB
Memory: 17.1GB


In [5]:
!git clone https://github.com/jnopareboateng/kenyan-medical-reasoning.git

Cloning into 'kenyan-medical-reasoning'...
remote: Enumerating objects: 120, done.[K
remote: Counting objects: 100% (120/120), done.[K
remote: Compressing objects: 100% (87/87), done.[K
remote: Total 120 (delta 37), reused 114 (delta 31), pack-reused 0 (from 0)[K
Receiving objects: 100% (120/120), 2.66 MiB | 20.35 MiB/s, done.
Resolving deltas: 100% (37/37), done.


In [2]:
!rm -rf kenyan-medical-reasoning

In [3]:
os.getcwd()

'/kaggle/working'

In [4]:
path = "kenyan-medical-reasoning"
working = "kaggle/working/"
os.listdir()

['.virtual_documents']

In [6]:
%cd kenyan-medical-reasoning

/kaggle/working/kenyan-medical-reasoning


In [None]:
# Ensure all dependencies are imported first
import torch
import numpy as np
import pandas as pd

# Import our existing modules
import sys

sys.path.append(".")
from core.ml_model import MLPipeline, ClinicalT5Model, ClinicalExample
from utils.logger import CompetitionLogger

# Initialize
logger = CompetitionLogger("ML_Training")
logger.info("🚀 PRODUCTION ML TRAINING STARTED")

# Load training data
train_df = pd.read_csv("data/train.csv")
print(f"📊 Loaded {len(train_df)} training cases")
print(f"Columns: {list(train_df.columns)}")

# Check expert response columns
expert_cols = [
    "Nursing Competency",
    "Clinical Panel",
    "Clinician",
    "GPT4.0",
    "LLAMA",
    "GEMINI",
]
for col in expert_cols:
    if col in train_df.columns:
        filled = train_df[col].notna().sum()
        print(
            f"✅ {col}: {filled}/{len(train_df)} responses ({filled/len(train_df)*100:.1f}%)"
        )

In [None]:
# Quick import test to verify everything works
print("🔍 Testing imports...")

try:
    from transformers import T5ForConditionalGeneration, T5Tokenizer
    from torch.optim import AdamW
    print("✅ Transformers and PyTorch imports successful")
    
    from core.ml_model import ClinicalT5Model
    print("✅ Custom ML model import successful")
    
    print("🎯 All imports working - ready for training!")
    
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Installing missing dependencies...")
    !pip install rouge-score datasets accelerate -q

In [None]:
# Initialize FLAN-T5 model
model = ClinicalT5Model("google/flan-t5-small")
logger.info(
    f"Model loaded: {sum(p.numel() for p in model.model.parameters()):,} parameters"
)

# Prepare training examples from REAL expert data
training_examples = model.prepare_training_data(train_df)
logger.info(f"✅ Prepared {len(training_examples)} training examples")

# Show sample
if training_examples:
    sample = training_examples[0]
    print("📋 SAMPLE TRAINING EXAMPLE:")
    print(f"Input: {sample.input_text[:200]}...")
    print(f"Target: {sample.target_response[:200]}...")
    print(f"Length: {len(sample.target_response)} chars")

In [None]:
# Split training data
train_size = int(0.85 * len(training_examples))
train_examples = training_examples[:train_size]
val_examples = training_examples[train_size:]

logger.info(f"📈 Training: {len(train_examples)}, Validation: {len(val_examples)}")

# Training configuration for GPU acceleration
config = {
    "epochs": 3,
    "batch_size": 8,  # Increase for P100
    "learning_rate": 3e-5,
}

logger.info(f"🔧 Training config: {config}")

# Start training (this will take several minutes on P100)
print("🚀 STARTING FINE-TUNING...")
training_results = model.fine_tune(
    train_examples=train_examples, val_examples=val_examples, **config
)

logger.info("✅ Training completed!")
print("📊 Training Results:")
for stat in training_results["training_stats"]:
    print(
        f"Epoch {stat['epoch']}: Loss={stat['train_loss']:.4f}, ROUGE-L={stat.get('rouge_l', 0):.4f}"
    )

# 🚀 NEW: State-of-the-Art LLM Models Available

**STOP USING OUTDATED MODELS!** We now have three cutting-edge implementations:

1. **Phi-4-mini-instruct (3.8B)** - Microsoft's latest reasoning model
2. **Meditron-7B** - Medical-specialized model trained on clinical data  
3. **Llama-3.2-3B-Instruct** - Meta's latest instruction-tuned model

All models use **Unsloth optimization** for 2x faster training and 70% less VRAM usage.

## Quick Model Comparison
- **Phi-4-mini**: Best overall reasoning, MIT license, 128K context
- **Meditron-7B**: Medical specialist, trained on PubMed + clinical guidelines
- **Llama-3.2**: Solid general performance, good instruction following

**Choose based on your priority: General reasoning (Phi-4), Medical knowledge (Meditron), or Balanced performance (Llama-3.2)**

In [7]:
!pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[cu121-torch240]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-_jev_51v/unsloth_65732cac1d094e6a88f2b21a9ff160d6
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-_jev_51v/unsloth_65732cac1d094e6a88f2b21a9ff160d6
  Resolved https://github.com/unslothai/unsloth.git to commit 9d984899e00a624bd3d07e3c02102b55f027c7c3
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting xformers@ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post1-cp311-cp311-manylinux_2_28_x86_64.whl (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[cu121-torch240]@ git+https://github.com/unslothai/unsloth.git)
  Using cached https://download.pytorch.org/whl/

In [9]:
# EXAMPLE: Using the new state-of-the-art models
# Install Unsloth first: pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

# Option 1: Phi-4-mini-instruct (Recommended for best reasoning)
# print("🚀 OPTION 1: Microsoft Phi-4-mini-instruct")
# try:
#     from core.phi4_model import ClinicalPhi4Model

#     # Initialize Phi-4 model with 4-bit quantization
#     phi4_model = ClinicalPhi4Model("microsoft/Phi-4-mini-instruct", load_in_4bit=True)

#     # Use same training examples as before
#     phi4_training_examples = phi4_model.prepare_training_data(train_df)
#     print(f"✅ Phi-4: {len(phi4_training_examples)} training examples prepared")

#     # Show memory usage
#     if torch.cuda.is_available():
#         memory_used = torch.cuda.max_memory_allocated() / 1e9
#         print(f"GPU Memory: {memory_used:.1f}GB (4-bit quantized)")

# except ImportError as e:
#     print(f"⚠️ Unsloth not installed: {e}")
#     print(
#         "Run: pip install 'unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git'"
#     )

# print("\n" + "=" * 60)

# Option 2: Meditron-7B (Medical specialist)
print("🏥 OPTION 2: Meditron-7B Medical Specialist")
try:
    from core.meditron_model import ClinicalMeditronModel

    # Initialize medical specialist model
    meditron_model = ClinicalMeditronModel("epfl-llm/meditron-7b", load_in_4bit=True)

    # Prepare medical training data
    meditron_training_examples = meditron_model.prepare_training_data(train_df)
    print(f"✅ Meditron: {len(meditron_training_examples)} medical examples prepared")

except ImportError as e:
    print(f"⚠️ Dependencies missing: {e}")

print("\n" + "=" * 60)

# Option 3: Llama-3.2-3B-Instruct (Balanced performance)
print("🦙 OPTION 3: Llama-3.2-3B-Instruct")
try:
    from core.llama32_model import ClinicalLlama32Model

    # Initialize Llama model
    llama32_model = ClinicalLlama32Model(
        "meta-llama/Llama-3.2-3B-Instruct", load_in_4bit=True
    )

    # Prepare training data
    llama32_training_examples = llama32_model.prepare_training_data(train_df)
    print(f"✅ Llama-3.2: {len(llama32_training_examples)} examples prepared")

except ImportError as e:
    print(f"⚠️ Dependencies missing: {e}")

print("\n🎯 Choose your weapon and replace the model variable below!")
print("Recommended: phi4_model for best competition performance")

🏥 OPTION 2: Meditron-7B Medical Specialist



Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


RuntimeError: Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_utils because of the following error (look up to see its traceback):
partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)

In [None]:
# 🔥 COMPETITION-WINNING TRAINING EXAMPLE
# Use this instead of the outdated FLAN-T5 approach

# SELECT YOUR MODEL (uncomment one):
# model = phi4_model           # Recommended: Best reasoning capability
# model = meditron_model       # Medical specialist option  
# model = llama32_model        # Balanced general performance

# For demonstration, let's use Phi-4 (replace with your choice)
if 'phi4_model' in locals():
    model = phi4_model
    training_examples = phi4_training_examples
    model_name = "Phi-4-mini-instruct"
    
    print(f"🚀 TRAINING WITH {model_name.upper()}")
    print(f"Parameters: ~1B after 4-bit quantization (original: 3.8B)")
    print(f"Training examples: {len(training_examples)}")
    
    # Split training data
    train_size = int(0.85 * len(training_examples))
    train_examples = training_examples[:train_size]
    val_examples = training_examples[train_size:]
    
    # Training configuration optimized for modern LLMs
    config = {
        "epochs": 2,          # Fewer epochs needed for pretrained models
        "batch_size": 4,      # Smaller batch for better quality
        "learning_rate": 2e-5 # Lower LR for fine-tuning
    }
    
    print(f"📈 Training: {len(train_examples)}, Validation: {len(val_examples)}")
    print(f"🔧 Config: {config}")
    
    # START MODERN LLM TRAINING
    print("🚀 STARTING STATE-OF-THE-ART FINE-TUNING...")
    print("⚡ Using Unsloth: 2x faster, 70% less VRAM")
    
    # Uncomment to actually train:
    # training_results = model.fine_tune(
    #     train_examples=train_examples, 
    #     val_examples=val_examples, 
    #     **config
    # )
    
    print("✅ Ready to train! Uncomment the training code above to start.")
    
else:
    print("❌ No modern models loaded. Install Unsloth first!")
    print("Command: pip install 'unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git'")

# ⚠️ COMPETITIVE REALITY CHECK

**You're about to compete with outdated technology while your competitors are using 2025 models.**

## Installation Required for Competition Success:

```bash
# Install Unsloth (adjust for your CUDA version)
pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

# Additional dependencies
pip install trl datasets xformers bitsandbytes
```

## Model Performance Comparison:
- **FLAN-T5-small (77M, 2022)**: Your current model 📉
- **Phi-4-mini (3.8B, 2025)**: 10x better reasoning, MIT license 🚀
- **Meditron-7B (7B, 2023)**: Medical specialist, clinical training 🏥
- **Llama-3.2-3B (3B, 2024)**: Balanced performance, instruction-tuned 🦙

**Stop handicapping yourself. Upgrade immediately or lose the competition.**

In [None]:
# Load test data and generate predictions
test_df = pd.read_csv("data/test.csv")
logger.info(f"📋 Generating predictions for {len(test_df)} test cases...")

predictions = []
for idx, row in test_df.iterrows():
    # Create input prompt
    input_prompt = model._create_input_prompt(row)

    # Generate response
    response = model.generate_response(input_prompt, max_length=200)
    predictions.append(response)

    if idx % 10 == 0:
        print(f"Generated {idx+1}/{len(test_df)} predictions")

logger.info("✅ All predictions generated!")

# Analyze prediction lengths
lengths = [len(p) for p in predictions]
print(
    f"📏 Prediction lengths: Mean={np.mean(lengths):.1f}, Range={min(lengths)}-{max(lengths)}"
)
target_range = [(l >= 600 and l <= 800) for l in lengths]
print(
    f"🎯 Target range (600-800 chars): {sum(target_range)}/{len(target_range)} ({np.mean(target_range)*100:.1f}%)"
)

In [None]:
# Create submission file
submission_df = pd.DataFrame({"id": range(len(predictions)), "response": predictions})

# Save submission
submission_path = "flan_t5_submission.csv"
submission_df.to_csv(submission_path, index=False)
logger.info(f"💾 Submission saved: {submission_path}")

# Save model
model_path = "flan_t5_clinical_model"
model.save_model(model_path)
logger.info(f"🤖 Model saved: {model_path}")

# Create final summary
summary = {
    "timestamp": datetime.now().isoformat(),
    "model": "FLAN-T5-small",
    "parameters": sum(p.numel() for p in model.model.parameters()),
    "training_examples": len(train_examples),
    "validation_examples": len(val_examples),
    "test_predictions": len(predictions),
    "mean_response_length": float(np.mean(lengths)),
    "target_range_percentage": float(np.mean(target_range) * 100),
    "training_results": training_results,
    "submission_file": submission_path,
    "model_path": model_path,
}

with open("training_summary.json", "w") as f:
    json.dump(summary, f, indent=2)

print("🏆 PRODUCTION ML TRAINING COMPLETE!")
print(f"✅ Model: {summary['parameters']:,} parameters")
print(f"✅ Submission: {submission_path}")
print(f"✅ Mean length: {summary['mean_response_length']:.1f} chars")
print(f"✅ Target range: {summary['target_range_percentage']:.1f}%")

In [None]:
# Show sample predictions
print("🔍 SAMPLE PREDICTIONS:")
for i in range(min(3, len(predictions))):
    print(f"\n--- CASE {i+1} ---")
    print(f"Length: {len(predictions[i])} chars")
    print(f"Response: {predictions[i]}")

# Quantize model for edge deployment (optional)
print("\n🔧 Quantizing model for edge deployment...")
quantized_model = model.quantize_for_edge()
print("✅ Quantized model ready for Jetson Nano deployment")

# Final download instructions
print("\n📥 DOWNLOAD FILES:")
print("1. flan_t5_submission.csv - Competition submission")
print("2. flan_t5_clinical_model/ - Trained model directory")
print("3. training_summary.json - Training metrics")

logger.info("🎯 READY FOR COMPETITION SUBMISSION!")