# SPR 2026 - MedGemma 1.5 4B Chain-of-Thought

**Modelo:** google/medgemma-1.5-4b-it

**Características:**
- Chain-of-Thought reasoning antes da classificação
- O modelo raciocina passo a passo antes de responder
- 4B parâmetros - cabe em T4

**Hipótese:** Raciocínio explícito melhora a classificação

---
## CONFIGURAÇÃO KAGGLE:
1. **Add Input** → **Models** → `medgemma-1.5-4b-it`
2. **Add Input** → **Competition** → `spr-2026-mammography-report-classification`
3. **Settings** → Internet → **OFF**, GPU → **T4 x2**
---

In [None]:
# ===== MEDGEMMA 1.5 4B COT =====

import os
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# Startup banner: a rule, the notebook title, and a closing rule.
print("=" * 60, "SPR 2026 - MedGemma 1.5 4B Chain-of-Thought", "=" * 60, sep="\n")

# Run-wide constants.
SEED = 42
# Generation budget per report, sized to leave room for the chain-of-thought text.
# NOTE(review): 150 tokens may cut the reasoning off before the final answer
# line is emitted - confirm against real model outputs.
MAX_NEW_TOKENS = 150
DATA_DIR = '/kaggle/input/competitions/spr-2026-mammography-report-classification'

# Seed NumPy and PyTorch so that sampling done later in the notebook is repeatable.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def find_model_path(base='/kaggle/input', max_depth=10):
    """Locate a model directory by searching for a ``config.json`` file.

    Walks ``base`` depth-first and returns the first sub-directory that
    directly contains a ``config.json``. Entries are visited in sorted order,
    so the result is deterministic when several candidates exist. ``base``
    itself is never returned, only its descendants.

    Args:
        base: Root directory to search.
        max_depth: Maximum recursion depth below ``base``; also bounds the
            walk if directory symlinks form a cycle.

    Returns:
        The path of the first matching directory, or ``None`` when nothing is
        found (including when ``base`` does not exist or cannot be read).
    """
    def search_dir(directory, depth=0):
        if depth > max_depth:
            return None
        try:
            entries = sorted(os.listdir(directory))
        except OSError:
            # Missing or unreadable directory: treat it as "nothing here"
            # instead of hiding every possible error behind a bare except.
            return None
        for item in entries:
            path = os.path.join(directory, item)
            if not os.path.isdir(path):
                continue
            if os.path.isfile(os.path.join(path, 'config.json')):
                return path
            result = search_dir(path, depth + 1)
            if result:
                return result
        return None

    return search_dir(base)

# Resolve the model directory once, then report the device and the path found
# (the path prints as None when the search located nothing).
MODEL_PATH = find_model_path()
print(f"Device: {device}", f"Model: {MODEL_PATH}", sep="\n")

In [None]:
# ===== CHAIN-OF-THOUGHT PROMPT =====

# Instruction text sent ahead of every report. classify_report joins it with
# USER_TEMPLATE into a single user message (no separate system role is used).
# The closing "FINAL: [number]" line defines the answer format that
# classify_report's answer-extraction regex searches for in the model output.
SYSTEM_PROMPT = """You are a senior breast radiologist expert in BI-RADS classification.

## BI-RADS Categories:
- **0**: Incomplete - needs additional imaging
- **1**: Negative - normal mammogram  
- **2**: Benign - definitely benign findings (0% malignancy)
- **3**: Probably Benign - <2% malignancy, 6-month follow-up
- **4**: Suspicious - 2-95% malignancy, biopsy recommended
- **5**: Highly Suggestive of Malignancy - >95% malignancy
- **6**: Known Biopsy-Proven Malignancy

## Instructions:
Analyze the mammography report step by step:
1. Identify key findings (masses, calcifications, distortions)
2. Assess morphology and margins
3. Consider associated features
4. Determine the most appropriate BI-RADS category

Think through your reasoning, then provide your final answer as:
FINAL: [number]"""

# Per-report part of the prompt; the {report} placeholder is filled with
# str.format(report=...) in classify_report.
USER_TEMPLATE = """Mammography Report:
{report}

Analyze step by step and provide your BI-RADS classification:"""

In [None]:
# ===== CARREGAR MODELO =====
print("Carregando modelo...")

# Tokenizer first: load it from the local model directory and make sure it has
# a padding token, reusing the end-of-sequence token when none is defined.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Model weights: loaded offline in bfloat16, with device placement delegated
# to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    low_cpu_mem_usage=True,
)

print(f"Modelo: {model.config.architectures}")

In [None]:
# ===== DADOS =====
# Load the competition train/test tables from DATA_DIR. Later cells read the
# 'report' and 'target' columns of train_df and the 'ID' and 'report' columns
# of test_df.
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
# Row counts, as a quick sanity check that both files were read.
print(f"Train: {len(train_df)}, Test: {len(test_df)}")

In [None]:
# ===== CLASSIFICAÇÃO COM COT =====
import re

# Answer-extraction patterns, tried from most to least reliable.
# 1) The requested "FINAL: <n>" line; tolerates brackets/markdown such as
#    "FINAL: [4]" or "**FINAL:** 4" and an optional "BI-RADS" prefix.
_FINAL_RE = re.compile(
    r'FINAL[\s:*_\[(]*(?:BI-?RADS[\s:*_\[(-]*)?([0-6])(?!\d)', re.IGNORECASE)
# 2) An explicit "BI-RADS <n>" / "BI-RADS category <n>" mention.
_BIRADS_RE = re.compile(
    r'BI-?RADS[\s:*_\[(-]*(?:CATEGORY[\s:*_\[(-]*)?([0-6])(?!\d)', re.IGNORECASE)
# 3) A lone digit 0-6 that is not part of a longer number, a word, a decimal
#    or a percentage (so "95%", "12" and "0.5" are ignored).
_LONE_DIGIT_RE = re.compile(r'(?<![\w.,])([0-6])(?![\w%]|[.,]\d)')


def _build_prompt(report, tokenizer):
    """Render the single-turn chat prompt (instructions + report) as text."""
    content = f"{SYSTEM_PROMPT}\n\n{USER_TEMPLATE.format(report=report)}"
    if hasattr(tokenizer, 'apply_chat_template'):
        return tokenizer.apply_chat_template(
            [{"role": "user", "content": content}],
            tokenize=False, add_generation_prompt=True,
        )
    # Manual Gemma-style turn markup for tokenizers without chat-template support.
    return f"<start_of_turn>user\n{content}<end_of_turn>\n<start_of_turn>model\n"


def _stop_token_ids(model, tokenizer):
    """Return the tokenizer EOS id merged with the model's configured stop ids."""
    configured = getattr(getattr(model, 'generation_config', None), 'eos_token_id', None)
    candidates = [tokenizer.eos_token_id]
    if isinstance(configured, (list, tuple)):
        candidates.extend(configured)
    else:
        candidates.append(configured)
    stop_ids = []
    for token_id in candidates:
        if token_id is not None and token_id not in stop_ids:
            stop_ids.append(token_id)
    return stop_ids or None


def _extract_birads(response, default=2):
    """Pull the BI-RADS category (0-6) out of the generated text."""
    for pattern in (_FINAL_RE, _BIRADS_RE, _LONE_DIGIT_RE):
        hits = pattern.findall(response)
        if hits:
            # The conclusion follows the reasoning, so the last hit wins.
            return int(hits[-1])
    return default


def classify_report(report, model, tokenizer):
    """Classify one mammography report into a BI-RADS category.

    Builds the chain-of-thought prompt, generates greedily (``do_sample=False``)
    for at most ``MAX_NEW_TOKENS`` tokens, and parses the category from the
    newly generated text only.

    Args:
        report: Report text inserted into ``USER_TEMPLATE``.
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        An int in 0..6. Falls back to 2 when no category can be parsed.
    """
    text = _build_prompt(report, tokenizer)

    # A rendered chat template may already start with the BOS token; letting
    # the tokenizer add special tokens again would then duplicate it.
    bos = getattr(tokenizer, 'bos_token', None)
    add_special_tokens = not (bos and text.startswith(bos))

    # NOTE(review): truncation drops tokens from the end by default, which
    # would cut the generation prompt for very long reports - confirm that
    # reports stay well under max_length.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=4096,
                       add_special_tokens=add_special_tokens)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # An explicit eos_token_id replaces the stop list from the model's
    # generation config, so pass the union instead of only the tokenizer's EOS;
    # otherwise generation can continue past the model's end-of-turn marker.
    stop_ids = _stop_token_ids(model, tokenizer)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=False,
                                 pad_token_id=tokenizer.pad_token_id, eos_token_id=stop_ids)

    # Decode only the continuation, not the echoed prompt.
    prompt_len = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return _extract_birads(response)

# Teste
# Smoke test: run the full prompt -> generate -> parse path on the first
# training row and print the prediction next to its true label.
sample = train_df.iloc[0]
print(f"Real: {sample['target']}, Pred: {classify_report(sample['report'], model, tokenizer)}")

In [None]:
# ===== VALIDAÇÃO =====
from sklearn.metrics import f1_score, classification_report

# Class-stratified validation subset: up to 20 reports per label, sampled with
# a fixed seed. Iterating over the groups (instead of groupby.apply) keeps the
# 'target' column in every sub-frame; groupby.apply on the grouping column is
# deprecated in recent pandas, and the global warnings filter hides that.
val_sample = pd.concat(
    [group.sample(min(20, len(group)), random_state=SEED)
     for _, group in train_df.groupby('target')]
)
# Only the report text is needed per row, so iterate the column directly.
val_preds = [classify_report(report, model, tokenizer)
             for report in tqdm(val_sample['report'], total=len(val_sample))]
val_labels = val_sample['target'].values

print(f"\nF1-Macro: {f1_score(val_labels, val_preds, average='macro'):.5f}")
print(classification_report(val_labels, val_preds))

In [None]:
# ===== SUBMISSION =====
# Predict a BI-RADS category for every test report, in file order.
test_preds = [
    classify_report(report_text, model, tokenizer)
    for report_text in tqdm(test_df['report'], total=len(test_df))
]

# Two-column submission (ID, target), written without the index column.
submission = test_df[['ID']].assign(target=test_preds)
submission.to_csv('submission.csv', index=False)
print(f"Submission salva!")
# Distribution of predicted classes, ordered by class label.
print(submission['target'].value_counts().sort_index())