# Treinamento de Classificação de Idade Óssea (anos)
Notebook adaptado para imagens binárias e classificação discreta em anos.
- CSV contém colunas: `id`, `boneage`
- Ignora coluna `male`
- Relatórios finais: Classification Report + Matriz de Confusão

In [None]:
# =========================================================
# Install dependencies
# =========================================================
!pip install timm albumentations pandas scikit-learn matplotlib seaborn tqdm torch torchvision

In [2]:
# Mount Google Drive under /content/drive; the dataset archive is read from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# =========================================================
# Extract the image archive from Drive into /content/saida2
# =========================================================

!unzip /content/drive/MyDrive/TCC/famele_380x380.zip -d /content/saida2

In [4]:
import os, gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# =========================================================
# Load the label CSV
# =========================================================
csv_path = "/content/famele_gruped_classification_training.csv"  # adjust the path as needed
img_dir = "/content/saida2/b380x380"  # adjust the path as needed

df = pd.read_csv(csv_path)
if df.empty:
    raise ValueError(f"O CSV '{csv_path}' não contém nenhuma linha.")

# Labels are used as integer class indices (truncating any fractional part).
df['boneage'] = df['boneage'].astype(int)
print(df.head())

# The labels are passed to CrossEntropyLoss as class indices, so every label
# must lie in [0, num_classes). Counting distinct values is only correct when
# the labels are exactly 0..K-1; deriving the size from the largest label also
# covers label sets with gaps or an offset (unused classes simply get no
# samples), and is identical to nunique() in the contiguous case.
label_min = int(df['boneage'].min())
label_max = int(df['boneage'].max())
if label_min < 0:
    raise ValueError(
        f"Rótulo negativo encontrado em 'boneage' ({label_min}); "
        "os rótulos precisam ser índices de classe >= 0."
    )
num_classes = label_max + 1
print('Número de classes:', num_classes)

In [None]:
# =========================================================
# Data augmentation with Albumentations
# =========================================================
# Single source of truth for the network input resolution: training and
# validation must see images at the same scale, otherwise validation metrics
# are measured on a different input distribution than the one trained on.
IMG_SIZE = 380

# Training pipeline: geometric + photometric augmentation, then normalisation
# to roughly [-1, 1] and conversion to a tensor.
train_tfms = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.7),
    A.RandomBrightnessContrast(p=0.5),
    A.CLAHE(clip_limit=2.0, p=0.3),
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2()
])

# Validation pipeline: no random augmentation, same resize and normalisation
# as training.
val_tfms = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2()
])

In [7]:
# =========================================================
# Dataset
# =========================================================
class BoneAgeDataset(Dataset):
    """Grayscale image dataset backed by a DataFrame of ids and labels.

    Each row of ``df`` must provide an ``id`` column (the image is read from
    ``<img_dir>/<id>.png``) and a ``boneage`` column (returned as an ``int``
    label).

    Args:
        df: DataFrame with at least the ``id`` and ``boneage`` columns. The
            index is reset so samples are addressed by position.
        img_dir: Directory containing the ``.png`` images.
        transform: Optional callable invoked as ``transform(image=array)``
            and expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the image path for the sample at position ``idx``."""
        # Read the value from its own column: taking a whole row with
        # ``df.iloc[idx]`` coerces mixed numeric columns to a common dtype,
        # which would turn an integer id such as 1377 into "1377.0".
        return os.path.join(self.img_dir, f"{self.df['id'].iloc[idx]}.png")

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(self._image_path(idx)) as img:
            image = np.array(img.convert("L"))  # grayscale

        label = int(self.df["boneage"].iloc[idx])

        # Compare against None explicitly so a transform object that happens
        # to be falsy (e.g. an empty pipeline) is still applied.
        if self.transform is not None:
            image = self.transform(image=image)["image"]

        return image, label


In [None]:
# =========================================================
# Split estratificado
# =========================================================
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. Stratifying on `boneage` keeps the
# class proportions the same in both partitions; the seed is fixed so the
# split is reproducible.
train_df, val_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['boneage']
)

# Training samples go through the augmentation pipeline, validation samples
# through the non-augmenting one.
train_ds = BoneAgeDataset(train_df, img_dir, transform=train_tfms)
val_ds = BoneAgeDataset(val_df, img_dir, transform=val_tfms)

# Only the training loader reshuffles the samples each epoch.
train_loader = DataLoader(
    train_ds,
    shuffle=True,
    batch_size=32,
)
val_loader = DataLoader(
    val_ds,
    shuffle=False,
    batch_size=32,
)

# =========================================================
# EfficientNet-B4 with a dropout head + partial fine-tuning
# =========================================================

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained backbone created for single-channel (grayscale) input.
backbone = timm.create_model(
    'efficientnet_b4',
    pretrained=True,
    num_classes=num_classes,
    in_chans=1,
)

# Swap the stock linear head for dropout + linear BEFORE deciding what is
# trainable, so the freeze/unfreeze steps below act on the head that is
# actually used.
in_features = backbone.classifier.in_features
backbone.classifier = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(in_features, num_classes)
)

# Freeze everything ...
for param in backbone.parameters():
    param.requires_grad = False

# ... then re-enable gradients only for the last block and the new head.
for param in backbone.blocks[-1].parameters():
    param.requires_grad = True
for param in backbone.classifier.parameters():
    param.requires_grad = True

model = backbone.to(device)

# Loss and optimizer. Frozen parameters never receive gradients, so the
# optimizer leaves them untouched even though they are passed in.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

# Halve the learning rate when the monitored value (the validation loss is
# passed to `scheduler.step`) stops decreasing.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=2
)

In [9]:
# =========================================================
# Funções de treino/validação
# =========================================================
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)`` computed over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc="Treinando", leave=False)
    for batch_imgs, batch_labels in progress:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the final division by the
        # sample count gives a per-sample figure (assumes the criterion
        # returns a batch mean - TODO confirm for custom criteria).
        loss_sum += batch_loss.item() * batch_imgs.size(0)
        predicted = logits.max(1)[1]
        n_correct += (predicted == batch_labels).sum().item()
        n_seen += batch_labels.size(0)

        # Running statistics shown in the progress bar.
        progress.set_postfix({
            "loss": f"{loss_sum/n_seen:.4f}",
            "acc": f"{n_correct/n_seen:.4f}"
        })

    return loss_sum/n_seen, n_correct/n_seen


def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)`` computed over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc="Validando", leave=False)
    with torch.no_grad():
        for batch_imgs, batch_labels in progress:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_imgs)
            batch_loss = criterion(logits, batch_labels)

            # Same size-weighted accumulation as in training, so both
            # functions report comparable per-sample averages.
            loss_sum += batch_loss.item() * batch_imgs.size(0)
            predicted = logits.max(1)[1]
            n_correct += (predicted == batch_labels).sum().item()
            n_seen += batch_labels.size(0)

            # Running statistics shown in the progress bar.
            progress.set_postfix({
                "loss": f"{loss_sum/n_seen:.4f}",
                "acc": f"{n_correct/n_seen:.4f}"
            })

    return loss_sum/n_seen, n_correct/n_seen

In [None]:
# Expose the label under an explicit "years" column name.
df["boneage_year"] = df["boneage"]

# Show how many samples each age (in years) has, ordered by age.
print(
    "Distribuição por idade (anos):",
    df["boneage_year"].value_counts().sort_index(),
    sep="\n",
)


In [None]:
# =========================================================
# Training loop with early stopping + LR scheduler
# =========================================================
EPOCHS = 100
PATIENCE = 5  # consecutive epochs without a better validation loss before stopping
best_val_loss = float("inf")
best_acc = 0
epochs_no_improve = 0

# Per-epoch metrics, one list per curve.
history = {name: [] for name in ("train_loss", "val_loss", "train_acc", "val_acc")}

for epoch in range(EPOCHS):
    print(f"\n===== Época {epoch+1}/{EPOCHS} =====", flush=True)

    # One optimisation pass, then one evaluation pass.
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}", flush=True)

    # The scheduler monitors the validation loss.
    scheduler.step(val_loss)
    print(f"Learning Rate atual: {optimizer.param_groups[0]['lr']}", flush=True)

    # Early stopping bookkeeping: a checkpoint is written only when the
    # validation loss strictly improves. The negated `<` (rather than `>=`)
    # keeps a NaN loss on the "no improvement" side.
    if not (val_loss < best_val_loss):
        epochs_no_improve += 1
        print(f"Nenhuma melhora em {epochs_no_improve} época(s).", flush=True)
    else:
        best_val_loss = val_loss
        best_acc = val_acc
        torch.save(model.state_dict(), "/content/best_model.pth")
        print("Modelo salvo!", flush=True)
        epochs_no_improve = 0

    if epochs_no_improve >= PATIENCE:
        print("Early stopping acionado!", flush=True)
        break


In [None]:
# =========================================================
# Final reports
# =========================================================
# map_location lets a checkpoint written on a GPU runtime be restored on
# whatever device is active now (e.g. a CPU-only session).
model.load_state_dict(torch.load("/content/best_model.pth", map_location=device))
model.eval()

# Collect ground-truth and predicted class indices over the validation set.
all_labels, all_preds = [], []
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        _, preds = outputs.max(1)
        all_labels.extend(labels.tolist())
        all_preds.extend(preds.cpu().tolist())

# Classes that actually occur (as truth or prediction), in ascending order.
# Passing them explicitly keeps the matrix rows/columns and the axis ticks in
# agreement, so the plot shows the real class values instead of positions.
class_labels = sorted(set(all_labels) | set(all_preds))

print("== Classification Report ==")
# zero_division=0 reports 0 for classes that were never predicted instead of
# emitting an undefined-metric warning for each of them.
print(classification_report(all_labels, all_preds, zero_division=0))

cm = confusion_matrix(all_labels, all_preds, labels=class_labels)
plt.figure(figsize=(10,8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predito')
plt.ylabel('Verdadeiro')
plt.show()