In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import random

In [2]:
class MURABinaryDataset(Dataset):
    """Binary image dataset built by scanning a directory tree for PNG files.

    Every ``.png`` file (case-insensitive) found under ``root_dir`` becomes one
    sample. A sample is labelled 1 when the path of its directory *relative to*
    ``root_dir`` contains "positive" (case-insensitive), and 0 otherwise.

    Args:
        root_dir: Directory to scan recursively.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.

    Attributes:
        samples: List of ``(path, label)`` tuples in sorted traversal order.
        transform: The callable passed to the constructor (or ``None``).
    """

    def __init__(self, root_dir, transform=None):
        self.samples = []
        self.transform = transform

        for root, dirs, files in os.walk(root_dir):
            # Sorting in place fixes os.walk's descent order, so sample indices
            # are reproducible regardless of the filesystem's listing order.
            dirs.sort()

            # Only the part of the path below root_dir decides the label; a
            # "positive" somewhere in the dataset's parent path must not turn
            # every image into a positive sample.
            relative_dir = os.path.relpath(root, root_dir).lower()
            label = 1 if "positive" in relative_dir else 0

            for file in sorted(files):
                # Files containing "._" are skipped (presumably macOS
                # AppleDouble sidecar files, which are not real images).
                if file.lower().endswith(".png") and "._" not in file:
                    self.samples.append((os.path.join(root, file), label))

    def __len__(self):
        """Return the number of discovered samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, if a transform was given, passed
        through it. The label is the integer 0 or 1 stored in ``samples``.
        """
        path, label = self.samples[idx]
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, instead of relying on garbage collection.
        with Image.open(path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

In [3]:
def _base_steps():
    """Return fresh resize -> tensor -> normalise steps shared by both pipelines."""
    return [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]


# Training pipeline: random horizontal flip and a rotation of up to 10 degrees,
# followed by the common preprocessing steps.
transform_train = transforms.Compose(
    [transforms.RandomHorizontalFlip(), transforms.RandomRotation(10)] + _base_steps()
)

# Validation pipeline: the common preprocessing steps only, with no random
# augmentation.
transform_val = transforms.Compose(_base_steps())

In [4]:
# Root of the MURA dataset. Set the MURA_DATA_DIR environment variable to point
# at another location; the default is the original local path, so existing
# setups keep working unchanged.
DATA_DIR = os.environ.get("MURA_DATA_DIR", "/Users/alvarosanchez/Downloads/MURA-v1.1")
BATCH_SIZE = 32

train_dataset = MURABinaryDataset(os.path.join(DATA_DIR, "train"), transform=transform_train)
valid_dataset = MURABinaryDataset(os.path.join(DATA_DIR, "valid"), transform=transform_val)

# os.walk yields nothing for a missing directory, so a wrong DATA_DIR would
# otherwise produce silently empty datasets and fail much later.
if len(train_dataset) == 0 or len(valid_dataset) == 0:
    raise FileNotFoundError(
        f"No PNG images found under {DATA_DIR!r} "
        f"(train: {len(train_dataset)}, valid: {len(valid_dataset)}). "
        "Set MURA_DATA_DIR to the dataset root."
    )

# Only the training set is shuffled; validation keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)

print(f"Train imgs: {len(train_dataset)} | Valid imgs: {len(valid_dataset)}")

Train imgs: 36808 | Valid imgs: 3197


In [5]:
# Pick the best available device (CUDA, then Apple MPS, then CPU). This is the
# same fallback chain the training configuration uses for DEVICE, so the model
# ends up on the same device as the batches and class weights instead of being
# pinned to "mps", which fails on machines without it.
DEVICE = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

# Pretrained ResNet-18 with every existing parameter frozen.
model = models.resnet18(weights='IMAGENET1K_V1')
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with a new 2-output linear head. It is created after
# the freeze loop, so its parameters keep the default requires_grad=True.
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(DEVICE)

In [6]:
from collections import Counter
from torch.nn.functional import cross_entropy

# Configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
EPOCHS = 20
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-4
PATIENCE = 3

# Compute class weights for the weighted loss.
# Labels are read straight from the dataset's (path, label) list. Iterating the
# dataset itself would open, decode and augment every image just to discard it.
labels = [label for _, label in train_dataset.samples]
class_counts = Counter(labels)
total = sum(class_counts.values())

# The weights are indexed 0..n-1, so every one of those classes must be present;
# otherwise the division below would hit a zero count.
expected_classes = set(range(len(class_counts)))
if not class_counts or set(class_counts) != expected_classes:
    raise ValueError(
        f"Expected training labels {sorted(expected_classes)} to all be present, "
        f"got counts {dict(class_counts)}"
    )

# Inverse-frequency weighting: the rarer a class, the larger its weight.
class_weights = [total / class_counts[i] for i in range(len(class_counts))]
class_weights = torch.tensor(class_weights, dtype=torch.float32, device=DEVICE)
print(f"Pesos de clase: {class_weights}")

Pesos de clase: tensor([1.6780, 2.4748], device='mps:0')


In [8]:
# Weighted criterion to compensate for the class imbalance.
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Only the last layer (fc) is trained; the rest of the network is frozen.
optimizer = optim.Adam(model.fc.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# Halve the learning rate when the validation loss stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1)

# Where the best checkpoint is written. Set MURA_MODEL_PATH to override; the
# default is the original local path, so existing setups keep working unchanged.
MODEL_PATH = os.environ.get(
    "MURA_MODEL_PATH",
    "/Users/alvarosanchez/ONLINE_DS_THEBRIDGE_ALVAROSMMS-1/ML_Clasificacion_Radiografias_Muscoesqueleticas/src/models/resnet18_optimizedV2.pt",
)
# Create the target directory up front so an unusable path fails here rather
# than at the first torch.save, after a whole epoch has already been trained.
os.makedirs(os.path.dirname(os.path.abspath(MODEL_PATH)), exist_ok=True)

# Training-control state
best_val_loss = float('inf')
epochs_no_improve = 0
train_losses, val_losses = [], []

for epoch in range(EPOCHS):
    # NOTE(review): model.train() puts the whole network in training mode,
    # including any normalisation layers in the frozen backbone, whose running
    # statistics may therefore keep updating. Confirm this is intended.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_train_loss = running_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # VALIDATION
    model.eval()
    val_loss = 0.0
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            # The predicted class is the index of the largest logit.
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    avg_val_loss = val_loss / len(valid_loader)
    val_losses.append(avg_val_loss)
    # The scheduler monitors the validation loss (mode='min').
    scheduler.step(avg_val_loss)

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Acc: {acc:.4f} | F1: {f1:.4f}")

    # Save the best model so far (lowest validation loss).
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), MODEL_PATH)
        print("Mejor modelo guardado.")
    else:
        # Stop once PATIENCE consecutive epochs bring no improvement.
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print("Early stopping activado.")
            break

Epoch 1 | Train Loss: 0.6603 | Val Loss: 0.6595 | Acc: 0.6331 | F1: 0.5857
Mejor modelo guardado.
Epoch 2 | Train Loss: 0.6293 | Val Loss: 0.6919 | Acc: 0.6315 | F1: 0.4987
Epoch 3 | Train Loss: 0.6177 | Val Loss: 0.6493 | Acc: 0.6537 | F1: 0.5831
Mejor modelo guardado.
Epoch 4 | Train Loss: 0.6112 | Val Loss: 0.6498 | Acc: 0.6591 | F1: 0.5915
Epoch 5 | Train Loss: 0.6075 | Val Loss: 0.6540 | Acc: 0.6550 | F1: 0.5759
Epoch 6 | Train Loss: 0.6042 | Val Loss: 0.6371 | Acc: 0.6616 | F1: 0.6054
Mejor modelo guardado.
Epoch 7 | Train Loss: 0.6034 | Val Loss: 0.6431 | Acc: 0.6597 | F1: 0.5885
Epoch 8 | Train Loss: 0.6001 | Val Loss: 0.6490 | Acc: 0.6584 | F1: 0.5758
Epoch 9 | Train Loss: 0.5987 | Val Loss: 0.6318 | Acc: 0.6669 | F1: 0.6100
Mejor modelo guardado.
Epoch 10 | Train Loss: 0.6001 | Val Loss: 0.6296 | Acc: 0.6678 | F1: 0.6161
Mejor modelo guardado.
Epoch 11 | Train Loss: 0.6010 | Val Loss: 0.6341 | Acc: 0.6631 | F1: 0.5989
Epoch 12 | Train Loss: 0.5970 | Val Loss: 0.6488 | Acc: 0.