In [None]:
# Architecture identifier: handed to get_model() and reused as the name of the
# output folder when the trained weights are saved.
MODEL_NAME = "DenseNet121"

In [None]:
# Check whether PyTorch can see a CUDA GPU and, if so, report which one.
import torch

gpu_available = torch.cuda.is_available()
print(f"GPU disponible: {gpu_available}")
if gpu_available:
    # Index 0 is the first CUDA device visible to this process.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Mount Google Drive inside the Colab runtime so files under it can be read
# through the regular filesystem. Only works when running on Google Colab.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
!pip install SimpleITK scikit-learn

In [None]:
import json
import random
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm import tqdm

# Global run configuration. Everything a reader might want to tune lives here.

# Train on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
RANDOM_SEED = 42
BATCH_SIZE = 32
NUM_EPOCHS = 20
# NOTE(review): 0.01 is 10x Adam's default step size (1e-3) and is applied to
# pretrained weights; confirm this value is intentional.
LEARNING_RATE = 0.01

# Seed every random number generator the pipeline may draw from, not only
# PyTorch's: data augmentation commonly relies on Python's `random` module or
# on NumPy's global RNG, and leaving those unseeded makes runs irreproducible.
# torch.manual_seed() covers both the CPU and the CUDA generators.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
print(f"Device: {DEVICE}")

In [None]:
# Configurar proyecto en Colab: clonar repo y preparar imports
import os, sys, shutil
from pathlib import Path

# Ruta del dataset en Drive (fija)
DATASET_PATH = Path('/content/drive/MyDrive/dataset_node21')

# Clonar repo en Colab si no existe
if not Path('/content/aprendizaje_automatico').exists():
    !git clone https://github.com/joacoesperon/aprendizaje_automatico /content/aprendizaje_automatico

# Ruta del proyecto en Colab
PROJECT_PATH = Path('/content/aprendizaje_automatico')
SRC_IN_COLAB = PROJECT_PATH / 'src'

# Asegurar que src est√© en el PYTHONPATH
sys.path.insert(0, '/content/aprendizaje_automatico')
sys.path.insert(0, '/content')

from src.models import get_model
from src.data_loader import NODE21Dataset, get_train_transforms, get_val_test_transforms
from src.evaluate import evaluate_model, calculate_metrics

print(f"Repo listo en: {PROJECT_PATH}")
print(f"Dataset en Drive: {DATASET_PATH}")

In [None]:
# Load the metadata table and build an image-level, label-stratified
# train/test split (80% / 20%).
METADATA_FILE = DATASET_PATH / 'cxr_images' / 'proccessed_data' / 'metadata.csv'
IMAGE_DIR = DATASET_PATH / 'cxr_images' / 'proccessed_data' / 'images'

meta = pd.read_csv(METADATA_FILE)

# Collapse the table to one row per (image, label) pair so the split is made
# over images rather than over metadata rows.
unique_imgs = meta[['img_name', 'label']].drop_duplicates()

# Guard against data leakage: if an image carried two different labels it would
# survive drop_duplicates() twice and could land in both train and test.
assert unique_imgs['img_name'].is_unique, (
    "metadata.csv contiene imagenes con mas de una etiqueta; "
    "el split train/test tendria fuga de datos"
)

train_imgs, test_imgs = train_test_split(
    unique_imgs,
    test_size=0.20,
    stratify=unique_imgs['label'],  # keep the class ratio equal in both splits
    random_state=RANDOM_SEED,
)
print(f"Train: {len(train_imgs)}, Test: {len(test_imgs)}")

In [None]:
# Build the train and test datasets from their respective splits.
def _make_dataset(split_frame, transform):
    """Wrap one split (a frame with `img_name` and `label` columns) in a NODE21Dataset."""
    return NODE21Dataset(
        image_dir=IMAGE_DIR,
        image_names=split_frame['img_name'].tolist(),
        labels=split_frame['label'].tolist(),
        transform=transform,
    )


# The training split gets the training transforms; the test split gets the
# validation/test transforms.
train_dataset = _make_dataset(train_imgs, get_train_transforms())
test_dataset = _make_dataset(test_imgs, get_val_test_transforms())

In [None]:
# Simple training loop with early stopping driven by the training loss.
#
# The weights of the epoch with the lowest training loss are snapshotted and
# restored once training ends. Without this, the model left in memory (and
# later evaluated and saved as 'best_model.pth') would be the one from the last
# epoch, which is up to `patience` epochs past the best one.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

model = get_model(MODEL_NAME, num_classes=2, pretrained=True).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

best_train_loss = float('inf')
best_state_dict = None  # CPU copy of the weights at the best epoch so far
patience_counter = 0
patience = 3  # stop once the loss has not improved for 3 consecutive epochs

print(f"Entrenando {MODEL_NAME} por hasta {NUM_EPOCHS} epochs (early stopping: paciencia={patience})...")

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so the
        # epoch average stays correct when the last batch is smaller.
        running_loss += loss.item() * images.size(0)

    train_loss = running_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}: train_loss={train_loss:.4f}")

    # Early stopping
    if train_loss < best_train_loss:
        best_train_loss = train_loss
        patience_counter = 0
        # state_dict() returns references to the live tensors, so they must be
        # cloned; keeping the copy on the CPU avoids holding extra GPU memory.
        best_state_dict = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping: loss no mejora desde hace {patience} epochs.")
            break

# Roll back to the best epoch. The snapshot is None only if no epoch ever
# improved on +inf (e.g. NUM_EPOCHS == 0 or a NaN loss from the first epoch).
if best_state_dict is not None:
    model.load_state_dict(best_state_dict)
    print(f"Pesos restaurados del mejor epoch (train_loss={best_train_loss:.4f}).")

print("Entrenamiento completado.")

In [None]:
# Evaluate the trained model on the held-out test split.
# shuffle=False keeps the test samples in dataset order.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

y_true, y_pred, y_probs = evaluate_model(model, test_loader, DEVICE)
metrics = calculate_metrics(y_true, y_pred, y_probs)

# Report every metric with four decimal places.
print("\nMetricas en Test Set:")
for metric_name, value in metrics.items():
    line = f"  {metric_name}: {value:.4f}"
    print(line)

In [None]:
# Persist the trained weights and the test metrics under
# <PROJECT_PATH>/models/<MODEL_NAME>/.
save_dir = PROJECT_PATH.joinpath('models', MODEL_NAME)
save_dir.mkdir(parents=True, exist_ok=True)

weights_path = save_dir / 'best_model.pth'
metrics_path = save_dir / 'metrics.json'

# Only the state dict (parameters and buffers) is stored, not the module
# object itself.
torch.save(model.state_dict(), weights_path)

with metrics_path.open('w') as f:
    json.dump(metrics, f, indent=2)

print(f"Modelo guardado en: {save_dir}")