In [1]:
# Mount Google Drive in the Colab runtime at /content/drive so the project
# files stored on Drive can be accessed from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/TFI/') # Cambiar a la ruta donde están los datasets
!pip install -r requirements.txt



In [3]:
import config
import model
import evaluate
import data_loader
import importlib

# Re-execute the project modules so that edits made to their .py files are
# picked up without restarting the kernel.
importlib.reload(config)

importlib.reload(model)

importlib.reload(evaluate)
# As the last expression of the cell, this call's return value (the reloaded
# module object) is what the notebook displays as the cell output.
importlib.reload(data_loader)

<module 'data_loader' from '/content/drive/MyDrive/TFI/data_loader.py'>

In [None]:
import importlib

import tensorflow as tf

import config
import data_loader
import evaluate
import model

# Reload the project modules BEFORE the from-imports below.
# importlib.reload() does not rebind names that were previously copied out of
# a module with `from module import name`, so reloading afterwards would leave
# this cell using the pre-reload constants and functions.
# Importing the modules explicitly above also removes the dependency on an
# earlier cell having bound `config`, `model` and `evaluate`.
importlib.reload(config)
importlib.reload(model)
importlib.reload(evaluate)
importlib.reload(data_loader)

from config import TRAIN_CSV, VALID_CSV, BATCH_SIZE, EPOCHS, COMPETITION_TASKS
from data_loader import load_and_preprocess_data, create_dataset
from model import build_model
from evaluate import evaluate_model

def main(sample_frac=0.50):
    """Run the training and evaluation pipeline end to end.

    Loads the training and validation CSVs, optionally subsamples them,
    builds the datasets and the model, trains for EPOCHS epochs and finally
    calls evaluate_model on the validation data.

    Args:
        sample_frac: Fraction of rows of each split to keep, drawn with a
            fixed seed (random_state=42). The default of 0.50 keeps the
            quicker trial-run behaviour; pass None to train and validate on
            every row.

    Returns:
        The trained model, so later cells can reuse it instead of retraining.
    """
    print("Cargando y preprocesando datos...")
    train_df = load_and_preprocess_data(TRAIN_CSV)
    valid_df = load_and_preprocess_data(VALID_CSV)

    # Subsampling is now opt-out through the parameter instead of requiring
    # these lines to be deleted by hand for a full run.
    if sample_frac is not None:
        train_df = train_df.sample(frac=sample_frac, random_state=42)
        valid_df = valid_df.sample(frac=sample_frac, random_state=42)

    print("Creando datasets...")
    train_dataset = create_dataset(train_df, BATCH_SIZE)
    # Build the validation dataset without shuffling, for evaluation.
    valid_dataset = create_dataset(valid_df, BATCH_SIZE, shuffle=False)

    print("Construyendo modelo...")
    # Named `net` so the local does not shadow the project module `model`.
    net = build_model(len(COMPETITION_TASKS))

    print("Entrenando modelo...")
    net.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=valid_dataset,
    )

    print("\nEvaluando modelo...")
    evaluate_model(net, valid_dataset, valid_df)
    return net


if __name__ == '__main__':
    main()



Cargando y preprocesando datos...
Creando datasets...
Construyendo modelo...
Entrenando modelo...
Epoch 1/8
[1m6982/6982[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m292s[0m 16ms/step - auc: 0.7429 - loss: 0.4742 - val_auc: 0.7926 - val_loss: 0.5990
Epoch 2/8
[1m6982/6982[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 15ms/step - auc: 0.7905 - loss: 0.4418 - val_auc: 0.7771 - val_loss: 0.4915
Epoch 3/8
[1m6982/6982[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 15ms/step - auc: 0.8074 - loss: 0.4279 - val_auc: 0.8129 - val_loss: 0.4703
Epoch 4/8
[1m6982/6982[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 15ms/step - auc: 0.8169 - loss: 0.4181 - val_auc: 0.7895 - val_loss: 0.4878
Epoch 5/8
[1m6982/6982[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 15ms/step - auc: 0.8244 - loss: 0.4115 - val_auc: 0.8078 - val_loss: 0.4666
Epoch 6/8
[1m6982/6982[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 15ms/step - auc: 0.8285 - loss: 0.4070 - v

# **Para encontrar los mejores thresholds**

In [None]:
import importlib

import numpy as np
import tensorflow as tf
from sklearn.metrics import precision_recall_curve

import config
import data_loader
import evaluate
import model

# Reload the project modules BEFORE the from-imports below.
# importlib.reload() does not rebind names that were previously copied out of
# a module with `from module import name`, so reloading afterwards would leave
# this cell using the pre-reload constants and functions.
importlib.reload(config)
importlib.reload(model)
importlib.reload(evaluate)
importlib.reload(data_loader)

from config import TRAIN_CSV, VALID_CSV, BATCH_SIZE, EPOCHS, COMPETITION_TASKS
from data_loader import load_and_preprocess_data, create_dataset
from model import build_model
from evaluate import evaluate_model

# Train on a fixed-seed 50% sample of each split, then search, per task, for
# the decision threshold that maximises F1 on the validation sample.
train_df  = load_and_preprocess_data(TRAIN_CSV).sample(frac=0.50, random_state=42)
valid_df  = load_and_preprocess_data(VALID_CSV).sample(frac=0.50, random_state=42)

train_ds  = create_dataset(train_df,  BATCH_SIZE)
# NOTE(review): shuffle=False presumably keeps the dataset in valid_df row
# order, which the per-row comparison below depends on; confirm in
# data_loader.create_dataset.
valid_ds  = create_dataset(valid_df,  BATCH_SIZE, shuffle=False)

net = build_model(len(COMPETITION_TASKS))
net.fit(train_ds, epochs=EPOCHS, validation_data=valid_ds)

y_pred_probs = net.predict(valid_ds)

# Predictions are matched to valid_df labels positionally, so fail early with
# a clear message if the dataset yielded a different number of rows.
if len(y_pred_probs) != len(valid_df):
    raise ValueError(
        f"Predicciones ({len(y_pred_probs)}) y etiquetas ({len(valid_df)}) "
        "no tienen la misma cantidad de filas"
    )

best_thr = {}
print("Umbrales optimizados en validación (criterio: F1 máximo)")
for i, task in enumerate(COMPETITION_TASKS):
    y_true = valid_df[task].values
    y_prob = y_pred_probs[:, i]

    pr, rc, thr = precision_recall_curve(y_true, y_prob)
    # precision_recall_curve returns one more precision/recall point than
    # thresholds: the final (precision=1, recall=0) point has no threshold.
    # Drop it so every F1 index is a valid index into `thr`.
    pr, rc = pr[:-1], rc[:-1]
    f1 = 2 * pr * rc / (pr + rc + 1e-8)       # epsilon avoids division by zero
    idx = int(np.argmax(f1))                  # index of the maximum F1
    best_thr[task] = float(thr[idx])          # store the best threshold as a plain float

    print(f"{task:15s}:  {thr[idx]:.3f}   (F1 = {f1[idx]:.3f})")

# NOTE(review): best_thr is not passed to evaluate_model, and the thresholds
# were tuned on the same validation sample that is evaluated here, so any
# thresholded metrics it reports are optimistic. Confirm where evaluate_model
# takes its thresholds from.
evaluate_model(net, valid_ds, valid_df)

Epoch 1/8


In [None]:
import config
import importlib
# Reload config and print EPOCHS to check which value the module currently holds.
importlib.reload(config)
print(config.EPOCHS)

8


# Umbral de Cardiomegaly con precisión mínima de 0.50

In [None]:
from sklearn.metrics import precision_recall_curve
import numpy as np

# Pick the Cardiomegaly threshold with the highest recall among those whose
# precision is at least `target_prec`.
# Relies on COMPETITION_TASKS, valid_df and y_pred_probs already being defined
# in the kernel by the threshold-search cell; run that cell first.
task = 'Cardiomegaly'
i    = COMPETITION_TASKS.index(task)
y_true = valid_df[task].values
y_prob = y_pred_probs[:, i]

pr, rc, thr = precision_recall_curve(y_true, y_prob)
# The last precision/recall pair returned is the fixed (1, 0) end point, which
# has no threshold. Keeping it (paired with an artificial threshold of 1.0)
# makes `pr >= target_prec` always true for that entry, so the fallback branch
# below could never run. Drop it so only real thresholds are considered.
pr, rc = pr[:-1], rc[:-1]

target_prec = 0.50                        # minimum acceptable precision
mask = pr >= target_prec

if mask.any():
    # Among thresholds meeting the precision floor, take the highest recall.
    idx = np.argmax(rc[mask])
    new_thr = float(thr[mask][idx])
    new_prec, new_rec = pr[mask][idx], rc[mask][idx]
else:
    # No threshold reaches the target precision: fall back to 0.99 (and
    # reconsider the training). Report the precision/recall actually obtained
    # at that cut-off instead of values belonging to a different threshold.
    new_thr = 0.99
    pred_pos = y_prob >= new_thr
    true_pos = np.sum(pred_pos & (y_true == 1))
    n_pos = np.sum(y_true == 1)
    new_prec = true_pos / pred_pos.sum() if pred_pos.any() else float('nan')
    new_rec = true_pos / n_pos if n_pos else float('nan')

# Use the `task` variable so the label cannot drift from the task analysed.
print(f"{task}  nuevo thr = {new_thr:.3f}  |  Prec={new_prec:.3f}  Rec={new_rec:.3f}")

Cardiomegaly  nuevo thr = 0.067  |  Prec=0.500  Rec=0.921
