# Examen Segundo Interciclo — Cuaderno 2  
**Tema:** Predicción con reentrenamiento (feedback) con MLflow  
**Autores:** Zahid Armijos y Cristopher Jara — **Usuario GitHub:** criss16-nimoi  
**Experimento MLflow:** `Examen_Segundo_Interciclo`

> Este cuaderno permite predecir imágenes, corregir etiquetas y reentrenar el modelo guardando cada run en MLflow.


## Objetivo
1) Realizar predicciones sobre nuevas imágenes.  
2) Permitir **feedback** (corrección de etiqueta).  
3) Ejecutar **reentrenamiento incremental** y registrar cada run en **MLflow**.


## Dataset y rutas
Este cuaderno trabaja con:

- Dataset principal: `dataset_animales/raw/{vaca,cerdo,gallina}`
- Imágenes de prueba: `test_images/`
- Feedback (se crea automáticamente): `dataset_animales/feedback_data/images/` y `dataset_animales/feedback_data/labels.csv`
- MLflow tracking: `./mlruns` (en la carpeta del proyecto)

> Si el conteo por clase sale en **0**, la ruta está mal o la carpeta está vacía.


In [40]:
# =========================================================
# CELDA 1) IMPORTS + CONFIG + RUTAS (Examen_Segundo_Interciclo)
# =========================================================
from pathlib import Path
from datetime import datetime
import time
import json
import shutil

import numpy as np
from PIL import Image as PILImage

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.efficientnet import preprocess_input

import mlflow
from mlflow.tracking import MlflowClient


# -------- CONFIG --------
# Class names; the index in this list is the column index of every label/probability vector.
CLASSES = ["vaca", "cerdo", "gallina"]   # lowercase, matching the sub-folder names under raw/
NUM_CLASSES = len(CLASSES)

IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE before entering the model
BATCH_SIZE = 16   # batch size used by the tf.data pipelines
SEED = 7          # seeds the train/val split and the dataset shuffle

# Project root = current working directory (the folder the notebook is opened from)
PROJECT_DIR = Path.cwd().resolve()

DATASET_DIR = PROJECT_DIR / "dataset_animales"
RAW_DIR = DATASET_DIR / "raw"
MODELS_DIR = DATASET_DIR / "models"

TEST_DIR = PROJECT_DIR / "test_images"

# User corrections: copied images plus a CSV with their labels
FEEDBACK_DIR = DATASET_DIR / "feedback_data"
FEEDBACK_IMG_DIR = FEEDBACK_DIR / "images"
FEEDBACK_LABELS_CSV = FEEDBACK_DIR / "labels.csv"

BEST_MODEL_PATH = MODELS_DIR / "best_multilabel.keras"
THRESH_PATH = MODELS_DIR / "threshold.json"

# MLflow (file store, no sqlite) -> avoids schema problems
MLRUNS_DIR = PROJECT_DIR / "mlruns"
EXPERIMENT_NAME = "Examen_Segundo_Interciclo"

# File suffixes (lowercase) accepted as images when scanning folders.
# NOTE(review): tf.image.decode_image is documented for BMP/GIF/JPEG/PNG; confirm that
# .webp/.tif/.tiff files actually decode in this TensorFlow version.
IMG_EXTS = {".jpg",".jpeg",".png",".webp",".bmp",".tif",".tiff"}


def ensure_dirs():
    """Create every project directory used by the notebook if it is missing."""
    required_dirs = (
        DATASET_DIR,
        RAW_DIR,
        MODELS_DIR,
        TEST_DIR,
        FEEDBACK_DIR,
        FEEDBACK_IMG_DIR,
        MLRUNS_DIR,
    )
    for directory in required_dirs:
        directory.mkdir(parents=True, exist_ok=True)

def list_images(folder: Path):
    """Return the image files found recursively under *folder*.

    A file counts as an image when its lowercased suffix is in ``IMG_EXTS``.
    The result is sorted by the lowercased string form of each path.
    """
    found = [
        entry
        for entry in folder.rglob("*")
        if entry.is_file() and entry.suffix.lower() in IMG_EXTS
    ]
    found.sort(key=lambda entry: str(entry).lower())
    return found

def ensure_feedback_csv():
    """Write the feedback CSV with only its header row, unless the file already exists."""
    if FEEDBACK_LABELS_CSV.exists():
        return
    FEEDBACK_LABELS_CSV.write_text("filename,labels,timestamp\n", encoding="utf-8")

# Create the folder layout up front.
ensure_dirs()

# Sanity output: resolved project root and whether the data folders are present.
print("CWD:", PROJECT_DIR)
print("RAW_DIR exists?", RAW_DIR.exists())
print("TEST_DIR exists?", TEST_DIR.exists())
print("CLASSES:", CLASSES)


CWD: C:\Users\leoos\Examen_Segundo_Interciclo
RAW_DIR exists? True
TEST_DIR exists? True
CLASSES: ['vaca', 'cerdo', 'gallina']


In [64]:
from pathlib import Path
import time, json, tempfile, shutil
import numpy as np
import mlflow
from mlflow.tracking import MlflowClient
import tensorflow as tf
from tensorflow import keras

# ========= MLflow CONFIG (fixed) =========
# Re-binds the project root and MLflow locations for this cell; the store lives in ./mlruns
# relative to the current working directory.
PROJECT_DIR = Path.cwd()
MLRUNS_DIR = PROJECT_DIR / "mlruns"
EXPERIMENT_NAME = "Examen_Segundo_Interciclo"

def setup_mlflow():
    """
    Point MLflow at the local file store in ``mlruns`` and select the experiment.

    The same store can be opened in the MLflow UI with:
    mlflow ui --backend-store-uri "file:///.../mlruns"

    Returns the tracking URI that was configured.
    """
    store_uri = MLRUNS_DIR.resolve().as_uri()  # e.g. file:///C:/.../mlruns
    mlflow.set_tracking_uri(store_uri)
    mlflow.set_experiment(EXPERIMENT_NAME)
    return store_uri

def safe_log_keras_model(model: keras.Model, name: str = "model"):
    """
    Log *model* to the active MLflow run without signature/input_example.

    Tries ``mlflow.keras.log_model`` with the newer ``name=`` keyword first and
    retries with the older ``artifact_path=`` keyword when that raises
    ``TypeError``. If logging still fails for any reason, the model is saved as
    ``model.keras`` in a temporary folder and uploaded as plain artifacts under
    *name*.

    Args:
        model: Keras model to log.
        name: Artifact folder name inside the run.
    """
    pip_reqs = [
        f"tensorflow=={tf.__version__}",
        "numpy",
        f"mlflow=={mlflow.__version__}",
    ]
    try:
        try:
            # Newer MLflow API
            mlflow.keras.log_model(model, name=name, pip_requirements=pip_reqs)
        except TypeError:
            # Older MLflow API; kept inside the outer try so that a failure here
            # also reaches the artifact fallback instead of escaping.
            mlflow.keras.log_model(model, artifact_path=name, pip_requirements=pip_reqs)
        return
    except Exception as exc:
        # Best-effort by design: report the reason, then fall back to raw artifacts.
        print(f"⚠️ mlflow.keras.log_model falló ({exc!r}). Se guardará el modelo como artifact .keras.")

    tmp_dir = Path(tempfile.mkdtemp())
    try:
        out_dir = tmp_dir / name
        out_dir.mkdir(parents=True, exist_ok=True)
        model.save(out_dir / "model.keras")
        mlflow.log_artifacts(str(out_dir), artifact_path=name)
    finally:
        # Always remove the temporary folder, even when saving or uploading fails.
        shutil.rmtree(tmp_dir, ignore_errors=True)

def train_and_log(run_name: str, model: keras.Model, train_ds, val_ds, epochs: int, lr: float, note: str,
                  CLASSES=None, THRESH_PATH: Path=None, BEST_MODEL_PATH: Path=None):
    """
    Fit *model* inside a new MLflow run and log parameters, metrics and the model.

    Steps, in order: log params/tag, fit, log the last-epoch value of every
    history entry, optionally save the model to ``BEST_MODEL_PATH``, optionally
    make sure ``THRESH_PATH`` exists and log it, log the model, then report the
    maximum over epochs of the first F1-style history key that is present.

    Returns:
        float: the reported macro-F1 (0.0 when no F1 key is in the history).
        The Keras ``History`` object is not returned.
    """
    setup_mlflow()

    with mlflow.start_run(run_name=run_name):
        mlflow.set_tag("note", note)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("lr", lr)
        if CLASSES is not None:
            mlflow.log_param("classes", "|".join(CLASSES))

        # Training
        history = model.fit(train_ds, validation_data=val_ds, epochs=epochs, verbose=1).history

        # Last-epoch value of each tracked quantity ("@" is replaced in metric names)
        final_values = {name: float(series[-1]) for name, series in history.items() if len(series)}
        for name, value in final_values.items():
            mlflow.log_metric(name.replace("@", "_"), value)

        # Local copy of the trained model, when a path was given
        if BEST_MODEL_PATH is not None:
            BEST_MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
            model.save(BEST_MODEL_PATH)

        # Thresholds file: only created here as an empty placeholder if it is missing
        if THRESH_PATH is not None:
            THRESH_PATH.parent.mkdir(parents=True, exist_ok=True)
            if not THRESH_PATH.exists():
                placeholder = {"thresholds": {}, "created_at": time.strftime("%Y-%m-%d %H:%M:%S")}
                THRESH_PATH.write_text(
                    json.dumps(placeholder, indent=2, ensure_ascii=False),
                    encoding="utf-8",
                )
            # Logged without artifact_path so it lands at the root of the run's artifacts
            mlflow.log_artifact(str(THRESH_PATH))

        # Model goes under "model/" in the run's artifacts
        safe_log_keras_model(model, name="model")

        # First matching key wins; value is the best (max) epoch for that key
        candidate_keys = ("val_f1_macro", "f1_macro", "val_f1", "f1")
        reported_key = next((key for key in candidate_keys if key in history), None)
        macro_f1 = float(np.max(history[reported_key])) if reported_key is not None else 0.0

        mlflow.log_metric("macro_f1_reported", macro_f1)
        return macro_f1

# ========= Quick check =========
# Configure MLflow, then print the active tracking URI, the experiments visible through
# the client, and the command line that opens the UI on the same store.
tracking_uri = setup_mlflow()
client = MlflowClient()
print(" Tracking URI:", mlflow.get_tracking_uri())
print(" Experimentos:", [e.name for e in client.search_experiments()])
print(" Para abrir la UI, usa este comando:")
print(f'mlflow ui --backend-store-uri "{MLRUNS_DIR.resolve().as_uri()}" --port 5000')


 Tracking URI: file:///C:/Users/leoos/Examen_Segundo_Interciclo/mlruns
 Experimentos: ['Examen_Segundo_Interciclo']
 Para abrir la UI, usa este comando:
mlflow ui --backend-store-uri "file:///C:/Users/leoos/Examen_Segundo_Interciclo/mlruns" --port 5000


In [65]:
# =========================================================
# CELDA 2) SETUP MLFLOW (Experimento: Examen_Segundo_Interciclo)
# =========================================================
def setup_mlflow():
    """
    Configure MLflow to use the file store in ``MLRUNS_DIR`` and select the experiment.

    The experiment named ``EXPERIMENT_NAME`` is created when it does not exist yet.

    Returns the experiment id.
    """
    ensure_dirs()

    # Tracking URI of the form file:///.../mlruns
    mlflow.set_tracking_uri(MLRUNS_DIR.as_uri())

    client = MlflowClient()
    existing = client.get_experiment_by_name(EXPERIMENT_NAME)
    if existing is not None:
        exp_id = existing.experiment_id
    else:
        exp_id = client.create_experiment(EXPERIMENT_NAME)

    mlflow.set_experiment(EXPERIMENT_NAME)
    return exp_id

def safe_log_keras_model(model: keras.Model, name: str = "model"):
    """
    Log *model* to the active MLflow run, with a plain-artifact fallback.

    - No ``input_example`` / ``signature`` is passed.
    - ``pip_requirements`` is pinned explicitly instead of being inferred.
    - ``mlflow.keras.log_model`` is tried with ``name=`` first and retried with
      ``artifact_path=`` when that raises ``TypeError``.
    - If either attempt raises ``FileNotFoundError``, the model is saved to a
      temporary ``.keras`` file in the project folder and uploaded under
      ``model_fallback``.

    Args:
        model: Keras model to log.
        name: Artifact folder name inside the run.
    """
    pip_reqs = [
        f"tensorflow=={tf.__version__}",
        f"keras=={keras.__version__}",
        f"mlflow=={mlflow.__version__}",
        "numpy",
        "pillow",
    ]

    try:
        try:
            # Newer MLflow: uses name=
            mlflow.keras.log_model(
                model,
                name=name,
                pip_requirements=pip_reqs,
            )
        except TypeError:
            # Older MLflow: uses artifact_path=. This call stays inside the outer
            # try so a FileNotFoundError raised here also triggers the fallback.
            mlflow.keras.log_model(
                model,
                artifact_path=name,
                pip_requirements=pip_reqs,
            )
        return
    except FileNotFoundError:
        print("⚠️ MLflow falló guardando el modelo (tema Temp en Windows). Haré fallback a artifact .keras.")

    tmp_path = PROJECT_DIR / "_model_fallback.keras"
    try:
        model.save(tmp_path)
        mlflow.log_artifact(str(tmp_path), artifact_path="model_fallback")
    finally:
        # Remove the temporary file whether or not the upload succeeded;
        # failing to delete it is not worth aborting the run for.
        try:
            tmp_path.unlink()
        except OSError:
            pass
    print("✅ Fallback guardado en artifacts/model_fallback")


In [66]:
# =========================================================
# CELDA 3) DATASET: RAW RECORDS (raw/vaca, raw/cerdo, raw/gallina)
# =========================================================
def build_raw_records(train_split=0.8):
    """
    Build shuffled train/validation records from ``RAW_DIR/<class>`` folders.

    Each record is ``(path_string, one_hot_label)``. Within every class the
    images are shuffled with a generator seeded by ``SEED`` and the first
    ``int(n * train_split)`` go to train, the rest to validation. Both lists
    are shuffled again before being returned.

    Returns:
        (train_records, val_records)
    """
    rng = np.random.default_rng(SEED)
    train_records, val_records = [], []

    for class_index, class_name in enumerate(CLASSES):
        image_paths = list_images(RAW_DIR / class_name)
        rng.shuffle(image_paths)
        cutoff = int(len(image_paths) * train_split)

        for position, image_path in enumerate(image_paths):
            one_hot = np.zeros((NUM_CLASSES,), dtype=np.float32)
            one_hot[class_index] = 1.0
            target_list = train_records if position < cutoff else val_records
            target_list.append((str(image_path), one_hot))

    rng.shuffle(train_records)
    rng.shuffle(val_records)
    return train_records, val_records

# Build the 80/20 split once for the notebook and report its sizes.
raw_train, raw_val = build_raw_records(train_split=0.8)
print("Raw train:", len(raw_train), "| Raw val:", len(raw_val))


Raw train: 122 | Raw val: 33


In [67]:
# =========================================================
# CELDA 4) FEEDBACK: GUARDAR Y CARGAR (multi-label)
# =========================================================
def load_feedback_records():
    """
    Read the feedback CSV and return ``(image_path_string, multi_hot_label)`` records.

    Rows with fewer than three comma-separated fields, or whose image is no
    longer present in ``FEEDBACK_IMG_DIR``, are skipped. Labels are separated
    by ``|``; labels that are not in ``CLASSES`` are ignored.
    """
    ensure_feedback_csv()
    if not FEEDBACK_LABELS_CSV.exists():
        return []

    content = FEEDBACK_LABELS_CSV.read_text(encoding="utf-8").strip()
    data_rows = content.splitlines()[1:]  # drop the header row

    records = []
    for row in data_rows:
        fields = row.split(",", 2)
        if len(fields) < 3:
            continue

        filename, labels_field = fields[0], fields[1]
        image_path = FEEDBACK_IMG_DIR / filename
        if not image_path.exists():
            continue

        target = np.zeros((NUM_CLASSES,), dtype=np.float32)
        row_labels = labels_field.split("|") if labels_field else []
        for label in row_labels:
            if label in CLASSES:
                target[CLASSES.index(label)] = 1.0

        records.append((str(image_path), target))
    return records

def save_feedback(img_path: Path, selected_labels):
    """
    Store a user correction: copy the image and append a row to the feedback CSV.

    The image is copied into ``FEEDBACK_IMG_DIR`` as
    ``<stem>_<timestamp><suffix>`` and a ``filename,labels,timestamp`` row is
    appended, with labels joined by ``|``.

    Args:
        img_path: Image the correction refers to.
        selected_labels: Iterable of label strings chosen by the user.

    Returns:
        Path of the copied image.
    """
    ensure_dirs()
    ensure_feedback_csv()

    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    raw_name = f"{img_path.stem}_{ts}{img_path.suffix.lower()}"
    # The CSV is written and read as raw comma-separated text, one row per line,
    # so a comma or line break in the stored filename would shift the columns
    # and the row would be dropped on load. Neutralise those characters.
    out_name = raw_name.translate(str.maketrans({",": "_", "\r": "_", "\n": "_"}))
    out_path = FEEDBACK_IMG_DIR / out_name
    shutil.copy2(img_path, out_path)

    labels_csv = "|".join(selected_labels)
    with open(FEEDBACK_LABELS_CSV, "a", encoding="utf-8") as f:
        f.write(f"{out_name},{labels_csv},{ts}\n")

    return out_path

# Report how many usable feedback records are currently stored.
print("Feedback actual:", len(load_feedback_records()))


Feedback actual: 6


In [68]:
# =========================================================
# CELDA 5) TF PIPELINE (decode + tf.data)
# =========================================================
def decode_image(path: tf.Tensor) -> tf.Tensor:
    """
    Load the image at *path* and prepare it as model input.

    The file is decoded to 3 channels, resized to ``IMG_SIZE`` x ``IMG_SIZE``,
    cast to float32 and passed through EfficientNet's ``preprocess_input``.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    # decode_image does not set a static shape; declare rank and channel count
    pixels.set_shape([None, None, 3])
    resized = tf.image.resize(pixels, [IMG_SIZE, IMG_SIZE])
    as_float = tf.cast(resized, tf.float32)
    return preprocess_input(as_float)

def make_ds(records, training: bool, shuffle: bool):
    """
    Build a batched, prefetched ``tf.data.Dataset`` of ``(image, label)`` from records.

    Args:
        records: Sequence of ``(path_string, label_vector)`` items.
        training: Shuffling is applied only when this and *shuffle* are both true.
        shuffle: See *training*.
    """
    path_array = np.array([rec[0] for rec in records], dtype=object)
    label_array = np.stack([rec[1] for rec in records]).astype(np.float32)

    dataset = tf.data.Dataset.from_tensor_slices((path_array, label_array))
    if training and shuffle:
        buffer = min(len(records), 2000)
        dataset = dataset.shuffle(buffer_size=buffer, seed=SEED, reshuffle_each_iteration=True)

    dataset = dataset.map(
        lambda p, y: (decode_image(p), y),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [69]:
# =========================================================
# CELDA 6) MÉTRICAS: F1Macro + THRESHOLDS (macro-f1 real)
# =========================================================
class F1Macro(tf.keras.metrics.Metric):
    """
    Streaming macro-averaged F1 for multi-label outputs.

    Predictions are binarised with a single *threshold*; per-class true
    positives, false positives and false negatives are accumulated across
    batches, and the result is the mean of the per-class F1 scores.

    Args:
        num_classes: Number of output columns.
        threshold: Probability at or above which a class counts as predicted.
        name: Metric name as it appears in logs/history.
    """

    def __init__(self, num_classes: int, threshold: float = 0.35, name="f1_macro", **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_classes = num_classes
        self.threshold = threshold
        self.tp = self.add_weight(shape=(num_classes,), initializer="zeros", name="tp")
        self.fp = self.add_weight(shape=(num_classes,), initializer="zeros", name="fp")
        self.fn = self.add_weight(shape=(num_classes,), initializer="zeros", name="fn")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate per-class counts for one batch (``sample_weight`` is not used)."""
        y_true = tf.cast(y_true, tf.float32)
        y_hat = tf.cast(y_pred >= self.threshold, tf.float32)
        # Sum over the batch axis -> one count per class
        tp = tf.reduce_sum(y_true * y_hat, axis=0)
        fp = tf.reduce_sum((1.0 - y_true) * y_hat, axis=0)
        fn = tf.reduce_sum(y_true * (1.0 - y_hat), axis=0)
        self.tp.assign_add(tp)
        self.fp.assign_add(fp)
        self.fn.assign_add(fn)

    def result(self):
        """Return the mean per-class F1; the 1e-7 terms avoid division by zero."""
        precision = self.tp / (self.tp + self.fp + 1e-7)
        recall = self.tp / (self.tp + self.fn + 1e-7)
        f1 = 2 * precision * recall / (precision + recall + 1e-7)
        return tf.reduce_mean(f1)

    def reset_state(self):
        """Zero the accumulated counts (current Keras name of the reset hook)."""
        for v in [self.tp, self.fp, self.fn]:
            v.assign(tf.zeros_like(v))

    def reset_states(self):
        """Deprecated spelling kept so existing callers keep working."""
        self.reset_state()

    def get_config(self):
        """Include the constructor arguments so the metric can be rebuilt from its config."""
        config = super().get_config()
        config.update({"num_classes": self.num_classes, "threshold": self.threshold})
        return config


def evaluate_probs(model: keras.Model, ds):
    """
    Run *model* over every batch of *ds* and collect labels and predictions.

    Returns:
        (y_true, y_prob): row-stacked arrays; both are empty with
        ``NUM_CLASSES`` columns when *ds* yields no batches.
    """
    truth_batches, prob_batches = [], []
    for batch_x, batch_y in ds:
        batch_prob = model.predict(batch_x, verbose=0)
        truth_batches.append(batch_y.numpy())
        prob_batches.append(batch_prob)

    def _stack_or_empty(batches):
        if batches:
            return np.vstack(batches)
        return np.zeros((0, NUM_CLASSES), dtype=np.float32)

    return _stack_or_empty(truth_batches), _stack_or_empty(prob_batches)

def metrics_at_thresholds(y_true: np.ndarray, y_prob: np.ndarray, thr: np.ndarray):
    """
    Compute per-class precision, recall and F1, plus macro-F1, at per-class thresholds.

    Args:
        y_true: Label matrix, one row per sample and one column per class.
        y_prob: Predicted probabilities with the same layout.
        thr: One threshold per class; a class is predicted when prob >= threshold.

    Returns:
        (precision, recall, f1, macro_f1) where the first three are per-class
        arrays and ``macro_f1`` is a Python float (0.0 when there are no classes).
    """
    eps = 1e-7  # keeps the ratios defined when a denominator would be zero

    predicted = y_prob >= thr.reshape(1, -1)
    labels = y_true.astype(int)
    is_positive = labels == 1
    is_negative = labels == 0

    tp = np.sum(is_positive & predicted, axis=0)
    fp = np.sum(is_negative & predicted, axis=0)
    fn = np.sum(is_positive & ~predicted, axis=0)

    prec = tp / (tp + fp + eps)
    rec = tp / (tp + fn + eps)
    f1 = 2 * prec * rec / (prec + rec + eps)

    if f1.size:
        macro_f1 = float(np.mean(f1))
    else:
        macro_f1 = 0.0
    return prec, rec, f1, macro_f1

def find_best_thresholds(y_true: np.ndarray, y_prob: np.ndarray):
    """
    Pick, for each class, the threshold on a fixed grid that maximises that class's F1.

    The grid is 19 evenly spaced values from 0.05 to 0.95. On ties the lowest
    threshold reaching the best F1 wins. When no grid value gives a positive F1
    for a class (for example, the class has no positive samples), the default
    threshold 0.35 is kept for it.

    Args:
        y_true: Label matrix, one row per sample and one column per class.
        y_prob: Predicted probabilities with the same layout.

    Returns:
        float32 array with one threshold per column of *y_prob*.
    """
    default_thr = 0.35
    eps = 1e-7
    n_classes = y_prob.shape[1]
    thrs = np.full((n_classes,), default_thr, dtype=np.float32)
    grid = np.linspace(0.05, 0.95, 19)

    for c in range(n_classes):
        labels = y_true[:, c].astype(int)
        is_positive = labels == 1
        is_negative = labels == 0
        scores = y_prob[:, c]

        # Start at 0.0 (not below it) so a threshold only replaces the default
        # when it actually achieves a positive F1.
        best_f1 = 0.0
        best_t = default_thr
        for t in grid:
            predicted = scores >= t
            tp = np.sum(is_positive & predicted)
            fp = np.sum(is_negative & predicted)
            fn = np.sum(is_positive & ~predicted)
            prec = tp / (tp + fp + eps)
            rec = tp / (tp + fn + eps)
            f1 = 2 * prec * rec / (prec + rec + eps)
            if f1 > best_f1:
                best_f1 = f1
                best_t = float(t)
        thrs[c] = best_t
    return thrs

def save_thresholds(thr: np.ndarray):
    """Write the per-class thresholds to ``THRESH_PATH`` as JSON, with a creation timestamp."""
    per_class = {class_name: float(thr[idx]) for idx, class_name in enumerate(CLASSES)}
    payload = {
        "classes": CLASSES,
        "thresholds": per_class,
        "created_at": datetime.now().isoformat(timespec="seconds"),
    }
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    THRESH_PATH.write_text(serialized, encoding="utf-8")

def load_thresholds_or_default():
    """
    Return the per-class thresholds as a float32 array ordered like ``CLASSES``.

    Values come from ``THRESH_PATH`` when the file exists; any class missing
    from the file, or every class when the file is absent, gets 0.35.
    """
    if not THRESH_PATH.exists():
        return np.full((NUM_CLASSES,), 0.35, dtype=np.float32)

    stored = json.loads(THRESH_PATH.read_text(encoding="utf-8"))
    per_class = stored.get("thresholds", {})
    values = [float(per_class.get(class_name, 0.35)) for class_name in CLASSES]
    return np.array(values, dtype=np.float32)


In [70]:
# =========================================================
# CELDA 7) MODELO + LOAD BEST
# =========================================================
def build_model(lr=1e-3, threshold_metric=0.35):
    """
    Build and compile the multi-label classifier.

    Architecture: frozen ImageNet EfficientNetB0 backbone (no top) ->
    global average pooling -> dropout 0.25 -> sigmoid dense layer with
    ``NUM_CLASSES`` outputs. Compiled with Adam and binary cross-entropy.

    Args:
        lr: Learning rate for Adam.
        threshold_metric: Decision threshold used by the thresholded metrics.
    """
    input_shape = (IMG_SIZE, IMG_SIZE, 3)

    backbone = keras.applications.EfficientNetB0(
        weights="imagenet",
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False

    inputs = keras.Input(shape=input_shape)
    features = backbone(inputs, training=False)
    pooled = keras.layers.GlobalAveragePooling2D()(features)
    dropped = keras.layers.Dropout(0.25)(pooled)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(dropped)

    classifier = keras.Model(inputs, outputs)

    optimizer = keras.optimizers.Adam(lr)
    tracked_metrics = [
        tf.keras.metrics.BinaryAccuracy(name="bin_acc", threshold=threshold_metric),
        tf.keras.metrics.AUC(curve="PR", name="auc_pr"),
        tf.keras.metrics.Precision(name="precision", thresholds=threshold_metric),
        tf.keras.metrics.Recall(name="recall", thresholds=threshold_metric),
        F1Macro(NUM_CLASSES, threshold=threshold_metric, name="f1_macro"),
    ]
    classifier.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=tracked_metrics)
    return classifier

def finetune_unfreeze(model: keras.Model, n_layers: int = 30):
    """
    Make the last *n_layers* layers of the EfficientNet backbone trainable.

    The backbone is the first sub-model of *model* whose name contains
    ``"efficientnet"``; if none is found the function does nothing. The
    backbone is marked trainable and every layer before the last *n_layers*
    is frozen again.

    Args:
        model: Model that contains the backbone as a nested layer.
        n_layers: How many trailing backbone layers to leave trainable.
            Values <= 0 leave no backbone layer trainable; values larger than
            the backbone leave all of them trainable.
    """
    backbone = next(
        (
            layer
            for layer in model.layers
            if isinstance(layer, keras.Model) and "efficientnet" in layer.name
        ),
        None,
    )
    if backbone is None:
        return

    backbone.trainable = True
    # Compute the split index explicitly: slicing with [:-n_layers] would select
    # nothing for n_layers == 0 and leave the whole backbone trainable.
    trainable_count = max(n_layers, 0)
    frozen_count = max(len(backbone.layers) - trainable_count, 0)
    for layer in backbone.layers[:frozen_count]:
        layer.trainable = False

def load_best_model_if_exists():
    """
    Return a compiled model: the one saved at ``BEST_MODEL_PATH`` if present, else a new one.

    A saved model is loaded without its compile state and recompiled here with
    Adam(1e-4), binary cross-entropy and the notebook's metrics at threshold 0.35.
    """
    if not BEST_MODEL_PATH.exists():
        return build_model(lr=1e-3, threshold_metric=0.35)

    restored = keras.models.load_model(BEST_MODEL_PATH, compile=False)

    optimizer = keras.optimizers.Adam(1e-4)
    tracked_metrics = [
        tf.keras.metrics.BinaryAccuracy(name="bin_acc", threshold=0.35),
        tf.keras.metrics.AUC(curve="PR", name="auc_pr"),
        tf.keras.metrics.Precision(name="precision", thresholds=0.35),
        tf.keras.metrics.Recall(name="recall", thresholds=0.35),
        F1Macro(NUM_CLASSES, threshold=0.35, name="f1_macro"),
    ]
    restored.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=tracked_metrics)
    return restored

# Notebook-level state used by the prediction UI: current thresholds and current model.
thr_vec = load_thresholds_or_default()
model_ui = load_best_model_if_exists()

# Shows whether a saved model / thresholds file was found on disk.
print("Modelo listo:", BEST_MODEL_PATH.exists(), "| Thresholds:", THRESH_PATH.exists())


Modelo listo: True | Thresholds: True


In [71]:
# =========================================================
# CELDA 8) TRAIN + LOG MLFLOW (CORREGIDO: devuelve macro_f1 float)
# =========================================================
def train_and_log(run_name: str, model: keras.Model, train_ds, val_ds, epochs: int, lr: float, note: str):
    """
    Train *model* inside a new MLflow run and record everything about it.

    Steps, in order: log params/tag, fit, log the last-epoch value of each
    history entry, search per-class thresholds on *val_ds* and save them,
    log macro/per-class F1 and thresholds, overwrite ``BEST_MODEL_PATH`` with
    the trained model, and log the thresholds file and the model as artifacts.

    NOTE(review): the model is saved to ``BEST_MODEL_PATH`` unconditionally; it
    is not compared against a previously saved model's score.

    Returns:
        float: validation macro-F1 at the selected per-class thresholds.
    """
    setup_mlflow()

    with mlflow.start_run(run_name=run_name):
        mlflow.set_tag("note", note)
        run_params = {
            "epochs": epochs,
            "lr": lr,
            "img_size": IMG_SIZE,
            "batch_size": BATCH_SIZE,
            "classes": "|".join(CLASSES),
        }
        for param_name, param_value in run_params.items():
            mlflow.log_param(param_name, param_value)

        history = model.fit(train_ds, validation_data=val_ds, epochs=epochs, verbose=1).history

        # Last-epoch value of each tracked quantity ("@" is replaced in metric names)
        final_values = {name: float(series[-1]) for name, series in history.items() if len(series)}
        for name, value in final_values.items():
            mlflow.log_metric(name.replace("@", "_"), value)

        # Best per-class thresholds on the validation set
        y_true, y_prob = evaluate_probs(model, val_ds)
        best_thr = find_best_thresholds(y_true, y_prob)
        save_thresholds(best_thr)

        # Macro-F1 measured with those thresholds
        _, _, per_class_f1, macro_f1 = metrics_at_thresholds(y_true, y_prob, best_thr)
        macro_f1 = float(macro_f1)

        mlflow.log_metric("val_macro_f1_best_thr", macro_f1)
        for idx, class_name in enumerate(CLASSES):
            mlflow.log_metric(f"val_f1_{class_name}", float(per_class_f1[idx]))
            mlflow.log_metric(f"thr_{class_name}", float(best_thr[idx]))

        # Persist the model locally, then attach thresholds and model to the run
        model.save(BEST_MODEL_PATH)
        mlflow.log_artifact(str(THRESH_PATH), artifact_path="artifacts")
        safe_log_keras_model(model, name="model")

        return macro_f1


In [72]:
# =========================================================
# CELDA 9) ENTRENAMIENTO BASE (si no existe best_multilabel.keras)
# =========================================================
# Train a base model only when no saved model exists yet.
if not BEST_MODEL_PATH.exists():
    print("No existe BEST_MODEL_PATH -> entrenando modelo BASE...")

    # Training data is shuffled; validation data keeps its order.
    train_ds = make_ds(raw_train, training=True, shuffle=True)
    val_ds = make_ds(raw_val, training=False, shuffle=False)

    model_base = build_model(lr=1e-3, threshold_metric=0.35)

    # Run name carries a timestamp so every base training gets its own MLflow run.
    macro_f1 = train_and_log(
        run_name=f"train_base_{time.strftime('%Y%m%d_%H%M%S')}",
        model=model_base,
        train_ds=train_ds,
        val_ds=val_ds,
        epochs=8,
        lr=1e-3,
        note="Entrenamiento base inicial"
    )

    print(f"Base listo. Macro-F1(val, best thr)={macro_f1:.4f}")

# Reload thresholds and model from disk so the UI state matches what was just saved.
thr_vec = load_thresholds_or_default()
model_ui = load_best_model_if_exists()
print("Modelo listo:", BEST_MODEL_PATH.exists(), "| Thresholds:", THRESH_PATH.exists())


Modelo listo: True | Thresholds: True


## Predicción
Selecciona una imagen y el sistema mostrará:
- Probabilidades por clase
- Predicción final
- Opción de corrección (feedback)


## Reentrenamiento incremental
Al reentrenar:
- Se mezclan datos originales + feedback
- Se ajusta el número de epochs según cantidad de feedback
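- Se recalculan los thresholds por clase en validación, se guarda el modelo reentrenado en `best_multilabel.keras` (sobrescribe el anterior) y se registra un nuevo run en MLflow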


In [75]:
# =========================================================
# CELDA 10) PREDICCIÓN + UI (solo test_images) + FEEDBACK + REENTRENAR
# =========================================================
import ipywidgets as widgets
from IPython.display import display, clear_output

def predict_one(model, img_path: Path):
    """Return the model's per-class probability vector for the single image at *img_path*."""
    path_tensor = tf.convert_to_tensor(str(img_path))
    single_batch = tf.expand_dims(decode_image(path_tensor), 0)  # add the batch axis
    predictions = model.predict(single_batch, verbose=0)
    return predictions[0]

def pretty_print_probs(prob, thr):
    """
    Format one line per class, sorted by descending probability.

    Each line shows the class name, whether its probability reaches its
    threshold, the probability and the threshold.
    """
    def _format_row(idx):
        estado = "Existe" if prob[idx] >= thr[idx] else "No existe"
        return f"{CLASSES[idx]:10s} | {estado:9s} | prob={prob[idx]:.3f} | thr={thr[idx]:.2f}"

    ranked = np.argsort(-prob)
    return "\n".join(_format_row(idx) for idx in ranked)

def detected_labels(prob, thr):
    """
    Return the class names whose probability reaches its threshold (at most three).

    When no class reaches its threshold, the single most probable class is returned.
    """
    above = [name for idx, name in enumerate(CLASSES) if prob[idx] >= thr[idx]]
    if not above:
        top_index = int(np.argmax(prob))
        above = [CLASSES[top_index]]
    return above[:3]

def compute_retrain_epochs(n_fb: int) -> int:
    """Map the number of feedback records to a number of retraining epochs (2, 4, 6 or 8)."""
    # (inclusive upper bound on feedback count, epochs)
    schedule = ((2, 2), (5, 4), (10, 6))
    for upper_bound, epochs in schedule:
        if n_fb <= upper_bound:
            return epochs
    return 8

def compute_feedback_boost(n_fb: int) -> int:
    """Return how many times the feedback records are repeated: fewer records -> more repeats."""
    if n_fb >= 10:
        return 2
    return 6 if n_fb < 5 else 4

def show_preview(img_path: Path, max_side=360):
    """
    Display the image at *img_path*, shrunk so its longer side is at most *max_side*.

    Images already within the limit are shown at their original size. Any
    error while opening, resizing or displaying is printed instead of raised.
    """
    try:
        image = PILImage.open(img_path).convert("RGB")
        width, height = image.size
        shrink = max(width, height) / max_side
        if shrink > 1:
            new_size = (int(width / shrink), int(height / shrink))
            image = image.resize(new_size)
        display(image)
    except Exception as e:
        print("No se pudo mostrar la imagen:", e)

# Widgets
# Dropdown of test images: label is the file name, value is the full path string.
# When the folder has no images a single placeholder entry with an empty value is shown.
imgs = list_images(TEST_DIR)
img_dd = widgets.Dropdown(
    options=[(p.name, str(p)) for p in imgs] if imgs else [("No hay imágenes en test_images", "")],
    description="Imagen:",
    layout=widgets.Layout(width="65%")
)

# Action buttons; their handlers are attached further down.
btn_refresh = widgets.Button(description="Refrescar", button_style="")
btn_predict = widgets.Button(description="Predecir", button_style="info")
btn_save = widgets.Button(description="Guardar corrección", button_style="warning")
btn_retrain = widgets.Button(description="Reentrenar con feedback", button_style="success")

# Multi-select with the correct label(s) chosen by the user for the current image.
labels_select = widgets.SelectMultiple(
    options=CLASSES,
    description="Correcta:",
    layout=widgets.Layout(width="65%", height="110px")
)

# All handler output (previews, messages, training logs) is captured here.
out = widgets.Output()

# Last image path that was selected and found on disk; used as a fallback selection.
_last_img_path = None

def get_selected_image_path():
    """
    Return the path of the image to act on.

    If the dropdown holds a path that exists on disk, it is remembered and
    returned; otherwise the last remembered path (possibly ``None``) is returned.
    """
    global _last_img_path
    candidate = Path(img_dd.value) if img_dd.value else None
    if candidate is not None and candidate.exists():
        _last_img_path = candidate
    return _last_img_path

def on_refresh(_):
    """Button handler: re-scan ``TEST_DIR`` and repopulate the image dropdown."""
    with out:
        clear_output()
        found = list_images(TEST_DIR)
        if found:
            img_dd.options = [(p.name, str(p)) for p in found]
        else:
            img_dd.options = [("No hay imágenes en test_images", "")]
        print(" Lista actualizada. Imágenes:", len(found))

btn_refresh.on_click(on_refresh)

def on_dropdown_change(change):
    """Observer: preview the newly selected image; ignores changes that are not to ``value``."""
    if change.get("name") != "value":
        return
    with out:
        clear_output()
        chosen = change["new"]
        if not chosen:
            print("Selecciona una imagen.")
            return
        chosen_path = Path(chosen)
        show_preview(chosen_path)
        print(f"Seleccionada: {chosen_path.name}")
        print("Presiona **Predecir**.")

img_dd.observe(on_dropdown_change, names="value")

def on_predict(_):
    """Button handler: preview the selected image and print its per-class prediction."""
    global model_ui, thr_vec
    with out:
        clear_output()
        selected = get_selected_image_path()
        if selected is None:
            print("No hay imagen seleccionada.")
            return

        show_preview(selected)
        scores = predict_one(model_ui, selected)

        print("\nPredicción (multi-label):")
        print(pretty_print_probs(scores, thr_vec))

        found_labels = detected_labels(scores, thr_vec)
        print("\nDetectado:", found_labels)

btn_predict.on_click(on_predict)

def on_save(_):
    """Button handler: store the labels chosen in the multi-select as feedback for the image."""
    with out:
        clear_output()
        selected = get_selected_image_path()
        if selected is None:
            print("No hay imagen seleccionada.")
            return

        show_preview(selected)

        chosen_labels = list(labels_select.value)
        if not chosen_labels:
            print("Selecciona al menos 1 etiqueta correcta.")
            return

        stored_copy = save_feedback(selected, chosen_labels)
        print(" Corrección guardada:", stored_copy.name)
        print("Etiquetas:", chosen_labels)
        print("Ahora puedes presionar **Reentrenar con feedback**.")

btn_save.on_click(on_save)

def on_retrain(_):
    """
    Button handler: retrain the saved model on raw training data plus stored feedback.

    Requires at least one feedback record. The retrained model and thresholds
    are written by ``train_and_log`` and then reloaded into the notebook-level
    ``model_ui`` / ``thr_vec`` used by the prediction handler.
    """
    global model_ui, thr_vec
    with out:
        clear_output()

        fb = load_feedback_records()
        if len(fb) == 0:
            print("No hay feedback aún. Primero guarda correcciones.")
            return

        # Repeat the feedback records so they carry more weight next to the raw data.
        boost = compute_feedback_boost(len(fb))
        fb_aug = fb * boost

        # Feedback is added to the training split only; validation stays raw-only.
        raw_train, raw_val = build_raw_records(train_split=0.8)
        retrain_records = raw_train + fb_aug

        train_ds = make_ds(retrain_records, training=True, shuffle=True)
        val_ds = make_ds(raw_val, training=False, shuffle=False)

        # Start from the model currently saved on disk (or a fresh one if none exists).
        model_ui = load_best_model_if_exists()

        # With 10+ feedback records, also unfreeze the last backbone layers and use a
        # lower learning rate; otherwise the backbone is left as loaded.
        if len(fb) >= 10:
            finetune_unfreeze(model_ui, n_layers=30)
            lr = 1e-4
            note = f"Reentreno fine-tune. fb={len(fb)} boost={boost}"
        else:
            lr = 2e-4
            note = f"Reentreno head. fb={len(fb)} boost={boost}"

        # Recompile after changing trainable flags and to apply the chosen learning rate.
        model_ui.compile(
            optimizer=keras.optimizers.Adam(lr),
            loss="binary_crossentropy",
            metrics=[
                tf.keras.metrics.BinaryAccuracy(name="bin_acc", threshold=0.35),
                tf.keras.metrics.AUC(curve="PR", name="auc_pr"),
                tf.keras.metrics.Precision(name="precision", thresholds=0.35),
                tf.keras.metrics.Recall(name="recall", thresholds=0.35),
                F1Macro(NUM_CLASSES, threshold=0.35, name="f1_macro"),
            ]
        )

        epochs = compute_retrain_epochs(len(fb))
        print(f" Reentrenando... epochs={epochs} | fb={len(fb)} | boost={boost} | lr={lr}")

        macro_f1 = train_and_log(
            run_name=f"retrain_{time.strftime('%Y%m%d_%H%M%S')}",
            model=model_ui,
            train_ds=train_ds,
            val_ds=val_ds,
            epochs=epochs,
            lr=lr,
            note=note
        )

        # Reload from disk so the UI uses exactly what was saved by the run.
        thr_vec = load_thresholds_or_default()
        model_ui = load_best_model_if_exists()

        print(f" Listo. Macro-F1(val, best thr)={macro_f1:.4f}")
        print("Thresholds recargados. Ahora vuelve a presionar **Predecir**.")

btn_retrain.on_click(on_retrain)

# Layout: image picker row, label selector, action row, then the shared output area.
ui = widgets.VBox([
    widgets.HBox([img_dd, btn_refresh, btn_predict]),
    labels_select,
    widgets.HBox([btn_save, btn_retrain]),
    out
])

display(ui)

# Initial preview: call the change handler by hand with a change-like dict
# for the image that is selected when the UI is first shown.
if img_dd.value:
    on_dropdown_change({"name":"value", "new": img_dd.value})


VBox(children=(HBox(children=(Dropdown(description='Imagen:', layout=Layout(width='65%'), options=(('Clasifica…

In [74]:
import mlflow, os
from pathlib import Path

# Diagnostics: where the notebook is running and which store MLflow is pointing at.
print("CWD:", Path.cwd())
print("Tracking URI:", mlflow.get_tracking_uri())

# Check which backends exist on disk next to the notebook.
# NOTE(review): mlflow.db is presumably left over from an earlier SQLite-backed setup;
# this notebook configures the file store under ./mlruns - confirm it is unused.
print("Existe ./mlruns ?", (Path.cwd()/ "mlruns").exists())
print("Existe ./mlflow.db ?", (Path.cwd()/ "mlflow.db").exists())


CWD: C:\Users\leoos\Examen_Segundo_Interciclo
Tracking URI: file:///C:/Users/leoos/Examen_Segundo_Interciclo/mlruns
Existe ./mlruns ? True
Existe ./mlflow.db ? True
