# Tercer notebook
En este notebook se unirán los archivos JSON creados con el dataset reducido para el entrenamiento, se usará un modelo preentrenado Faster R-CNN por su precisión y ahorro de recursos, y se iniciará el registro de modelos en MLflow.

## Objetivos
1. Cargar el dataset reducido generado en `data/processed/`.
2. Crear un DataLoader compatible con modelos de detección (torchvision).
3. Construir un modelo preentrenado (Faster R-CNN) y adaptar la cabeza a 3 clases.
4. Entrenar en CPU con configuración eficiente.
5. Registrar en MLflow:
   - parámetros
   - métricas
   - artefactos (pesos, labelmap, config)
6. Guardar un checkpoint local para usar en inferencia y en reentrenamiento.


In [None]:
"""
- Importa librerías necesarias para el entrenamiento.
"""

import os
import json
import time
from pathlib import Path
from datetime import datetime
from typing import Dict, List

import random
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image

from tqdm import tqdm

import mlflow

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F


1. Carga de archivos .json y validación de la estructura del proyecto

In [None]:
"""
En esta sección se cargan project_config.json y labelmap.json, creados en el Notebook 01.
Además, se definen rutas absolutas sin depender de la carpeta actual del notebook.
"""

def find_project_root(start: Path, max_up: int = 8) -> Path:
    """Locate the project root by walking upward from ``start``.

    A directory is accepted as the root when it contains
    ``data/processed/project_config.json``.

    Args:
        start: Directory where the search begins; it is resolved first.
        max_up: Number of directories inspected, counting ``start`` itself.

    Returns:
        The first inspected directory that contains the marker file.

    Raises:
        FileNotFoundError: If none of the inspected directories contains
            the marker file.
    """
    marker = Path("data", "processed", "project_config.json")
    candidate = start.resolve()
    for _level in range(max_up):
        if candidate.joinpath(marker).exists():
            return candidate
        # Not here: move one directory up and try again.
        candidate = candidate.parent
    raise FileNotFoundError("No se encontró data/processed/project_config.json. Ejecuta Notebook 01.")

# Anchor every path on the detected project root so the notebook does not
# depend on the directory it is launched from.
PROJECT_ROOT = find_project_root(Path.cwd())
PROCESSED_DIR = PROJECT_ROOT.joinpath("data", "processed").resolve()

PROJECT_CONFIG_PATH = PROCESSED_DIR.joinpath("project_config.json").resolve()
LABELMAP_PATH = PROCESSED_DIR.joinpath("labelmap.json").resolve()

# Both JSON files are read as UTF-8 and parsed into plain Python objects.
project_config = json.loads(PROJECT_CONFIG_PATH.read_text(encoding="utf-8"))
labelmap = json.loads(LABELMAP_PATH.read_text(encoding="utf-8"))

# Dataset locations come straight from the project configuration.
COCO_ROOT = Path(project_config["coco_root"])
TRAIN_IMG_DIR = Path(project_config["train_dir"])
VAL_IMG_DIR = Path(project_config["val_dir"])

TARGET_CLASSES = project_config["target_classes"]
target_cat_ids = labelmap["target_category_ids"]

# Reduced annotation files expected inside the processed directory.
TRAIN_JSON = PROCESSED_DIR.joinpath("coco_person_car_airplane_train.json").resolve()
VAL_JSON = PROCESSED_DIR.joinpath("coco_person_car_airplane_val.json").resolve()

# Local checkpoint directory; created on demand.
MODELS_DIR = PROJECT_ROOT.joinpath("models", "local_checkpoints").resolve()
MODELS_DIR.mkdir(parents=True, exist_ok=True)

print("Mismo contenido visto anteriormente, solo para confirmar las carpetas ")
for _label, _value in (
    ("PROJECT_ROOT:", PROJECT_ROOT),
    ("TRAIN_JSON:", TRAIN_JSON),
    ("VAL_JSON  :", VAL_JSON),
    ("TARGET_CLASSES:", TARGET_CLASSES),
    ("target_cat_ids:", target_cat_ids),
    ("MODELS_DIR:", MODELS_DIR),
):
    print(_label, _value)


PROJECT_ROOT: C:\Users\Johnny\Desktop\IA-final
TRAIN_JSON: C:\Users\Johnny\Desktop\IA-final\data\processed\coco_person_car_airplane_train.json
VAL_JSON  : C:\Users\Johnny\Desktop\IA-final\data\processed\coco_person_car_airplane_val.json
TARGET_CLASSES: ['person', 'car', 'airplane']
target_cat_ids: [1, 3, 5]
MODELS_DIR: C:\Users\Johnny\Desktop\IA-final\models\local_checkpoints


2. Configuración de entrenamiento limitado

In [None]:
"""
En esta sección se configura el entrenamiento en CPU, dado que no se cuenta con una GPU potente en la máquina donde se entrenará;
si cuentas con una tarjeta gráfica de buena calidad, puedes cambiar estos parámetros para que tu entrenamiento sea más rápido. Además,
se establecen los threads y las semillas para la reproducibilidad (para evitar que la selección aleatoria de imágenes cambie entre ejecuciones) y se definen hiperparámetros conservadores para CPU.
"""

# Seed the three random number generators used in this notebook.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

DEVICE = torch.device("cpu")

# Use every reported core except one, but never fewer than one thread;
# fall back to 2 cores when the count is unavailable.
CPU_NUM_THREADS = max(1, (os.cpu_count() or 2) - 1)
torch.set_num_threads(CPU_NUM_THREADS)

# Hyperparameters and run settings, later logged to MLflow as parameters.
CONFIG = dict(
    seed=SEED,
    device=str(DEVICE),
    cpu_num_threads=CPU_NUM_THREADS,
    batch_size=2,
    num_workers=0,
    learning_rate=1e-4,
    weight_decay=1e-4,
    epochs=2,
    score_threshold=0.5,
    model_name="fasterrcnn_resnet50_fpn",
    pretrained_weights="DEFAULT",
)

print("CONFIG:")
for _key, _setting in CONFIG.items():
    print(f"{_key}: {_setting}")


CONFIG:
seed: 42
device: cpu
cpu_num_threads: 11
batch_size: 2
num_workers: 0
learning_rate: 0.0001
weight_decay: 0.0001
epochs: 2
score_threshold: 0.5
model_name: fasterrcnn_resnet50_fpn
pretrained_weights: DEFAULT


In [None]:
"""
Aquí se verifica principalmente que TRAIN_JSON y VAL_JSON existan; además, se cuenta cuántas imágenes contienen
y, en caso de ocurrir algo anormal, se bloquea la ejecución si el train supera el límite establecido.
"""

def assert_file(path: Path, label: str) -> None:
    """Ensure that a required file is present on disk.

    Args:
        path: Location that must exist.
        label: Human-readable name included in the error message.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if path.exists():
        return
    raise FileNotFoundError(f"Archivo requerido no encontrado: {label}: {path}")

# Fail early when either reduced annotation file is missing.
assert_file(TRAIN_JSON, "TRAIN_JSON reducido (Notebook 02)")
assert_file(VAL_JSON, "VAL_JSON reducido (Notebook 02)")

# Parse both annotation files fully into memory.
train_coco = json.loads(TRAIN_JSON.read_text(encoding="utf-8"))
val_coco = json.loads(VAL_JSON.read_text(encoding="utf-8"))

train_images_n = len(train_coco["images"])
val_images_n = len(val_coco["images"])

print("TRAIN images:", train_images_n)
print("VAL images  :", val_images_n)

# Hard safety limit: refuse to continue with a training set above this size.
MAX_ALLOWED_TRAIN_IMAGES = 10000
if train_images_n > MAX_ALLOWED_TRAIN_IMAGES:
    raise RuntimeError(
        f"TRAIN_JSON tiene {train_images_n} imágenes, supera el límite {MAX_ALLOWED_TRAIN_IMAGES}. "
        "Regenera el JSON en Notebook 02 con recorte."
    )


TRAIN images: 3300
VAL images  : 500


In [None]:
"""
En esta sección se define un Dataset personalizado sobre el conjunto de datos ya reducido en el segundo notebook, para trabajar con datos en formato COCO,
adaptado específicamente para modelos de detección de objetos de torchvision.
El objetivo principal es cargar las imágenes, procesar sus anotaciones y devolver los datos en el formato exacto que el modelo necesita para entrenar.
"""

class CocoReducedDetectionDataset(Dataset):
    """COCO-format detection dataset yielding ``(image_tensor, target)`` pairs.

    Annotations are grouped by ``image_id`` once at construction time so that
    item access only needs dictionary lookups. Labels are returned as the raw
    ``category_id`` values found in the annotations.
    """

    def __init__(self, images_dir: Path, coco_json: dict):
        """Index images and annotations from an already-parsed COCO dict.

        Args:
            images_dir: Directory that contains the image files.
            coco_json: Parsed COCO dictionary with ``"images"`` and
                ``"annotations"`` lists.
        """
        self.images_dir = images_dir
        self.coco = coco_json

        self.images = self.coco["images"]
        self.annotations = self.coco["annotations"]

        # image_id -> list of annotation dicts belonging to that image.
        self.img_id_to_anns = {}
        for ann in self.annotations:
            self.img_id_to_anns.setdefault(ann["image_id"], []).append(ann)

        self.id_to_image = {img["id"]: img for img in self.images}
        self.image_ids = list(self.id_to_image.keys())

    def __len__(self):
        """Return the number of distinct image ids."""
        return len(self.image_ids)

    def _build_target(self, img_id: int) -> dict:
        """Build the target dict for one image from its annotations.

        Boxes are converted from COCO ``[x, y, w, h]`` to ``[x1, y1, x2, y2]``.
        Annotations whose width or height is not positive are skipped, since
        torchvision detection models reject such boxes during training.

        Args:
            img_id: Image identifier used to look up the annotations.

        Returns:
            Dict with ``boxes`` (float32, shape ``(N, 4)``), ``labels``
            (int64), ``image_id`` (int64, one element), ``area`` (float32)
            and ``iscrowd`` (int64). ``N`` may be zero.
        """
        boxes = []
        labels = []
        areas = []
        iscrowd = []

        for ann in self.img_id_to_anns.get(img_id, []):
            x, y, w, h = ann["bbox"]
            if w <= 0 or h <= 0:
                continue
            boxes.append([x, y, x + w, y + h])
            labels.append(ann["category_id"])
            areas.append(ann.get("area", w * h))
            iscrowd.append(ann.get("iscrowd", 0))

        return {
            # reshape keeps the (N, 4) layout even when there are no boxes;
            # a plain empty tensor would have shape (0,).
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([img_id], dtype=torch.int64),
            "area": torch.tensor(areas, dtype=torch.float32),
            "iscrowd": torch.tensor(iscrowd, dtype=torch.int64),
        }

    def __getitem__(self, idx: int):
        """Load the image at position ``idx`` and return it with its target.

        Args:
            idx: Position within ``self.image_ids``.

        Returns:
            Tuple ``(img_tensor, target)`` where ``img_tensor`` is the RGB
            image converted with ``to_tensor`` and ``target`` is the dict
            produced by ``_build_target``.
        """
        img_id = self.image_ids[idx]
        img_meta = self.id_to_image[img_id]
        img_path = self.images_dir / img_meta["file_name"]

        # The context manager closes the file handle once the pixels are read.
        with Image.open(img_path) as img:
            img_tensor = F.to_tensor(img.convert("RGB"))

        return img_tensor, self._build_target(img_id)


def collate_fn(batch):
    """Split a batch of ``(image, target)`` pairs into two parallel lists.

    Images are kept in a list instead of being stacked.

    Args:
        batch: Sequence of ``(image, target)`` pairs.

    Returns:
        Tuple ``(images, targets)`` of two lists in the original order.
    """
    image_group, target_group = zip(*batch)
    return [*image_group], [*target_group]


In [None]:
"""
En esta sección se crean los datasets de entrenamiento y validación, y luego se construyen los DataLoaders que serán usados durante el entrenamiento del modelo.
También garantiza que train_loader y val_loader existan antes de entrenar.
"""

train_ds = CocoReducedDetectionDataset(TRAIN_IMG_DIR, train_coco)
val_ds = CocoReducedDetectionDataset(VAL_IMG_DIR, val_coco)

# Settings shared by both loaders.
_loader_common = {
    "num_workers": CONFIG["num_workers"],
    "collate_fn": collate_fn,
}

# Training batches are shuffled and use the configured batch size.
train_loader = DataLoader(
    train_ds,
    batch_size=CONFIG["batch_size"],
    shuffle=True,
    **_loader_common,
)

# Validation goes one image at a time, in dataset order.
val_loader = DataLoader(
    val_ds,
    batch_size=1,
    shuffle=False,
    **_loader_common,
)

print("Train samples:", len(train_ds))
print("Val samples  :", len(val_ds))
print("Train iters per epoch:", len(train_loader))


Train samples: 3300
Val samples  : 500
Train iters per epoch: 1650


2. Construcción del modelo

In [None]:
"""
En esta sección se construye el modelo de detección Faster R-CNN utilizando pesos preentrenados
 y se ajusta su arquitectura para trabajar únicamente con las clases de interés del proyecto.
Debido a que los identificadores de categoría en COCO no son consecutivos ni empiezan desde 1,
se realiza un remapeo de los category_id originales a etiquetas internas comprendidas entre 1..K,
dejando el valor 0 reservado para el background, tal como lo requiere el modelo.
Asimismo, se redefine la cabeza de clasificación del modelo para que el número de salidas coincida con el total de clases objetivo más la clase de fondo.
"""

# COCO category ids as plain ints, in the order given by the labelmap.
target_cat_ids_int = list(map(int, target_cat_ids))
# COCO category id -> internal label 1..K (0 is left for the background).
coco_to_internal = {cid: idx for idx, cid in enumerate(target_cat_ids_int, start=1)}
# Internal label 1..K -> class name, following the order of TARGET_CLASSES.
internal_to_name = dict(enumerate(TARGET_CLASSES, start=1))

# One output per target class plus the background class.
NUM_CLASSES = 1 + len(TARGET_CLASSES)

def remap_targets(targets: List[Dict]) -> List[Dict]:
    """Translate the category ids in each target into internal labels.

    Every label is looked up in the module-level ``coco_to_internal`` mapping.
    The input dicts and their tensors are left untouched.

    Args:
        targets: Target dicts, each with a ``"labels"`` tensor of category ids.

    Returns:
        A new list of shallow-copied target dicts whose ``"labels"`` entry is
        a fresh tensor with the mapped values.

    Raises:
        KeyError: If a label is not a key of ``coco_to_internal``.
    """
    remapped = []
    for target in targets:
        source = target["labels"]
        internal_ids = [coco_to_internal[int(cid)] for cid in source.tolist()]
        # Same dtype and device as the source labels.
        new_labels = torch.tensor(internal_ids, dtype=source.dtype, device=source.device)
        remapped.append({**target, "labels": new_labels})
    return remapped

# Build Faster R-CNN with the configured pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=CONFIG["pretrained_weights"],
)

# Replace the box predictor so it outputs NUM_CLASSES scores, reusing the
# input width of the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)

model.to(DEVICE)
model.train()

print("NUM_CLASSES:", NUM_CLASSES)
print("coco_to_internal:", coco_to_internal)


NUM_CLASSES: 4
coco_to_internal: {1: 1, 3: 2, 5: 3}


In [None]:
"""
En esta sección se define el optimizador AdamW, el cual se utiliza para actualizar los pesos del modelo durante el entrenamiento, ya que ofrece buena estabilidad y control del sobreajuste mediante weight_decay.
Además, se implementan las funciones de entrenamiento por época (train_one_epoch) y evaluación (evaluate_loss), donde se calcula la pérdida promedio por época, lo cual es especialmente útil para el seguimiento del
entrenamiento en CPU y para comparar el desempeño entre entrenamiento y validación.
"""

# AdamW over all model parameters, with learning rate and weight decay
# taken from CONFIG.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=CONFIG["learning_rate"],
    weight_decay=CONFIG["weight_decay"],
)

def train_one_epoch(model, data_loader, optimizer, epoch: int) -> float:
    """Run one training pass over ``data_loader`` and return the mean loss.

    Args:
        model: Detection model that returns a dict of losses when called with
            images and targets.
        data_loader: Iterable of ``(images, targets)`` batches.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress bar description.

    Returns:
        Sum of the per-batch total losses divided by the number of batches
        (``0.0`` when the loader yields no batches).
    """
    model.train()
    running_loss = 0.0
    batches_seen = 0

    progress = tqdm(data_loader, desc=f"train e{epoch}", leave=False)
    for batch_images, batch_targets in progress:
        batch_images = [image.to(DEVICE) for image in batch_images]
        # Map category ids to internal labels, then move each tensor to DEVICE.
        batch_targets = [
            {key: value.to(DEVICE) for key, value in target.items()}
            for target in remap_targets(batch_targets)
        ]

        # The model returns one loss per component; they are added together.
        batch_loss = sum(model(batch_images, batch_targets).values())

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += float(batch_loss.item())
        batches_seen += 1

    return running_loss / max(1, batches_seen)

@torch.no_grad()
def evaluate_loss(model, data_loader) -> float:
    """Compute the mean loss over ``data_loader`` without tracking gradients.

    torchvision detection models return the loss dict only in training mode;
    in evaluation mode they return detections. The model is therefore put in
    training mode for the pass, and the mode it had on entry is restored
    afterwards, even if a batch raises.

    Args:
        model: Detection model that returns a dict of losses in training mode.
        data_loader: Iterable of ``(images, targets)`` batches.

    Returns:
        Sum of the per-batch total losses divided by the number of batches
        (``0.0`` when the loader yields no batches).
    """
    was_training = model.training
    model.train()

    total_loss = 0.0
    n = 0

    try:
        for images, targets in tqdm(data_loader, desc="val", leave=False):
            images = [img.to(DEVICE) for img in images]
            targets = remap_targets(targets)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

            total_loss += float(losses.item())
            n += 1
    finally:
        # Put the model back in whichever mode the caller had it.
        model.train(was_training)

    return total_loss / max(1, n)


3. Uso del entrenamiento con dataset reducido, modelo preentrenado y optimizador

In [9]:
"""
Esta celda:
- Configura MLflow con SQLite (mlflow.db) para evitar deprecación de FileStore.
- Registra params, métricas y artefactos (mappings + config).
- Entrena en CPU y GUARDA CHECKPOINT POR ÉPOCA (para no perder progreso si falla algo).
- Calcula val_loss correctamente (en torchvision detection el loss sale en modo train, sin grad).
- Guarda además el mejor modelo (best) por menor val_loss.
"""

# -----------------------------
# MLflow: SQLite backend
# -----------------------------
# The tracking database is a SQLite file at the project root.
MLFLOW_DB = PROJECT_ROOT.joinpath("mlflow.db").resolve()
mlflow.set_tracking_uri("sqlite:///" + MLFLOW_DB.as_posix())

EXPERIMENT_NAME = "object_detection_coco_cpu"
mlflow.set_experiment(EXPERIMENT_NAME)

# The timestamp suffix gives each run, and the files derived from its name,
# a distinct identifier.
run_name = f"base_train_{datetime.now():%Y%m%d_%H%M%S}"

# Best validation loss seen so far and where its checkpoint was written.
best_val_loss = float("inf")
best_ckpt_path = None


@torch.no_grad()
def evaluate_loss(model, data_loader) -> float:
    """Return the mean validation loss, computed without gradients.

    torchvision detection models produce their loss dict in training mode,
    so the model is switched to training mode for the pass and put back in
    evaluation mode afterwards if that is how it arrived.

    Args:
        model: Detection model that returns a dict of losses in training mode.
        data_loader: Iterable of ``(images, targets)`` batches.

    Returns:
        Sum of the per-batch total losses divided by the number of batches
        (``0.0`` when the loader yields no batches).
    """
    restore_eval = not model.training
    model.train()

    running_loss = 0.0
    batches_seen = 0

    for batch_images, batch_targets in tqdm(data_loader, desc="val", leave=False):
        batch_images = [image.to(DEVICE) for image in batch_images]
        # Map category ids to internal labels, then move each tensor to DEVICE.
        batch_targets = [
            {key: value.to(DEVICE) for key, value in target.items()}
            for target in remap_targets(batch_targets)
        ]

        # Dict of losses; its values are added into one scalar.
        batch_loss = sum(model(batch_images, batch_targets).values())

        running_loss += float(batch_loss.item())
        batches_seen += 1

    if restore_eval:
        model.eval()

    return running_loss / max(1, batches_seen)


# Everything logged inside this block belongs to a single MLflow run.
with mlflow.start_run(run_name=run_name):
    # -----------------------------
    # Params + tags
    # -----------------------------
    mlflow.log_params(CONFIG)
    mlflow.set_tag("stage", "base")
    mlflow.set_tag("train_type", "cpu")
    mlflow.set_tag("classes", ",".join(TARGET_CLASSES))
    mlflow.set_tag("model_arch", CONFIG["model_name"])

    # -----------------------------
    # Configuration/mapping artifacts
    # -----------------------------
    # NOTE: json.dump writes the integer keys of the two mapping dicts as
    # strings, so readers of this file get string keys back.
    mapping_artifact = {
        "target_classes": TARGET_CLASSES,
        "target_category_ids": target_cat_ids_int,
        "coco_to_internal": coco_to_internal,
        "internal_to_name": internal_to_name,
        "train_json": str(TRAIN_JSON.as_posix()),
        "val_json": str(VAL_JSON.as_posix()),
    }

    mapping_path = MODELS_DIR / f"mapping_{run_name}.json"
    with open(mapping_path, "w", encoding="utf-8") as f:
        json.dump(mapping_artifact, f, indent=2, ensure_ascii=False)

    mlflow.log_artifact(str(mapping_path), artifact_path="artifacts")
    mlflow.log_artifact(str(PROJECT_CONFIG_PATH), artifact_path="artifacts")
    mlflow.log_artifact(str(LABELMAP_PATH), artifact_path="artifacts")

    # -----------------------------
    # Epoch-by-epoch training
    # -----------------------------
    for epoch in range(1, CONFIG["epochs"] + 1):
        # Timer covers training, checkpoint writing and validation.
        t0 = time.time()

        # TRAIN
        train_loss = train_one_epoch(model, train_loader, optimizer, epoch)

        # Per-epoch CHECKPOINT (ALWAYS, before validating) so progress is
        # kept even if validation fails.
        epoch_ckpt_path = MODELS_DIR / f"epoch_{epoch}_frcnn_cpu_{run_name}.pt"
        epoch_ckpt = {
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "config": CONFIG,
            "target_classes": TARGET_CLASSES,
            "target_category_ids": target_cat_ids_int,
            "coco_to_internal": coco_to_internal,
            "internal_to_name": internal_to_name,
        }
        torch.save(epoch_ckpt, epoch_ckpt_path)
        mlflow.log_artifact(str(epoch_ckpt_path), artifact_path="checkpoints")

        # VALIDATION (loss)
        val_loss = evaluate_loss(model, val_loader)

        epoch_time = time.time() - t0

        # LOG metrics
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("epoch_time_sec", epoch_time, step=epoch)

        print(
            f"Epoch {epoch}/{CONFIG['epochs']} | "
            f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | time={epoch_time:.1f}s"
        )

        # BEST model: the file name has no epoch in it, so each improvement
        # overwrites the previous best checkpoint of this run.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_ckpt_path = MODELS_DIR / f"best_frcnn_cpu_{run_name}.pt"

            # Shallow copy: same contents as the epoch checkpoint plus the
            # validation loss that made it the best.
            best_ckpt = epoch_ckpt.copy()
            best_ckpt["best_val_loss"] = best_val_loss

            torch.save(best_ckpt, best_ckpt_path)
            mlflow.log_artifact(str(best_ckpt_path), artifact_path="checkpoints")

    # Summary metric for the whole run, logged once without a step.
    mlflow.log_metric("best_val_loss", best_val_loss)

print("Entrenamiento finalizado")
print("best_val_loss:", best_val_loss)
print("best_ckpt_path:", best_ckpt_path)


2026/02/01 08:34:46 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/01 08:34:46 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/01 08:34:46 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/01 08:34:46 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/01 08:34:46 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/01 08:34:46 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/01 08:34:48 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/02/01 08:34:48 INFO mlflow.store.db.utils: Updating database tables
2026/02/01 08:34:48 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/01 08:34:48 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/02/01 08:34:48 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/01 08:34:48 INFO alembic.runtime

Epoch 1/2 | train_loss=0.4747 | val_loss=0.4836 | time=21692.4s


                                                                 

Epoch 2/2 | train_loss=0.4070 | val_loss=0.4406 | time=20395.6s
Entrenamiento finalizado
best_val_loss: 0.44062818501889706
best_ckpt_path: C:\Users\Johnny\Desktop\IA-final\models\local_checkpoints\best_frcnn_cpu_base_train_20260201_083448.pt


In [None]:
"""
Esta sección se usa principalmente como verificación del modelo para la interfaz: carga el mejor checkpoint,
después ejecuta inferencia sobre una imagen del set de validación y, por último, devuelve hasta 3 detecciones con mayor score.
"""

@torch.no_grad()
def load_model_from_ckpt(ckpt_path: Path):
    """Rebuild the detector from a saved checkpoint, ready for inference.

    Args:
        ckpt_path: Checkpoint file holding at least ``"target_classes"`` and
            ``"model_state_dict"``.

    Returns:
        Tuple ``(model, ckpt)``: the model in evaluation mode with the saved
        weights loaded, and the full checkpoint dict.
    """
    checkpoint = torch.load(ckpt_path, map_location="cpu")

    # NOTE(review): the recorded output of this cell shows a resnet50 file
    # being downloaded, presumably the default backbone weights; they are
    # overwritten by load_state_dict below. Confirm the effect on the
    # normalization layers before disabling that download.
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
    head_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    # One output per stored class plus the background class.
    class_count = 1 + len(checkpoint["target_classes"])
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_inputs, class_count)

    detector.load_state_dict(checkpoint["model_state_dict"])
    detector.eval()
    return detector, checkpoint

@torch.no_grad()
def predict_top3(
    model,
    image_path: Path,
    internal_to_name: Dict[int, str],
    score_thr: float = 0.5,
    top_k: int = 3,
):
    """Run the detector on one image and return its highest-scoring detections.

    Args:
        model: Detection model that, for a list of image tensors, returns a
            list of dicts with ``"boxes"``, ``"scores"`` and ``"labels"``.
        image_path: Image file to analyse.
        internal_to_name: Mapping from internal label to class name.
        score_thr: Minimum score a detection needs to be kept.
        top_k: Maximum number of detections returned (3 by default);
            values below zero are treated as zero.

    Returns:
        List of dicts with ``"name"``, ``"score"`` and ``"box_xyxy"``, ordered
        by decreasing score. Labels missing from ``internal_to_name`` are
        reported as ``"unknown"``. The list is empty when nothing passes the
        threshold.
    """
    # The context manager closes the file handle once the pixels are read.
    with Image.open(image_path) as img:
        img_t = F.to_tensor(img.convert("RGB"))

    outputs = model([img_t])[0]
    boxes = outputs["boxes"].cpu()
    scores = outputs["scores"].cpu()
    labels = outputs["labels"].cpu()

    keep = scores >= score_thr
    boxes = boxes[keep]
    scores = scores[keep]
    labels = labels[keep]

    if len(scores) == 0:
        return []

    # A negative limit would otherwise slice from the end of the ranking.
    limit = max(0, top_k)
    idx = torch.argsort(scores, descending=True)[:limit]

    return [
        {
            "name": internal_to_name.get(int(labels[i].item()), "unknown"),
            "score": float(scores[i].item()),
            "box_xyxy": boxes[i].tolist(),
        }
        for i in idx
    ]

# Without a best checkpoint there is nothing to load.
if best_ckpt_path is None:
    raise FileNotFoundError("No hay best_ckpt_path. Revisa el entrenamiento.")

infer_model, ckpt = load_model_from_ckpt(best_ckpt_path)

# The first validation image serves as the sample.
sample_img_meta = val_coco["images"][0]
sample_path = VAL_IMG_DIR / sample_img_meta["file_name"]

preds = predict_top3(
    infer_model,
    sample_path,
    ckpt["internal_to_name"],
    score_thr=CONFIG["score_threshold"],
)

print("Imagen:", sample_path)
if preds:
    for detection in preds:
        print(detection)
else:
    print("no se ha encontrado")


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Johnny/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:03<00:00, 31.1MB/s]


Imagen: C:\Users\Johnny\Desktop\IA-final\data\archive\coco2017\val2017\000000397133.jpg
{'name': 'person', 'score': 0.9935327768325806, 'box_xyxy': [385.1822204589844, 76.41061401367188, 497.1370544433594, 343.8106994628906]}
{'name': 'person', 'score': 0.9191341400146484, 'box_xyxy': [352.98193359375, 97.65465545654297, 404.5033264160156, 238.57272338867188]}
{'name': 'person', 'score': 0.6582873463630676, 'box_xyxy': [199.18922424316406, 9.169189453125, 283.14971923828125, 101.2300796508789]}
