In [12]:
from pathlib import Path
import xml.etree.ElementTree as ET
import shutil
import random

# -------------------------
# DATASET CONFIGURATION
# -------------------------

DRONE_ROOT = Path("data/DroneRGBT")      # original dataset folder
OUT_ROOT   = DRONE_ROOT / "format_rgbt"  # output folder for the converted dataset

SRC_TRAIN = "Train"   # source sub-folder name (capitalised)
SRC_TEST  = "Test"    # source sub-folder name (capitalised)

VAL_SPLIT = 0.2        # fraction of the Train annotations held out for validation

# Lower-case file suffixes accepted as images when indexing a folder.
IMG_EXTS = [".jpg", ".jpeg", ".png", ".bmp"]

# Fixed side length (in pixels) of the small box placed around each annotated point
BOX_PX = 10


# -------------------------
# FUNCIONES AUXILIARES
# -------------------------

def index_images(dir_path: Path, mode: str):
    """
    Build a lookup table {key -> image path} for one image folder.

    - mode "rgb" (case-insensitive): the key is the file stem
      (e.g. "1" for "1.jpg").
    - any other mode (thermal): one trailing "R"/"r" is dropped from the stem
      (e.g. "1" for "1R.jpg"); stems without it are used unchanged.

    Only regular files whose lower-cased suffix is listed in IMG_EXTS are
    indexed. When two files produce the same key, the one visited last wins.
    A missing folder prints a warning and yields an empty dict.
    """
    if not dir_path.exists():
        print(f"[WARN] Carpeta de imágenes no existe: {dir_path}")
        return {}

    index = {}
    for entry in dir_path.iterdir():
        # Skip sub-folders and anything that is not a recognised image type.
        if not (entry.is_file() and entry.suffix.lower() in IMG_EXTS):
            continue

        name = entry.stem
        # Thermal files carry an extra trailing R (1R -> 1, 23r -> 23).
        drop_last = mode.lower() != "rgb" and name.lower().endswith("r")
        index[name[:-1] if drop_last else name] = entry

    print(f"[INFO] Indexadas {len(index)} imágenes en {dir_path} (modo={mode})")
    return index


def parse_xml_points(xml_path, box_px=BOX_PX):
    """Convert one DroneRGBT annotation XML into YOLO label lines.

    Every <object> whose <name> is "person" and that carries a point (looked
    up as "point", then "points/point") becomes one line
    ``"0 cx cy w h"``. All four values are normalised by the image size read
    from <size> (or <image>/<size>); the box is a fixed square of ``box_px``
    pixels centred on the point.

    Args:
        xml_path: Path of the XML file to parse.
        box_px: Side length of the generated box, in pixels.

    Returns:
        A list of YOLO label strings; empty when no usable person point
        is found.

    Raises:
        ValueError: If <size> is missing, or its width/height is missing,
            not numeric, or not positive.
    """
    root = ET.parse(xml_path).getroot()

    # The image size may sit directly under the root or under <image>.
    size = root.find("size")
    if size is None:
        size = root.find("./image/size")
    if size is None:
        raise ValueError(f"No se encontró <size> en {xml_path}")

    try:
        w = float(size.findtext("width"))
        h = float(size.findtext("height"))
    except (TypeError, ValueError) as err:
        # TypeError: tag absent (findtext returns None); ValueError: not a number.
        raise ValueError(f"<size> sin width/height válidos en {xml_path}") from err
    if w <= 0 or h <= 0:
        # Guard the divisions below; a zero size would otherwise surface as
        # an unrelated ZeroDivisionError.
        raise ValueError(f"Dimensiones no positivas en {xml_path}: {w}x{h}")

    bw = box_px / w
    bh = box_px / h

    yolo = []
    for obj in root.findall("object"):
        # Tolerate surrounding whitespace in the class name; a missing or
        # empty <name> is treated as "not a person".
        if (obj.findtext("name") or "").strip() != "person":
            continue

        point = obj.find("point")
        if point is None:
            point = obj.find("./points/point")
        if point is None:
            continue

        try:
            px = float(point.findtext("x"))
            py = float(point.findtext("y"))
        except (TypeError, ValueError):
            # One malformed point should not abort the whole conversion.
            print(f"[WARN] Punto sin coordenadas válidas en {xml_path}; se omite")
            continue

        yolo.append(f"0 {px / w:.6f} {py / h:.6f} {bw:.6f} {bh:.6f}")

    return yolo

def prepare_split(xml_list, rgb_dir, ir_dir, dst_split):
    """Write one split (e.g. "train", "val", "test") under OUT_ROOT.

    For every XML in ``xml_list`` the matching RGB and thermal images are
    looked up by a shared key (the XML stem without a trailing "R"/"r").
    When both images exist and the XML yields at least one label line, the
    function writes ``labels/<split>/<rgb stem>.txt`` and copies the pair to
    ``images/<split>/rgb`` and ``images/<split>/t``. Both copies are stored
    under the RGB file name. XMLs with a missing image or with no labels are
    counted and skipped. A summary is printed at the end.
    """
    images_root = OUT_ROOT / "images" / dst_split
    rgb_out = images_root / "rgb"
    t_out = images_root / "t"
    labels_out = OUT_ROOT / "labels" / dst_split
    for folder in (rgb_out, t_out, labels_out):
        folder.mkdir(parents=True, exist_ok=True)

    # Each source folder is scanned only once per split.
    rgb_index = index_images(rgb_dir, mode="rgb")
    t_index = index_images(ir_dir, mode="t")

    n_xml = len(xml_list)
    paired = missing_images = empty_labels = 0

    for xml_file in xml_list:
        stem = xml_file.stem  # e.g. "1015R"
        # Shared key: the stem without its trailing R ("1015R" -> "1015").
        key = stem[:-1] if stem.lower().endswith("r") else stem

        print(f"[{dst_split}] Procesando {stem} (key={key})...")

        rgb_src = rgb_index.get(key)
        t_src = t_index.get(key)
        if rgb_src is None or t_src is None:
            print(f"[WARN] Faltan imágenes RGB/T para {stem} (RGB={rgb_src}, T={t_src})")
            missing_images += 1
            continue

        lines = parse_xml_points(xml_file)
        if not lines:
            # No person annotated: nothing is written for this XML.
            empty_labels += 1
            continue

        out_name = rgb_src.name
        (labels_out / f"{rgb_src.stem}.txt").write_text("\n".join(lines))

        # Both modalities share the RGB file name in the output tree.
        shutil.copy2(rgb_src, rgb_out / out_name)
        shutil.copy2(t_src, t_out / out_name)
        paired += 1

    print(f"[{dst_split}] XML totales: {n_xml}")
    print(f"[{dst_split}] Pares OK (img+T+label): {paired}")
    print(f"[{dst_split}] Sin imágenes emparejadas: {missing_images}")
    print(f"[{dst_split}] Sin labels (0 personas): {empty_labels}")


# -------------------------
# FUNCIÓN PRINCIPAL MODULAR
# -------------------------

def prepare_dronergbt(val_split=VAL_SPLIT, seed=42):
    """Run the whole DroneRGBT -> YOLO (rgb + t) conversion.

    Steps:
    - split the Train annotations into train/val,
    - process the Test annotations as the test split,
    - convert the point annotations to YOLO boxes,
    - copy the paired RGB/T images,
    - leave everything under OUT_ROOT.

    Args:
        val_split: Fraction (0..1) of the Train XMLs moved to validation.
        seed: Seed for the train/val shuffle. The XML list is sorted before
            shuffling, so a fixed seed gives the same partition on every run.
            This matters because the output folders are reused and not
            cleaned: a different partition on a re-run would leave the same
            image in both train and val. Pass ``None`` for a fresh random
            split each time.

    Raises:
        ValueError: If ``val_split`` is outside [0, 1].
        FileNotFoundError: If the Train or Test "GT_" folder does not exist.
    """
    if not 0.0 <= val_split <= 1.0:
        raise ValueError(f"val_split debe estar entre 0 y 1, recibido: {val_split}")

    # --- ORIGINAL TRAIN ---
    src_train = DRONE_ROOT / SRC_TRAIN
    train_gt_dir = src_train / "GT_"

    if not train_gt_dir.exists():
        raise FileNotFoundError(f"No existe carpeta de GT_ de Train: {train_gt_dir}")

    train_gt  = sorted(train_gt_dir.glob("*.xml"))
    train_rgb = src_train / "RGB"
    train_ir  = src_train / "Infrared"

    # A dedicated generator keeps the split reproducible without touching
    # the global random state.
    random.Random(seed).shuffle(train_gt)

    n_val = int(len(train_gt) * val_split)
    val_gt = train_gt[:n_val]
    train_gt_final = train_gt[n_val:]

    print(f"Total Train original: {len(train_gt)}")
    print(f"→ Train nuevo: {len(train_gt_final)}")
    print(f"→ Val nuevo:   {len(val_gt)}")

    # Both splits read their images from the same Train folders.
    prepare_split(train_gt_final, train_rgb, train_ir, "train")
    prepare_split(val_gt,         train_rgb, train_ir, "val")

    # --- ORIGINAL TEST ---
    src_test = DRONE_ROOT / SRC_TEST
    test_gt_dir = src_test / "GT_"

    if not test_gt_dir.exists():
        raise FileNotFoundError(f"No existe carpeta de GT_ de Test: {test_gt_dir}")

    test_gt  = sorted(test_gt_dir.glob("*.xml"))
    test_rgb = src_test / "RGB"
    test_ir  = src_test / "Infrared"

    print(f"Total Test original: {len(test_gt)}")

    prepare_split(test_gt, test_rgb, test_ir, "test")

    print("✔ Dataset DroneRGBT convertido a formato YOLO rgb+t en:", OUT_ROOT)


# Entry point when this file is run directly as a script:
if __name__ == "__main__":
    prepare_dronergbt()


Total Train original: 1807
→ Train nuevo: 1446
→ Val nuevo:   361
[INFO] Indexadas 1807 imágenes en data\DroneRGBT\Train\RGB (modo=rgb)
[INFO] Indexadas 1807 imágenes en data\DroneRGBT\Train\Infrared (modo=t)
[train] Procesando 624R (key=624)...
[train] Procesando 1098R (key=1098)...
[train] Procesando 1617R (key=1617)...
[train] Procesando 724R (key=724)...
[train] Procesando 14R (key=14)...
[train] Procesando 65R (key=65)...
[train] Procesando 1801R (key=1801)...
[train] Procesando 978R (key=978)...
[train] Procesando 1109R (key=1109)...
[train] Procesando 646R (key=646)...
[train] Procesando 1766R (key=1766)...
[train] Procesando 739R (key=739)...
[train] Procesando 709R (key=709)...
[train] Procesando 1265R (key=1265)...
[train] Procesando 1726R (key=1726)...
[train] Procesando 660R (key=660)...
[train] Procesando 1581R (key=1581)...
[train] Procesando 1697R (key=1697)...
[train] Procesando 357R (key=357)...
[train] Procesando 1287R (key=1287)...
[train] Procesando 1255R (key=1255)

In [None]:
from pathlib import Path
from ultralytics import YOLO

# Tus utilidades existentes (NO se tocan)
from src.utils_MF import run_middle_fusion_split, print_metrics
from src.utils_LF import evaluate_yolo_predictions

In [None]:

# Project root: the current working directory as an absolute path.
ROOT = Path(".").resolve()

# Converted dataset (images/<split>/{rgb,t} and labels/<split>).
DATA_ROOT = ROOT.joinpath("data", "DroneRGBT", "format_rgbt")

# RGB and thermal image folders, per split
VAL_RGB_DIR = DATA_ROOT.joinpath("images", "val", "rgb")
VAL_T_DIR = DATA_ROOT.joinpath("images", "val", "t")

TEST_RGB_DIR = DATA_ROOT.joinpath("images", "test", "rgb")
TEST_T_DIR = DATA_ROOT.joinpath("images", "test", "t")

# Ground-truth label folders, per split
GT_VAL_DIR = DATA_ROOT.joinpath("labels", "val")
GT_TEST_DIR = DATA_ROOT.joinpath("labels", "test")


In [None]:
# Weight files of the two single-modality detectors, relative to the working
# directory. Defined once so the log lines below always show exactly what
# was loaded.
RGB_WEIGHTS = "runs/detect/dronergbt_rgb/weights/best.pt"
T_WEIGHTS = "runs/detect/dronergbt_t/weights/best.pt"

model_rgb = YOLO(RGB_WEIGHTS)
model_t = YOLO(T_WEIGHTS)

print("Modelos cargados para DroneRGBT:")
print(f"  RGB -> {RGB_WEIGHTS}")
print(f"  T   -> {T_WEIGHTS}")


In [None]:
# Output folders of the middle-fusion runs, one image folder and one
# prediction folder per split (passed below as out_img_dir / out_pred_dir).
OUT_IMG_MF_VAL_DIR = ROOT.joinpath("runs", "middle_fusion_dronergbt", "val")
OUT_PRED_MF_VAL_DIR = ROOT.joinpath("runs", "middle_fusion_dronergbt", "preds_val")

OUT_IMG_MF_TEST_DIR = ROOT.joinpath("runs", "middle_fusion_dronergbt", "test")
OUT_PRED_MF_TEST_DIR = ROOT.joinpath("runs", "middle_fusion_dronergbt", "preds_test")

# Create every output folder up front; existing folders are left as they are.
for d in (
    OUT_IMG_MF_VAL_DIR,
    OUT_PRED_MF_VAL_DIR,
    OUT_IMG_MF_TEST_DIR,
    OUT_PRED_MF_TEST_DIR,
):
    d.mkdir(parents=True, exist_ok=True)


In [None]:
# Number of object classes; passed to the evaluation calls as num_classes.
NUM_CLASSES = 1
# Class display names. Presumably the list index is the class id — TODO confirm.
CLASS_LABELS = ["Person"]


In [None]:
# Middle fusion on the validation split: run both detectors over the paired
# RGB/T folders, then score the saved predictions against the ground truth.
print("\n=== Middle Fusion (VAL) ===")

run_middle_fusion_split(
    model_rgb=model_rgb,
    model_t=model_t,
    # Derived from CLASS_LABELS so the id -> name map cannot drift from the
    # configured class list (currently {0: "Person"}).
    class_names=dict(enumerate(CLASS_LABELS)),
    rgb_dir=VAL_RGB_DIR,
    t_dir=VAL_T_DIR,
    out_img_dir=OUT_IMG_MF_VAL_DIR,
    out_pred_dir=OUT_PRED_MF_VAL_DIR,
    img_size=640,
)

metrics_mf_val = evaluate_yolo_predictions(
    pred_dir=OUT_PRED_MF_VAL_DIR,
    gt_dir=GT_VAL_DIR,
    num_classes=NUM_CLASSES,
    iou_threshold=0.5,
)

print_metrics("Middle Fusion (VAL)", metrics_mf_val)

In [None]:
# Middle fusion on the test split: run both detectors over the paired RGB/T
# folders, then score the saved predictions against the ground truth.
print("\n=== Middle Fusion (TEST) ===")

run_middle_fusion_split(
    model_rgb=model_rgb,
    model_t=model_t,
    # Derived from CLASS_LABELS so the id -> name map cannot drift from the
    # configured class list (currently {0: "Person"}).
    class_names=dict(enumerate(CLASS_LABELS)),
    rgb_dir=TEST_RGB_DIR,
    t_dir=TEST_T_DIR,
    out_img_dir=OUT_IMG_MF_TEST_DIR,
    out_pred_dir=OUT_PRED_MF_TEST_DIR,
    img_size=640,
)

metrics_mf_test = evaluate_yolo_predictions(
    pred_dir=OUT_PRED_MF_TEST_DIR,
    gt_dir=GT_TEST_DIR,
    num_classes=NUM_CLASSES,
    iou_threshold=0.5,
)

print_metrics("Middle Fusion (TEST)", metrics_mf_test)


ModuleNotFoundError: No module named 'utils_MF'