In [16]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
from pathlib import Path
import random
import shutil
import re
from src.utils_IT import *

In [18]:
# Seed the global RNG so the shuffle performed later in the notebook is repeatable.
random.seed(42)

# Dataset root and the per-class sub-folders that are scanned for images.
ROOT = Path("data")
CLASSES = ["Cow", "Deer", "Horse"]

# Accepted image extensions, each in its lower-case and upper-case spelling.
IMG_EXTS = {
    spelling
    for ext in (".jpg", ".jpeg", ".png")
    for spelling in (ext, ext.upper())
}

# Fractions of the pairs assigned to train and val; the remainder becomes test.
TRAIN_P, VAL_P = 0.70, 0.20

# Output roots: one dataset tree for the RGB images, one for the thermal ones.
OUT_RGB, OUT_T = ROOT / "format_rgb", ROOT / "format_t"

# Rebuild both output trees from scratch: drop whatever a previous run left
# behind, then recreate the empty images/<split> and labels/<split> folders.
for OUT in (OUT_RGB, OUT_T):
    if OUT.exists():
        shutil.rmtree(OUT)
    for split in ("train", "val", "test"):
        for kind in ("images", "labels"):
            (OUT / f"{kind}/{split}").mkdir(parents=True, exist_ok=True)


# Collect one (rgb_img, rgb_lbl, t_img, t_lbl) tuple for every RGB image that
# has its own label file, a thermal counterpart, and a label for that
# counterpart. Anything incomplete is reported and skipped.
pairs = []
for cls in CLASSES:
    cls_dir = ROOT / cls

    # Path.iterdir() yields entries in arbitrary order. Sorting makes the
    # content of `pairs` (and therefore the seeded shuffle that follows)
    # identical from one machine / file system to the next.
    for p in sorted(cls_dir.iterdir()):
        # Compare case-insensitively so mixed-case extensions such as ".Jpg"
        # are accepted as well.
        if p.suffix.lower() not in IMG_EXTS:
            continue
        # Files whose stem ends in "_R" are never used as the RGB side
        # (presumably they are the thermal images; confirm against
        # find_thermal_for_rgb).
        if p.stem.endswith("_R"):
            continue

        rgb_img = p
        rgb_lbl = p.with_suffix(".txt")
        if not rgb_lbl.exists():
            print(f"Sin label RGB para {p.name}, la salto.")
            continue

        t_img = find_thermal_for_rgb(rgb_img, cls_dir)
        if t_img is None:
            print(f"Did not find t for {p.name}.")
            continue

        t_lbl = t_img.with_suffix(".txt")
        if not t_lbl.exists():
            print(f"Without label t for {t_img.name}.")
            continue

        pairs.append((rgb_img, rgb_lbl, t_img, t_lbl))

print(f"\nTotal pairs RGB–T found: {len(pairs)}")


Total pairs RGB–T found: 164


In [19]:
# Shuffle a *copy* of `pairs` with a dedicated, seeded RNG. Shuffling `pairs`
# in place with the global RNG made this cell non-idempotent: every re-run
# produced a different split and copied samples into more than one split
# folder. With a private Random instance the split is the same on every run.
SPLIT_SEED = 42  # same value the setup cell passes to random.seed()
shuffled_pairs = list(pairs)
random.Random(SPLIT_SEED).shuffle(shuffled_pairs)

n = len(shuffled_pairs)
n_train = int(n * TRAIN_P)
n_val   = int(n * VAL_P)
n_test  = n - n_train - n_val  # whatever is left after train and val

train_pairs = shuffled_pairs[:n_train]
val_pairs   = shuffled_pairs[n_train:n_train + n_val]
test_pairs  = shuffled_pairs[n_train + n_val:]

# Sanity checks: the three slices cover every pair exactly once.
assert len(test_pairs) == n_test
assert len(train_pairs) + len(val_pairs) + len(test_pairs) == n

print("Split:")
print(f"- train: {len(train_pairs)}")
print(f"- val:   {len(val_pairs)}")
print(f"- test:  {len(test_pairs)}")

def copy_pairs(pairs_split, split_name: str, out_rgb=None, out_t=None):
    """Copy the RGB/thermal pairs of one split into the two dataset trees.

    Each RGB image/label goes to ``<out_rgb>/images|labels/<split_name>/`` and
    each thermal image/label to ``<out_t>/images|labels/<split_name>/``; file
    names are kept unchanged.

    Args:
        pairs_split: iterable of ``(rgb_img, rgb_lbl, t_img, t_lbl)`` paths.
        split_name: name of the split sub-folder (e.g. ``"train"``).
        out_rgb: root of the RGB dataset tree; defaults to the notebook-level
            ``OUT_RGB``.
        out_t: root of the thermal dataset tree; defaults to the
            notebook-level ``OUT_T``.

    Raises:
        ValueError: if two different source files of this split would land on
            the same destination (for example the same file name in two class
            folders). The whole split is checked before anything is copied,
            so no partial copy is left behind.
    """
    rgb_root = OUT_RGB if out_rgb is None else Path(out_rgb)
    t_root = OUT_T if out_t is None else Path(out_t)

    # Pass 1: plan every copy and detect destination clashes up front.
    planned = {}  # destination path -> source path
    for rgb_img, rgb_lbl, t_img, t_lbl in pairs_split:
        targets = (
            (rgb_img, rgb_root / "images" / split_name),
            (rgb_lbl, rgb_root / "labels" / split_name),
            (t_img, t_root / "images" / split_name),
            (t_lbl, t_root / "labels" / split_name),
        )
        for src, dst_dir in targets:
            src = Path(src)
            dst = dst_dir / src.name
            previous = planned.setdefault(dst, src)
            if previous != src:
                # Copying both would silently drop one sample.
                raise ValueError(
                    f"{previous} and {src} would both be copied to {dst}"
                )

    # Pass 2: perform the copies, creating the split folders when missing.
    for dst, src in planned.items():
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)

# Materialise the three splits on disk, in train -> val -> test order.
for split_name, split_pairs in (
    ("train", train_pairs),
    ("val", val_pairs),
    ("test", test_pairs),
):
    copy_pairs(split_pairs, split_name)

Split:
- train: 114
- val:   32
- test:  18


In [20]:
def _render_dataset_yaml(dataset_dir, class_names):
    """Return the text of a dataset YAML for one output tree.

    Args:
        dataset_dir: value written to the ``path:`` entry.
        class_names: class names in id order; entry ``i`` is written as
            ``  i: <name>`` under ``names:``.

    Returns:
        The YAML document as a string, ending with a newline.
    """
    names_block = "\n".join(
        f"  {idx}: {name}" for idx, name in enumerate(class_names)
    )
    return f"""path: {dataset_dir}

train: images/train
val: images/val
test: images/test

names:
{names_block}
"""


# Both files share one template, and the class ids are taken from CLASSES so
# they cannot drift from the list used when collecting the pairs.
yaml_rgb_path = OUT_RGB / "wildlife_rgb.yaml"
yaml_rgb_content = _render_dataset_yaml(OUT_RGB, CLASSES)
yaml_rgb_path.write_text(yaml_rgb_content, encoding="utf-8")

yaml_t_path = OUT_T / "wildlife_t.yaml"
yaml_t_content = _render_dataset_yaml(OUT_T, CLASSES)
yaml_t_path.write_text(yaml_t_content, encoding="utf-8")

113