In [None]:
# Root of the training split; note that it already ends with a "/" so the
# sub-directory names below must NOT start with another one.
train_path = "data/00--raw/football-field-detection.v15i.yolov5pytorch/train/"

# Sample image and its keypoint label file (same stem, different folder/extension).
IMG_PATH = f"{train_path}images/0a2d9b_2_3_png.rf.2b39030ff9f2e93a34aa9ca69abbd77c.jpg"
LBL_PATH = f"{train_path}labels/0a2d9b_2_3_png.rf.2b39030ff9f2e93a34aa9ca69abbd77c.txt"

# Alternative sample — uncomment both lines together to switch.
# IMG_PATH = f"{train_path}images/0a2d9b_6_11_png.rf.2cfd6b6dad39a0f39eee5eb3d729823f.jpg"
# LBL_PATH = f"{train_path}labels/0a2d9b_6_11_png.rf.2cfd6b6dad39a0f39eee5eb3d729823f.txt"

# Alternative sample: right side
# IMG_PATH = f"{train_path}images/0a2d9b_7_14_png.rf.04beeed5de2d712614d10a1e75aae7b9.jpg"
# LBL_PATH = f"{train_path}labels/0a2d9b_7_14_png.rf.04beeed5de2d712614d10a1e75aae7b9.txt"


In [None]:
#!/usr/bin/env python3
# clean_kp_dataset.py

import cv2
import numpy as np
from pathlib import Path
from src.visual.field import PitchConfig, FieldVisualizer

# ─── 1) Mapping: Roboflow keypoint index → canonical model-point index ───
#    Keys are keypoint positions in the label file; values are row indices
#    into the reference model-point array. Keypoints whose index is not a
#    key here are ignored when fitting the homography.
#
#    Currently the identity over indices 0..31. If the reference points are
#    ever reordered, replace this with an explicit {rf_index: model_index}
#    dict.
#    NOTE(review): other comments in this file mention 33 reference points,
#    but only 32 indices (0..31) are mapped here — confirm this is intended.
RF2MY = {kp_index: kp_index for kp_index in range(32)}

def compute_homography_normalized(
    lbl_path: Path,
    model_pts: np.ndarray,
    cfg: PitchConfig
) -> "np.ndarray | None":
    """
    Fit a homography from label-file keypoints to length/width-scaled pitch points.

    Reads one YOLO-KP .txt annotation
    (format: ``class xc yc w h k0_x k0_y k0_v k1_x k1_y k1_v ...``) and builds

    * ``src_pts = [k.x, k.y]`` exactly as stored in the label file
      (expected to be normalized to [0, 1]), and
    * ``dst_pts = [X_i / cfg.length, Y_i / cfg.width]`` from ``model_pts``,

    for every keypoint whose visibility flag is > 0 and whose index is a key
    of ``RF2MY``. The homography is then fitted with RANSAC.

    Only the first non-blank line of the file is used. Flattening several
    annotation lines into one array would interleave the class/bbox fields of
    later lines with keypoint triples.
    NOTE(review): assumes the pitch annotation is the first object in the
    file — confirm against the dataset export.

    Args:
        lbl_path:  Path to the label file.
        model_pts: Array of reference points, indexed by the values of
                   ``RF2MY``; each row unpacks to ``(x, y)``.
        cfg:       Pitch configuration providing ``length`` and ``width``.

    Returns:
        A 3×3 ``float32`` matrix scaled so that ``H[2, 2] == 1``, or ``None``
        when the file is empty, fewer than 4 usable keypoints are available,
        or no well-conditioned homography could be fitted.

    Raises:
        ValueError: If the label line contains non-numeric tokens or its
            keypoint section is not a whole number of ``(x, y, v)`` triples.
    """
    # Use only the first annotation line (see docstring).
    lines = [ln for ln in lbl_path.read_text().splitlines() if ln.strip()]
    if not lines:
        return None

    try:
        vals = [float(tok) for tok in lines[0].split()]
    except ValueError as err:
        raise ValueError(f"non-numeric value in label file {lbl_path}") from err

    # Skip the 5 leading fields (class, xc, yc, w, h); the rest are keypoints.
    kp_vals = vals[5:]
    if len(kp_vals) % 3 != 0:
        raise ValueError(
            f"keypoint section of {lbl_path} has {len(kp_vals)} values, "
            "which is not a multiple of 3 (x, y, visibility)"
        )
    kp = np.array(kp_vals, dtype=np.float32).reshape(-1, 3)

    src, dst = [], []
    L, W = cfg.length, cfg.width

    for j, (xn, yn, vis) in enumerate(kp):
        # Drop keypoints flagged as not visible or without a model counterpart.
        if vis <= 0 or j not in RF2MY:
            continue
        src.append([xn, yn])
        mx, my = model_pts[RF2MY[j]]
        dst.append([mx / L, my / W])

    # A homography has 8 degrees of freedom → at least 4 correspondences.
    if len(src) < 4:
        return None

    src = np.asarray(src, dtype=np.float32)
    dst = np.asarray(dst, dtype=np.float32)

    # Both point sets are in unit-scaled coordinates, hence the small
    # reprojection threshold.
    Hn, _ = cv2.findHomography(
        src, dst,
        cv2.RANSAC,
        ransacReprojThreshold=1e-3
    )
    if Hn is None:
        return None

    # A non-finite matrix or a (near-)zero bottom-right entry cannot be
    # normalized without producing inf/nan values; treat it as a failed fit.
    if not np.all(np.isfinite(Hn)) or np.isclose(Hn[2, 2], 0.0):
        return None

    # normalize so Hn[2,2] == 1
    return (Hn / Hn[2, 2]).astype(np.float32)


def clean_dataset(
    input_root: str,
    output_root: str,
    image_exts: tuple = (".png", ".jpg", ".jpeg")
):
    """
    Convert a keypoint-annotated dataset into (image, homography) pairs.

    For each of the ``train`` / ``valid`` / ``test`` splits under
    ``input_root``, every image in ``<split>/images`` that has a matching
    ``<split>/labels/<stem>.txt`` file is passed to
    ``compute_homography_normalized``. Samples for which a homography is
    returned are written to ``output_root`` as ``frame_XXXXXX.png`` plus
    ``frame_XXXXXX_H.npy``, numbered consecutively across all splits.

    Args:
        input_root:  Dataset root containing the split directories.
        output_root: Destination directory; created if it does not exist.
        image_exts:  File extensions (case-insensitive, with leading dot)
                     treated as images. The default also covers ``.jpg``
                     files, which a ``*.png``-only search would miss entirely.

    Images without a label file, samples without a usable homography, and
    images that cannot be read or written are skipped; the latter three cases
    print a warning.
    """
    inp = Path(input_root)
    out = Path(output_root)
    out.mkdir(parents=True, exist_ok=True)

    # reference points used as homography targets
    fv        = FieldVisualizer(PitchConfig())
    model_pts = fv._reference_model_pts()

    exts = {ext.lower() for ext in image_exts}

    idx = 0
    for split in ("train", "valid", "test"):
        img_dir = inp / split / "images"
        lbl_dir = inp / split / "labels"
        if not img_dir.is_dir():
            # A dataset export may not contain every split.
            continue

        img_paths = sorted(
            p for p in img_dir.iterdir()
            if p.is_file() and p.suffix.lower() in exts
        )
        for img_path in img_paths:
            lbl_path = lbl_dir / f"{img_path.stem}.txt"
            if not lbl_path.exists():
                continue

            Hn = compute_homography_normalized(lbl_path, model_pts, fv.cfg)
            if Hn is None:
                print(f"⚠ skipping {img_path.name}: <4 visible pts>")
                continue

            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(str(img_path))
            if img is None:
                print(f"⚠ skipping {img_path.name}: image could not be read")
                continue

            # Only consume an index once the image has been written, so the
            # output numbering has no gaps and the final count is accurate.
            next_idx = idx + 1
            out_img = out / f"frame_{next_idx:06d}.png"
            out_H   = out / f"frame_{next_idx:06d}_H.npy"
            if not cv2.imwrite(str(out_img), img):
                print(f"⚠ skipping {img_path.name}: could not write {out_img.name}")
                continue
            np.save(str(out_H), Hn)
            idx = next_idx

    print(f"✅ Wrote {idx} samples to {out}")


# Script entry point: clean the raw Roboflow export into the
# annotated-homographies folder.
if __name__ == "__main__":
    clean_dataset(
        input_root="data/00--raw/football-field-detection.v15i.yolov5pytorch",
        output_root="data/01--clean/annotated_homographies",
    )
