In [None]:
import os
import shutil
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import yaml

from ultralytics import YOLO

# Notebook-wide matplotlib defaults: figure size and a grid on every axes
# (individual plots below override these where needed).
plt.rcParams["figure.figsize"] = (8, 5)
plt.rcParams["axes.grid"] = True

In [None]:
# Project root, relative to the notebook's working directory.
BASE_DIR = Path("..")

# Annotation table with one row per bounding box.
ANN_PATH = BASE_DIR / "data" / "annotations" / "annotations.csv"

TEST_IMAGES_DIR = BASE_DIR / "data" / "images" / "test"

# Dataset definition of the evaluated model. Derived from BASE_DIR instead of a
# machine-specific absolute path so the notebook runs from any checkout.
DATASET_DIR = BASE_DIR / "data" / "meta" / "exp1" / "datasets" / "v5_full"
# NOTE(review): despite the "_TXT" suffix this points at the test *labels folder*,
# and it is not used in the visible part of the notebook — confirm it is still needed.
TEST_LIST_TXT = str(DATASET_DIR / "labels" / "test")
DATASET_YAML_ORIG = DATASET_DIR / "dataset.yaml"

# NOTE(review): the variable and the figure titles say "v7_full" but the weights
# come from the "v5_full_seed999" run — confirm which model is being evaluated.
V7_FULL_WEIGHTS = BASE_DIR / "runs" / "exp1" / "v5_full_seed999" / "weights" / "best.pt"

# Thresholds forwarded to model.val(): confidence cut-off and NMS IoU.
CONF_TH = 0.25
IOU_NMS = 0.6

In [None]:
# Load the annotation table (one row per bounding box).
df = pd.read_csv(ANN_PATH)

# Image size and box corners are used in arithmetic below, so force them to float.
numeric_cols = ["image_width", "image_height", "xmin", "ymin", "xmax", "ymax"]
df = df.astype({name: float for name in numeric_cols})

# Derived geometry, in the same column order as before: absolute box size,
# size relative to the image, relative box centre and aspect ratio.
df = df.assign(
    box_width=lambda d: d["xmax"] - d["xmin"],
    box_height=lambda d: d["ymax"] - d["ymin"],
    box_area=lambda d: d["box_width"] * d["box_height"],
    img_area=lambda d: d["image_width"] * d["image_height"],
    box_width_rel=lambda d: d["box_width"] / d["image_width"],
    box_height_rel=lambda d: d["box_height"] / d["image_height"],
    box_area_rel=lambda d: d["box_area"] / d["img_area"],
    cx_rel=lambda d: (d["xmin"] + d["xmax"]) / 2 / d["image_width"],
    cy_rel=lambda d: (d["ymin"] + d["ymax"]) / 2 / d["image_height"],
    box_aspect_ratio=lambda d: d["box_width"] / d["box_height"],
)

df.head(3)

Unnamed: 0,image_id,image_width,image_height,class_name,xmin,ymin,xmax,ymax,tipo_obstaculo,temporalidad,...,box_width,box_height,box_area,img_area,box_width_rel,box_height_rel,box_area_rel,cx_rel,cy_rel,box_aspect_ratio
0,gsv-amsterdam-1071-Obstacle.png,1440.0,960.0,no_obstaculo,266.87747,0.0,433.833992,107.509881,,,...,166.956522,107.509881,17949.475855,1382400.0,0.115942,0.111989,0.012984,0.243303,0.055995,1.552941
1,gsv-amsterdam-1071-Obstacle.png,1440.0,960.0,no_obstaculo,135.335968,0.0,190.988142,118.893281,,,...,55.652174,118.893281,6616.669531,1382400.0,0.038647,0.123847,0.004786,0.113307,0.061924,0.468085
2,gsv-amsterdam-1071-Obstacle.png,1440.0,960.0,no_obstaculo,761.422925,0.0,809.486166,327.588933,,,...,48.063241,327.588933,15744.985861,1382400.0,0.033377,0.341238,0.01139,0.545455,0.170619,0.146718


In [6]:
# One row per image: its size, the number of annotated boxes and the number
# of distinct classes among them.
img_df = df.groupby("image_id").agg(
    image_width=pd.NamedAgg(column="image_width", aggfunc="first"),
    image_height=pd.NamedAgg(column="image_height", aggfunc="first"),
    n_objects=pd.NamedAgg(column="class_name", aggfunc="count"),
    n_classes=pd.NamedAgg(column="class_name", aggfunc="nunique"),
)

img_df.head()


Unnamed: 0_level_0,image_width,image_height,n_objects,n_classes
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gsv-amsterdam-1071-Obstacle.png,1440.0,960.0,5,3
gsv-amsterdam-1081-Obstacle.png,1440.0,960.0,4,3
gsv-amsterdam-1105-Obstacle.png,1440.0,960.0,4,3
gsv-amsterdam-1109-Obstacle.png,1440.0,960.0,9,4
gsv-amsterdam-1114-Obstacle.png,1440.0,960.0,10,4


In [7]:
def iou(box_a, box_b):
    """Intersection-over-union of two axis-aligned boxes.

    Each box is a 4-item sequence ``(x1, y1, x2, y2)``. Negative widths or
    heights are treated as zero. Returns 0.0 when the union is not positive.
    """
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b

    # Extent of the overlap rectangle along each axis (0 when disjoint).
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
    intersection = overlap_w * overlap_h

    size_a = max(0, (ax2 - ax1)) * max(0, (ay2 - ay1))
    size_b = max(0, (bx2 - bx1)) * max(0, (by2 - by1))
    total = size_a + size_b - intersection

    return 0.0 if total <= 0 else intersection / total

# Per-image overlap ratio: the fraction of box pairs in the image whose IoU
# exceeds `iou_threshold`. Images with fewer than two boxes get 0.0.
overlap_list = []
iou_threshold = 0.1

for img_id, group in tqdm(df.groupby("image_id"), desc="Overlap por imagen"):
    boxes = group[["xmin", "ymin", "xmax", "ymax"]].values
    n = len(boxes)

    if n < 2:
        overlap_ratio = 0.0
    else:
        total_pairs = n * (n - 1) / 2
        overlapping_pairs = 0
        # Visit every unordered pair (i, j) exactly once.
        for i in range(n):
            for j in range(i + 1, n):
                if iou(boxes[i], boxes[j]) > iou_threshold:
                    overlapping_pairs += 1
        overlap_ratio = overlapping_pairs / total_pairs

    overlap_list.append((img_id, overlap_ratio))

overlap_df = pd.DataFrame(overlap_list, columns=["image_id", "overlap_ratio"]).set_index("image_id")

# DataFrame.join raises on overlapping column names, so drop the result of a
# previous execution first; this keeps the cell safe to re-run.
img_df = img_df.drop(columns=["overlap_ratio"], errors="ignore").join(overlap_df, how="left")
img_df["overlap_ratio"] = img_df["overlap_ratio"].fillna(0.0)

img_df[["n_objects", "n_classes", "overlap_ratio"]].describe()


Overlap por imagen: 100%|██████████| 1250/1250 [00:00<00:00, 1885.62it/s]


Unnamed: 0,n_objects,n_classes,overlap_ratio
count,1250.0,1250.0,1250.0
mean,7.0064,3.4432,0.181839
std,4.20301,0.54462,0.189746
min,1.0,1.0,0.0
25%,4.0,3.0,0.066667
50%,6.0,3.0,0.133333
75%,9.0,4.0,0.2
max,32.0,4.0,1.0


In [None]:
# Number of "obstaculo" boxes per image.
obst_counts = (
    df[df["class_name"] == "obstaculo"]
    .groupby("image_id")["class_name"]
    .count()
    .rename("n_obstaculo")
)

# Number of "no_obstaculo" boxes per image.
no_obst_counts = (
    df[df["class_name"] == "no_obstaculo"]
    .groupby("image_id")["class_name"]
    .count()
    .rename("n_no_obstaculo")
)

# DataFrame.join raises on overlapping column names, so drop the columns left
# by a previous execution first; this keeps the cell safe to re-run.
img_df = (
    img_df.drop(columns=["n_obstaculo", "n_no_obstaculo"], errors="ignore")
    .join(obst_counts, how="left")
    .join(no_obst_counts, how="left")
)
# Images without boxes of a class are absent from the counts above -> 0.
img_df["n_obstaculo"] = img_df["n_obstaculo"].fillna(0).astype(int)
img_df["n_no_obstaculo"] = img_df["n_no_obstaculo"].fillna(0).astype(int)

# Min-max normalisation of each count feature (a constant feature maps to 0.0).
features = ["n_objects", "n_classes", "n_obstaculo", "n_no_obstaculo"]
for f in features:
    f_min, f_max = img_df[f].min(), img_df[f].max()
    img_df[f + "_norm"] = (img_df[f] - f_min) / (f_max - f_min) if f_max > f_min else 0.0

# Weighted complexity index; the weights add up to 1.0 and overlap_ratio is
# used without further normalisation.
img_df["complexity_index"] = (
    0.25 * img_df["n_objects_norm"]
    + 0.15 * img_df["n_classes_norm"]
    + 0.2  * img_df["n_obstaculo_norm"]
    + 0.15 * img_df["n_no_obstaculo_norm"]
    + 0.25 * img_df["overlap_ratio"]
)

# Tercile thresholds, computed over all images in img_df.
q_low, q_high = img_df["complexity_index"].quantile([1/3, 2/3])
print("Umbral baja/media:", q_low)
print("Umbral media/alta:", q_high)

def assign_complexity_level(x, q1, q2):
    """Map a complexity index to a level label.

    Returns "baja" when ``x <= q1``, "media" when ``x <= q2`` and "alta"
    otherwise.
    """
    if x <= q1:
        return "baja"
    return "media" if x <= q2 else "alta"

# Label every image with its complexity tercile and show the class balance.
img_df["complexity_level"] = img_df["complexity_index"].apply(
    assign_complexity_level, args=(q_low, q_high)
)
img_df["complexity_level"].value_counts().sort_index()


Umbral baja/media: 0.2204650188521156
Umbral media/alta: 0.273435972629521


complexity_level
alta     406
baja     421
media    423
Name: count, dtype: int64

In [None]:
# Image files of the test split, recognised by extension (case-insensitive)
# and sorted by path.
exts = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
test_images = sorted(
    candidate
    for candidate in Path(TEST_IMAGES_DIR).iterdir()
    if candidate.suffix.lower() in exts
)

print("N test images:", len(test_images))
test_images[:3]

N test images: 125


[WindowsPath('../data/images/test/gsv-amsterdam-1105-Obstacle.png'),
 WindowsPath('../data/images/test/gsv-amsterdam-233-Obstacle.png'),
 WindowsPath('../data/images/test/gsv-amsterdam-34-Obstacle.png')]

In [10]:
# Absolute path of every test image, keyed by file stem.
stem_to_path = {p.stem: p.resolve() for p in test_images}

rows = []
missing = 0

# Match annotated images to test files by stem. `missing` counts annotated
# images that are simply not part of the test split.
for img_id in img_df.index.astype(str):
    stem = Path(img_id).stem
    p = stem_to_path.get(stem, None)
    if p is None:
        missing += 1
        continue
    rows.append((img_id, p))

print("Mapeadas en test:", len(rows), "| missing:", missing)

# Test images without any annotation row never enter img_test_df and are thus
# excluded from every evaluation below; report them instead of dropping them silently.
mapped_stems = {Path(img_id).stem for img_id, _ in rows}
unannotated = sorted(set(stem_to_path) - mapped_stems)
if unannotated:
    print("Imágenes de test sin anotaciones (excluidas):", len(unannotated), unannotated[:5])

test_map = pd.DataFrame(rows, columns=["image_id", "image_path"]).set_index("image_id")
img_test_df = img_df.join(test_map, how="inner")

img_test_df["complexity_level"].value_counts().sort_index()


Mapeadas en test: 124 | missing: 1126


complexity_level
alta     44
baja     32
media    48
Name: count, dtype: int64

In [None]:
# Class names of the training dataset, read from its dataset YAML.
orig = yaml.safe_load(Path(DATASET_YAML_ORIG).read_text(encoding="utf-8"))
names_dict = orig["names"]
# `names` may be written as a plain list instead of an {id: name} mapping;
# normalise it so the code below (and the generated YAMLs) always see a dict.
if isinstance(names_dict, (list, tuple)):
    names_dict = dict(enumerate(names_dict))

# Inverse mapping: class name -> integer class id.
cls2id = {v: int(k) for k, v in names_dict.items()}
cls2id


{'obstaculo': 0, 'no_obstaculo': 1, 'acera': 2, 'carretera': 3}

In [12]:
# Test image paths (as strings) grouped by complexity level.
level_to_paths = {}
for lvl in ["baja", "media", "alta"]:
    level_mask = img_test_df["complexity_level"] == lvl
    level_to_paths[lvl] = img_test_df[level_mask]["image_path"].astype(str).tolist()
{lvl: len(v) for lvl, v in level_to_paths.items()}

{'baja': 32, 'media': 48, 'alta': 44}

In [None]:
def write_yolo_labels_from_csv(df_ann: pd.DataFrame, out_labels_dir: Path, cls2id: dict):
    """Write one YOLO-format label file per image found in ``df_ann``.

    Args:
        df_ann: annotations with columns ``image_id``, ``image_width``,
            ``image_height``, ``class_name``, ``xmin``, ``ymin``, ``xmax``, ``ymax``
            (pixel coordinates).
        out_labels_dir: destination folder; created if missing.
        cls2id: class name -> class id. Rows whose class is not a key are skipped.

    Each output line is ``"<id> <xc> <yc> <w> <h>"`` normalised by the image size
    (taken from the first row of the image) with six decimals. Boxes are clipped
    to the image and dropped when the clipped width or height is zero. A file is
    written for every image in ``df_ann``, even if it ends up empty.
    """
    out_labels_dir.mkdir(parents=True, exist_ok=True)

    def _clip(value, upper):
        # Restrict a coordinate to the [0, upper] range.
        return max(0.0, min(value, upper))

    for image_id, image_rows in df_ann.groupby("image_id"):
        img_w = float(image_rows["image_width"].iloc[0])
        img_h = float(image_rows["image_height"].iloc[0])

        label_lines = []
        columns = zip(
            image_rows["class_name"],
            image_rows["xmin"],
            image_rows["ymin"],
            image_rows["xmax"],
            image_rows["ymax"],
        )
        for raw_name, raw_x0, raw_y0, raw_x1, raw_y1 in columns:
            label = str(raw_name)
            if label not in cls2id:
                continue
            class_id = cls2id[label]

            x0, y0, x1, y1 = float(raw_x0), float(raw_y0), float(raw_x1), float(raw_y1)
            x0 = _clip(x0, img_w)
            x1 = _clip(x1, img_w)
            y0 = _clip(y0, img_h)
            y1 = _clip(y1, img_h)

            box_w = max(0.0, x1 - x0)
            box_h = max(0.0, y1 - y0)
            if box_w <= 0 or box_h <= 0:
                continue

            # Box centre in pixels, then everything normalised by the image size.
            xc = (x0 + box_w / 2.0) / img_w
            yc = (y0 + box_h / 2.0) / img_h
            wn = box_w / img_w
            hn = box_h / img_h

            label_lines.append(f"{class_id} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}")

        label_file = out_labels_dir / f"{Path(str(image_id)).stem}.txt"
        label_file.write_text("\n".join(label_lines), encoding="utf-8")

# Root folder for the per-complexity-level evaluation datasets built below.
EVAL_ROOT = BASE_DIR / "notebooks" / "eval_by_complexity_v7_full"
EVAL_ROOT.mkdir(parents=True, exist_ok=True)

def build_eval_dataset_for_level(level: str, image_paths: list[str], df_full: pd.DataFrame):
    """Materialise a small YOLO dataset holding the test images of one complexity level.

    Layout under ``EVAL_ROOT / level``: ``images/test`` (copies of ``image_paths``),
    ``labels/test`` (labels written by ``write_yolo_labels_from_csv`` with the
    notebook-level ``cls2id``) and a ``dataset.yaml`` whose train/val/test splits
    all point at ``images/test``.

    Files left in those two folders by an earlier run that do not belong to the
    current selection are deleted, so a changed level assignment cannot leak
    images of another level into the evaluation.

    Args:
        level: level name, used as the sub-folder name.
        image_paths: paths of the images that belong to this level.
        df_full: annotation table; rows are matched to images by file stem.

    Returns:
        Path of the generated ``dataset.yaml``.
    """
    level_root = EVAL_ROOT / level
    img_dir = level_root / "images" / "test"
    lab_dir = level_root / "labels" / "test"
    img_dir.mkdir(parents=True, exist_ok=True)
    lab_dir.mkdir(parents=True, exist_ok=True)

    sources = [Path(p) for p in image_paths]
    stems = {src.stem for src in sources}
    wanted_names = {src.name for src in sources}

    # Purge leftovers from previous runs that are not part of this selection.
    for old_img in img_dir.iterdir():
        if old_img.is_file() and old_img.name not in wanted_names:
            old_img.unlink()
    for old_lab in lab_dir.glob("*.txt"):
        if old_lab.stem not in stems:
            old_lab.unlink()

    for src in sources:
        dst = img_dir / src.name
        if not dst.exists():
            shutil.copy2(src, dst)

    # Select the annotation rows of these images without copying the whole table.
    row_stems = df_full["image_id"].astype(str).apply(lambda x: Path(x).stem)
    df_sub = df_full[row_stems.isin(stems)]

    write_yolo_labels_from_csv(df_sub, lab_dir, cls2id)

    eval_yaml = {
        "path": str(level_root),
        "train": "images/test",
        "val": "images/test",
        "test": "images/test",
        "names": names_dict,
    }
    yaml_path = level_root / "dataset.yaml"
    yaml_path.write_text(yaml.safe_dump(eval_yaml, sort_keys=False), encoding="utf-8")

    return yaml_path


In [14]:
level_to_yaml = {}
for lvl in ["baja", "media", "alta"]:
    level_to_yaml[lvl] = build_eval_dataset_for_level(lvl, level_to_paths[lvl], df)

level_to_yaml


{'baja': WindowsPath('../notebooks/eval_by_complexity_v7_full/baja/dataset.yaml'),
 'media': WindowsPath('../notebooks/eval_by_complexity_v7_full/media/dataset.yaml'),
 'alta': WindowsPath('../notebooks/eval_by_complexity_v7_full/alta/dataset.yaml')}

In [None]:
def get_class_metrics(r, class_name: str):
    """Extract precision, recall, mAP50 and mAP50-95 of one class from a validation result.

    The per-class arrays on ``r.box`` (``p``, ``r``, ``ap50``, ``ap``) hold one
    entry per class listed in ``r.box.ap_class_index`` — the classes that actually
    occur in the evaluated subset — and not one per class id. The class id is
    therefore translated to its position in ``ap_class_index`` before indexing;
    indexing by the raw id returns another class's numbers (or raises) as soon as
    a class with a lower id is absent from the subset.

    Args:
        r: validation result exposing ``names`` ({id: name}) and ``box``.
        class_name: name of the class to report.

    Returns:
        dict with ``class_id``, ``class_name``, ``precision``, ``recall``,
        ``mAP50`` and ``mAP50-95``. A metric is NaN when it is unavailable or
        the class has no entry in the result.

    Raises:
        ValueError: if ``class_name`` is not one of ``r.names``.
    """
    name_to_id = {v: k for k, v in r.names.items()}
    if class_name not in name_to_id:
        raise ValueError(f"Clase '{class_name}' no existe en r.names: {r.names}")
    cid = int(name_to_id[class_name])

    out = {"class_id": cid, "class_name": class_name}

    ap_class_index = getattr(r.box, "ap_class_index", None)
    if ap_class_index is None:
        # No index available: fall back to addressing the arrays by class id.
        pos = cid
    else:
        present = [int(c) for c in ap_class_index]
        pos = present.index(cid) if cid in present else None

    def _value_at(attr):
        # Entry of a per-class array for the requested class, NaN when missing.
        values = getattr(r.box, attr, None)
        if values is None or pos is None or pos >= len(values):
            return np.nan
        return float(values[pos])

    out["precision"] = _value_at("p")
    out["recall"] = _value_at("r")
    out["mAP50"] = _value_at("ap50")
    out["mAP50-95"] = _value_at("ap")

    return out


In [None]:
# Class whose per-class metrics are reported for every complexity level.
CLASS_FOCUS = "obstaculo"

model = YOLO(str(V7_FULL_WEIGHTS))

# Pass an absolute output folder: the relative one was re-rooted by Ultralytics
# under its own runs directory (results ended up in
# notebooks/runs/notebooks/eval_by_complexity_v7_full/runs/...).
val_project = str((EVAL_ROOT / "runs").resolve())

metrics_by_level = []
for lvl in ["baja", "media", "alta"]:
    yml = level_to_yaml[lvl]

    # NOTE(review): predictions below CONF_TH are discarded before the metrics
    # are computed — confirm this is intended for the reported mAP values.
    r = model.val(
        data=str(yml),
        split="test",
        conf=CONF_TH,
        iou=IOU_NMS,
        cache=False,
        verbose=False,
        project=val_project,
        name=f"val_{lvl}",
    )

    cm = get_class_metrics(r, CLASS_FOCUS)

    metrics_by_level.append({
        "complexity_level": lvl,
        "precision": cm["precision"],
        "recall": cm["recall"],
        "mAP50": cm["mAP50"],
        "mAP50-95": cm["mAP50-95"],
        "n_images": len(level_to_paths[lvl]),
    })

metrics_level_df = pd.DataFrame(metrics_by_level)
metrics_level_df

Ultralytics 8.4.14  Python-3.11.13 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 2060, 6144MiB)
YOLO11s summary (fused): 100 layers, 9,414,348 parameters, 0 gradients, 21.3 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.1 ms, read: 1102.697.1 MB/s, size: 3050.4 KB)
[K[34m[1mval: [0mScanning C:\Users\joano\Desktop\trabajo_final_master\notebooks\eval_by_complexity_v7_full\baja\labels\test.cache... 32 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 32/32  0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 2.4s/it 4.8s<14.1s
                   all         32        147      0.638      0.778      0.681      0.545
Speed: 4.3ms preprocess, 11.9ms inference, 0.0ms loss, 5.0ms postprocess per image
Results saved to [1mC:\Users\joano\Desktop\trabajo_final_master\notebooks\runs\notebooks\eval_by_complexity_v7_full\runs\val_baja3[0m
Ultralytics 8.4.14  Python-3.11.13 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 

Unnamed: 0,complexity_level,precision,recall,mAP50,mAP50-95,n_images
0,baja,0.636468,0.636656,0.719856,0.529788,32
1,media,0.433333,0.55914,0.494368,0.333702,48
2,alta,0.496855,0.5,0.47547,0.323642,44


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Grouped bar chart: one group per complexity level, one bar per metric.
order = ["baja", "media", "alta"]
m = metrics_level_df.copy()
m["complexity_level"] = pd.Categorical(m["complexity_level"], order, ordered=True)
m = m.sort_values("complexity_level")

metrics = ["precision", "recall", "mAP50", "mAP50-95"]

x = np.arange(len(order))
width = 0.18

fig, ax = plt.subplots(figsize=(8.6, 4.6))

# One tab10 colour per metric, in the order of `metrics`.
cmap = plt.get_cmap("tab10")
metric_colors = {met: cmap(k) for k, met in enumerate(metrics)}

# Offset (in bar widths) that centres each group of bars on its tick.
center_shift = (len(metrics) - 1) / 2

for i, met in enumerate(metrics):
    xpos = x + (i - center_shift) * width
    vals = m[met].values

    bars = ax.bar(
        xpos, vals,
        width=width, edgecolor="black", linewidth=1.1,
        color=metric_colors.get(met, cmap(0)),
        alpha=0.85, label=met, zorder=3,
    )

    # Value label just above every bar.
    for b, val in zip(bars, vals):
        ax.text(
            b.get_x() + b.get_width() / 2, float(val) + 0.015, f"{val:.2f}",
            ha="center", va="bottom", fontsize=8, color="black",
        )

ax.set_title("v7_full en TEST — métricas por complejidad", fontsize=18, fontfamily="serif", pad=12)
ax.set_xlabel("Nivel de complejidad", fontsize=13)
ax.set_ylabel("Valor", fontsize=13)

ax.set_xticks(x)
ax.set_xticklabels(order, fontsize=12)
ax.set_ylim(0, 1.0)

# Horizontal dotted grid only.
ax.grid(True, axis="y", linestyle=":", linewidth=0.9, alpha=0.30, zorder=0)
ax.grid(False, axis="x")

# Full frame around the axes.
for spine in ax.spines.values():
    spine.set_visible(True)
    spine.set_linewidth(1.6)

# Legend outside the axes; tight_layout leaves room for it on the right.
ax.legend(loc="center left", bbox_to_anchor=(1.02, 0.5), frameon=False, fontsize=11)
fig.tight_layout(rect=[0.0, 0.0, 0.82, 1.0])
plt.show()

fig_pdf = EVAL_ROOT / "metrics_by_complexity_level_v7_full_framed_color.pdf"
fig_png = EVAL_ROOT / "metrics_by_complexity_level_v7_full_framed_color.png"

for target in (fig_pdf, fig_png):
    fig.savefig(target, bbox_inches="tight", dpi=300)

print("Figuras guardadas:", fig_pdf, "y", fig_png)

<Figure size 860x460 with 1 Axes>

Figuras guardadas: ..\notebooks\eval_by_complexity_v7_full\metrics_by_complexity_level_v7_full_framed_color.pdf y ..\notebooks\eval_by_complexity_v7_full\metrics_by_complexity_level_v7_full_framed_color.png


In [None]:
import re
# Style: these rcParams are global, so they apply to every figure created
# from this cell onwards (not to the figures drawn above).
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.size"] = 11
plt.rcParams["axes.linewidth"] = 0.8

def extract_city(image_id: str) -> str:
    """Return the lower-cased city encoded in an image id.

    Ids of the form ``gsv-<city>-<number>-...`` yield ``<city>`` (which may itself
    contain hyphens). Any other id falls back to its second hyphen-separated
    token, or ``"unknown"`` when there is none.
    """
    text = str(image_id)
    match = re.search(r"^gsv-([^-]+(?:-[^-]+)*)-\d+-", text)
    if match is not None:
        return match.group(1).lower()

    tokens = text.split("-")
    if len(tokens) > 1:
        return tokens[1].lower()
    return "unknown"

# Annotation table with the city of every image attached.
df_city = df.assign(city=df["image_id"].apply(extract_city))

# Restrict to the annotated images that belong to the test split.
test_image_ids = set(img_test_df.index.astype(str))
in_test = df_city["image_id"].astype(str).isin(test_image_ids)
df_city_test = df_city[in_test].copy()

print("Imágenes test:", len(test_image_ids))
print("BBoxes test:", len(df_city_test))
df_city_test[["image_id","city","class_name"]].head()


Imágenes test: 124
BBoxes test: 868


Unnamed: 0,image_id,city,class_name
9,gsv-amsterdam-1105-Obstacle.png,amsterdam,obstaculo
10,gsv-amsterdam-1105-Obstacle.png,amsterdam,obstaculo
11,gsv-amsterdam-1105-Obstacle.png,amsterdam,acera
12,gsv-amsterdam-1105-Obstacle.png,amsterdam,carretera
266,gsv-amsterdam-233-Obstacle.png,amsterdam,obstaculo


In [None]:
# The per-city evaluation is single-class: only this class is kept, and it is
# written to the label files and the dataset YAML as class id 0.
FOCUS_CLASS = "obstaculo"
NAMES_1CLASS = {0: FOCUS_CLASS}

def write_yolo_labels_focus_1class(df_ann: pd.DataFrame, out_labels_dir: Path, focus_class: str):
    """Write single-class YOLO label files that contain only ``focus_class`` boxes.

    Args:
        df_ann: annotations with columns ``image_id``, ``image_width``,
            ``image_height``, ``class_name``, ``xmin``, ``ymin``, ``xmax``, ``ymax``
            (pixel coordinates).
        out_labels_dir: destination folder; created if missing.
        focus_class: class name to keep; every kept box is written with class id 0.

    Boxes are clipped to the image and dropped when the clipped width or height
    is zero. Images without any ``focus_class`` row get no label file at all.
    """
    out_labels_dir.mkdir(parents=True, exist_ok=True)
    focus_rows = df_ann[df_ann["class_name"].astype(str) == focus_class]

    corner_cols = ["xmin", "ymin", "xmax", "ymax"]
    for image_id, boxes in focus_rows.groupby("image_id"):
        img_w = float(boxes["image_width"].iloc[0])
        img_h = float(boxes["image_height"].iloc[0])

        entries = []
        for corners in boxes[corner_cols].itertuples(index=False, name=None):
            x0, y0, x1, y1 = (float(value) for value in corners)

            # Clip the corners to the image rectangle.
            x0 = max(0.0, min(x0, img_w))
            x1 = max(0.0, min(x1, img_w))
            y0 = max(0.0, min(y0, img_h))
            y1 = max(0.0, min(y1, img_h))

            box_w = max(0.0, x1 - x0)
            box_h = max(0.0, y1 - y0)
            if box_w <= 0 or box_h <= 0:
                continue

            xc = (x0 + box_w/2.0) / img_w
            yc = (y0 + box_h/2.0) / img_h
            wn = box_w / img_w
            hn = box_h / img_h

            entries.append(f"0 {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}")

        target = out_labels_dir / f"{Path(str(image_id)).stem}.txt"
        target.write_text("\n".join(entries), encoding="utf-8")


In [None]:
# Root folder for the per-city, single-class evaluation datasets and outputs.
EVAL_CITY_ROOT = BASE_DIR / "notebooks" / "eval_by_city_v7_full_obstaculo"
EVAL_CITY_ROOT.mkdir(parents=True, exist_ok=True)

def build_eval_dataset_for_city(city: str, img_paths: list[str], df_city_test: pd.DataFrame):
    """Materialise a single-class YOLO dataset holding the test images of one city.

    Layout under ``EVAL_CITY_ROOT / city``: ``images/test`` (copies of
    ``img_paths``), ``labels/test`` (labels written by
    ``write_yolo_labels_focus_1class`` for ``FOCUS_CLASS``) and a ``dataset.yaml``
    whose train/val/test splits all point at ``images/test``.

    Files left in those two folders by an earlier run that do not belong to the
    current selection are deleted, so they cannot leak into the evaluation.

    Args:
        city: city name, used as the sub-folder name.
        img_paths: paths of the images that belong to this city.
        df_city_test: annotation table; rows are matched to images by file stem.

    Returns:
        ``(yaml_path, df_sub)``: path of the generated ``dataset.yaml`` and the
        annotation rows (all classes) of the selected images.
    """
    city_root = EVAL_CITY_ROOT / city
    img_dir = city_root / "images" / "test"
    lab_dir = city_root / "labels" / "test"
    img_dir.mkdir(parents=True, exist_ok=True)
    lab_dir.mkdir(parents=True, exist_ok=True)

    sources = [Path(p) for p in img_paths]
    stems = {src.stem for src in sources}
    wanted_names = {src.name for src in sources}

    # Purge leftovers from previous runs that are not part of this selection.
    for old_img in img_dir.iterdir():
        if old_img.is_file() and old_img.name not in wanted_names:
            old_img.unlink()
    for old_lab in lab_dir.glob("*.txt"):
        if old_lab.stem not in stems:
            old_lab.unlink()

    for src in sources:
        dst = img_dir / src.name
        if not dst.exists():
            shutil.copy2(src, dst)

    # Select the annotation rows of these images without copying the whole table.
    row_stems = df_city_test["image_id"].astype(str).apply(lambda x: Path(x).stem)
    df_sub = df_city_test[row_stems.isin(stems)]

    write_yolo_labels_focus_1class(df_sub, lab_dir, focus_class=FOCUS_CLASS)

    eval_yaml = {
        "path": str(city_root),
        "train": "images/test",
        "val": "images/test",
        "test": "images/test",
        "names": NAMES_1CLASS,
    }
    yaml_path = city_root / "dataset.yaml"
    yaml_path.write_text(yaml.safe_dump(eval_yaml, sort_keys=False), encoding="utf-8")

    return yaml_path, df_sub


In [None]:
# Test images with their city attached (city derived from the image id).
tmp = img_test_df.assign(city=img_test_df.index.astype(str).map(extract_city))

# city -> list of image paths (as strings).
city_to_paths = tmp.groupby("city")["image_path"].apply(lambda s: s.astype(str).tolist()).to_dict()

# city -> number of FOCUS_CLASS boxes in the test annotations.
focus_rows = df_city_test[df_city_test["class_name"].astype(str) == FOCUS_CLASS]
obs_by_city = focus_rows.groupby("city")["class_name"].count().to_dict()

# (city, number of images, number of focus-class boxes) for every city.
city_stats = [
    (city, len(paths), int(obs_by_city.get(city, 0)))
    for city, paths in city_to_paths.items()
]

city_stats_df = pd.DataFrame(city_stats, columns=["city", "n_images", "n_obstaculos"]).sort_values("n_images", ascending=False)
city_stats_df.head(20)


Unnamed: 0,city,n_images,n_obstaculos
7,seattle,39,82
8,spgg,13,50
1,cdmx,13,46
3,columbus,13,28
2,chicago,13,25
0,amsterdam,10,31
5,oradell,10,9
4,newberg,7,22
6,pittsburgh,6,24


In [None]:
import pandas as pd
from pathlib import Path

# Test images with their city attached (city derived from the image id).
tmp = img_test_df.assign(city=img_test_df.index.astype(str).map(extract_city))

# city -> list of image paths (as strings).
city_to_paths = tmp.groupby("city")["image_path"].apply(lambda s: s.astype(str).tolist()).to_dict()

# city -> number of FOCUS_CLASS boxes in the test annotations.
FOCUS_CLASS = "obstaculo"
focus_rows = df_city_test[df_city_test["class_name"].astype(str) == FOCUS_CLASS]
obs_by_city = focus_rows.groupby("city")["class_name"].count().to_dict()

# (city, number of images, number of focus-class boxes) for every city.
city_stats = [
    (city, len(paths), int(obs_by_city.get(city, 0)))
    for city, paths in city_to_paths.items()
]

# Largest cities first; ties on image count broken by instance count.
city_stats_df = pd.DataFrame(city_stats, columns=["city", "n_images", "n_obstaculos"])
city_stats_df = city_stats_df.sort_values(["n_images", "n_obstaculos"], ascending=False)
city_stats_df = city_stats_df.reset_index(drop=True)

display(city_stats_df.head(20))

# Minimum support a city needs to be evaluated on its own.
MIN_IMAGES = 5
MIN_OBS_INSTANCES = 5

enough_images = city_stats_df["n_images"] >= MIN_IMAGES
enough_instances = city_stats_df["n_obstaculos"] >= MIN_OBS_INSTANCES
valid_cities = city_stats_df[enough_images & enough_instances]["city"].tolist()

print(f"Ciudades totales en TEST: {len(city_stats_df)}")
print(f"Ciudades válidas (>= {MIN_IMAGES} imgs y >= {MIN_OBS_INSTANCES} obst): {len(valid_cities)}")
print("Ejemplos valid_cities:", valid_cities[:10])


Unnamed: 0,city,n_images,n_obstaculos
0,seattle,39,82
1,spgg,13,50
2,cdmx,13,46
3,columbus,13,28
4,chicago,13,25
5,amsterdam,10,31
6,oradell,10,9
7,newberg,7,22
8,pittsburgh,6,24


Ciudades totales en TEST: 9
Ciudades válidas (>= 5 imgs y >= 5 obst): 9
Ejemplos valid_cities: ['seattle', 'spgg', 'cdmx', 'columbus', 'chicago', 'amsterdam', 'oradell', 'newberg', 'pittsburgh']


In [23]:
model = YOLO(str(V7_FULL_WEIGHTS))

# Pass an absolute output folder: the relative one was re-rooted by Ultralytics
# under its own runs directory (results ended up in
# notebooks/runs/notebooks/eval_by_city_v7_full_obstaculo/runs/...).
val_project = str((EVAL_CITY_ROOT / "runs").resolve())

rows = []
for city in valid_cities:
    # Build (or refresh) the single-class dataset of this city, then validate on it.
    yml, df_sub = build_eval_dataset_for_city(city, city_to_paths[city], df_city_test)

    r = model.val(
        data=str(yml),
        split="test",
        conf=CONF_TH,
        iou=IOU_NMS,
        cache=False,
        verbose=False,
        project=val_project,
        name=f"val_{city}",
    )

    # Aggregate metrics read from r.box; NaN when an attribute is missing.
    rows.append({
        "city": city,
        "n_images": len(city_to_paths[city]),
        "n_obstaculos": int(obs_by_city.get(city, 0)),
        "precision": float(getattr(r.box, "mp", np.nan)),
        "recall": float(getattr(r.box, "mr", np.nan)),
        "mAP50": float(getattr(r.box, "map50", np.nan)),
        "mAP50-95": float(getattr(r.box, "map", np.nan)),
    })

city_metrics_df = pd.DataFrame(rows).sort_values("mAP50-95", ascending=False)
city_metrics_df.head(10)


Ultralytics 8.4.14  Python-3.11.13 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 2060, 6144MiB)
YOLO11s summary (fused): 100 layers, 9,414,348 parameters, 0 gradients, 21.3 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 1121.482.1 MB/s, size: 2643.0 KB)
[K[34m[1mval: [0mScanning C:\Users\joano\Desktop\trabajo_final_master\notebooks\eval_by_city_v7_full_obstaculo\seattle\labels\test.cache... 37 images, 2 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 39/39  0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 3/3 1.5s/it 4.5s1.4s3s
                   all         39         82      0.611      0.683      0.657      0.499
Speed: 4.0ms preprocess, 14.4ms inference, 0.1ms loss, 5.2ms postprocess per image
Results saved to [1mC:\Users\joano\Desktop\trabajo_final_master\notebooks\runs\notebooks\eval_by_city_v7_full_obstaculo\runs\val_seattle2[0m
Ultralytics 8.4.14  Python-3.11.13 torch-2.7.1+cu118 CUDA:0 (NVIDI

Unnamed: 0,city,n_images,n_obstaculos,precision,recall,mAP50,mAP50-95
7,newberg,7,22,0.764156,0.58921,0.708452,0.560284
0,seattle,39,82,0.611368,0.682927,0.656547,0.499427
8,pittsburgh,6,24,0.818135,0.375,0.572091,0.449396
6,oradell,10,9,0.553859,0.555556,0.486944,0.356191
1,spgg,13,50,0.634294,0.4,0.447668,0.307875
3,columbus,13,28,0.368923,0.5,0.408569,0.294059
2,cdmx,13,46,0.54595,0.470631,0.497979,0.269748
4,chicago,13,25,0.422922,0.56,0.443484,0.267721
5,amsterdam,10,31,0.464676,0.483871,0.477593,0.260499


In [24]:
# Persist the per-city metrics table (without the index) under EVAL_CITY_ROOT.
out_csv = EVAL_CITY_ROOT / "metrics_by_city_v7_full_obstaculo.csv"
city_metrics_df.to_csv(out_csv, index=False)
print("Guardado:", out_csv)


Guardado: ..\notebooks\eval_by_city_v7_full_obstaculo\metrics_by_city_v7_full_obstaculo.csv


In [25]:
def hatch_cycle(n: int):
    """Return a list of ``n`` hatch patterns, repeating a fixed set of ten in order."""
    patterns = ("", "//", "\\\\", "xx", "..", "oo", "++", "--", "**", "||")
    chosen = []
    for position in range(n):
        chosen.append(patterns[position % len(patterns)])
    return chosen

def plot_city_ranking(df, metric="mAP50-95", topk=10, title=None, out_base=None, ascending=False):
    """Bar chart of the ``topk`` cities ranked by ``metric`` (hatched, black and white).

    Args:
        df: table with a ``city`` column and a ``metric`` column.
        metric: column to rank and plot.
        topk: number of cities to show.
        title: figure title; a default one is built from ``metric`` when omitted.
        out_base: if given, the figure is saved as ``<out_base>.pdf`` and ``<out_base>.png``.
        ascending: ranking direction. The default (False) shows the best cities;
            pass True to show the worst ones. The input order of ``df`` is
            irrelevant because the function always sorts it itself.
    """
    d = df.sort_values(metric, ascending=ascending).head(topk).copy()

    fig, ax = plt.subplots(figsize=(10.5, 3.6))
    x = np.arange(len(d))
    hatches = hatch_cycle(len(d))

    bars = ax.bar(x, d[metric].values, edgecolor="black", linewidth=1.4, facecolor="white", zorder=3)

    # Distinct hatch per bar plus its value printed just above it.
    for i, b in enumerate(bars):
        b.set_hatch(hatches[i])
        ax.text(b.get_x()+b.get_width()/2, b.get_height()+0.01, f"{b.get_height():.2f}",
                ha="center", va="bottom", fontsize=9)

    ax.set_title(title or f"Ranking por ciudad — {metric}", fontsize=18, fontfamily="serif", pad=12)
    ax.set_ylabel(metric)
    ax.set_xticks(x)
    ax.set_xticklabels(d["city"].tolist(), rotation=25, ha="right")

    ax.grid(True, axis="y", linestyle=":", linewidth=0.9, alpha=0.35, zorder=0)

    # Full frame around the axes.
    for s in ax.spines.values():
        s.set_visible(True)
        s.set_linewidth(1.6)

    plt.tight_layout()
    plt.show()

    if out_base:
        fig.savefig(str(out_base) + ".pdf", bbox_inches="tight", dpi=300)
        fig.savefig(str(out_base) + ".png", bbox_inches="tight", dpi=300)

# TOP
plot_city_ranking(
    city_metrics_df,
    metric="mAP50-95",
    topk=10,
    title="v7_full (obstáculo) — Top-10 ciudades por mAP50-95",
    out_base=EVAL_CITY_ROOT / "top10_cities_map5095"
)

# BOTTOM: pre-sorting the frame had no effect because the function re-sorts it
# (the plot repeated the top cities); request the ascending ranking explicitly.
plot_city_ranking(
    city_metrics_df,
    metric="mAP50-95",
    topk=10,
    title="v7_full (obstáculo) — Peor-10 ciudades por mAP50-95",
    out_base=EVAL_CITY_ROOT / "bottom10_cities_map5095",
    ascending=True,
)


<Figure size 1050x360 with 1 Axes>

<Figure size 1050x360 with 1 Axes>

In [26]:
# Scatter of per-city mAP50-95 against the number of ground-truth obstacle boxes.
fig, ax = plt.subplots(figsize=(6.6, 4.2))
ax.scatter(
    city_metrics_df["n_obstaculos"],
    city_metrics_df["mAP50-95"],
    facecolors="white",
    edgecolors="black",
)
ax.set_title("v7_full (obstáculo) — mAP50-95 vs nº instancias", fontsize=14, fontfamily="serif", pad=10)
ax.set_xlabel("n_obstaculos (GT)")
ax.set_ylabel("mAP50-95")

ax.grid(True, linestyle=":", linewidth=0.9, alpha=0.35)

# Full frame around the axes.
for s in ax.spines.values():
    s.set(visible=True, linewidth=1.4)

plt.tight_layout()
plt.show()


<Figure size 660x420 with 1 Axes>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.transforms import ScaledTranslation

def get_color_palette(n: int):
    """Return ``n`` colours from a discrete palette.

    Uses "tab10" for up to 10 colours, "tab20" for up to 20, and an "hsv"
    colormap resampled to ``n`` entries beyond that.
    """
    if n <= 10:
        palette = plt.get_cmap("tab10")
    elif n <= 20:
        palette = plt.get_cmap("tab20")
    else:
        palette = plt.get_cmap("hsv", n)
    return [palette(index) for index in range(n)]

def plot_city_ranking_safe(df, metric="recall", topk=10, title=None, out_base=None, rotate=25):
    """Coloured bar chart of the ``topk`` cities with the highest ``metric``.

    Args:
        df: table with a ``city`` column and a ``metric`` column.
        metric: column to rank (descending) and plot.
        topk: number of cities to show.
        title: figure title; a default one is built from ``topk`` and ``metric``.
        out_base: if given, the figure is saved as ``<out_base>_color.pdf`` and
            ``<out_base>_color.png``.
        rotate: rotation of the city tick labels, in degrees.

    The figure is shown and then closed.
    """
    ranked = df.sort_values(metric, ascending=False).head(topk).copy()
    heights = ranked[metric].astype(float).values
    positions = np.arange(len(ranked))

    # Fixed margins instead of tight/constrained layout.
    fig, ax = plt.subplots(figsize=(11.5, 4.6), constrained_layout=False)
    fig.subplots_adjust(top=0.84, bottom=0.28, left=0.08, right=0.98)

    bars = ax.bar(
        positions, heights,
        edgecolor="black", linewidth=1.6,
        color=get_color_palette(len(ranked)),
        alpha=0.85, zorder=3,
    )

    # Headroom above the tallest bar for its value label, capped at 1.2.
    tallest = float(np.nanmax(heights)) if len(heights) else 0.0
    upper = tallest + max(0.12, 0.18 * tallest)
    ax.set_ylim(0, min(1.2, upper))

    # Value labels sit 10 points above the bar top, independent of the y scale.
    label_transform = ax.transData + ScaledTranslation(0, 10/72, fig.dpi_scale_trans)
    for bar, height in zip(bars, heights):
        ax.text(
            bar.get_x() + bar.get_width()/2, height, f"{height:.2f}",
            transform=label_transform,
            ha="center", va="bottom",
            fontsize=13, zorder=5, clip_on=False, color="black",
        )

    if not title:
        title = f"v7_full (obstáculo) — Top-{topk} ciudades por {metric}"
    ax.set_title(title, fontsize=30, fontfamily="serif", pad=18)

    ax.set_ylabel(metric, fontsize=18)
    ax.set_xticks(positions)
    ax.set_xticklabels(ranked["city"].tolist(), rotation=rotate, ha="right", fontsize=18)

    ax.grid(True, axis="y", linestyle=":", linewidth=0.9, alpha=0.30, zorder=0)

    # Full frame around the axes.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(2.0)

    plt.show()

    if out_base:
        stem = str(out_base) + "_color"
        for suffix in (".pdf", ".png"):
            fig.savefig(stem + suffix, bbox_inches="tight", pad_inches=0.35, dpi=300)

    plt.close(fig)

# One ranking figure per metric, each saved under its own file stem.
for metric_name, file_stem in [
    ("recall", "top10_cities_recall"),
    ("precision", "top10_cities_precision"),
    ("mAP50", "top10_cities_map50"),
    ("mAP50-95", "top10_cities_map5095"),
]:
    plot_city_ranking_safe(city_metrics_df, metric=metric_name, topk=10, out_base=EVAL_CITY_ROOT/file_stem)

<Figure size 1150x460 with 1 Axes>

<Figure size 1150x460 with 1 Axes>

<Figure size 1150x460 with 1 Axes>

<Figure size 1150x460 with 1 Axes>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.transforms import ScaledTranslation

# Per-city precision vs recall scatter, with every point labelled by its city.
fig, ax = plt.subplots(figsize=(8.2, 5.0), constrained_layout=False)
fig.subplots_adjust(top=0.86, bottom=0.16, left=0.12, right=0.98)

x = city_metrics_df["recall"].astype(float).values
y = city_metrics_df["precision"].astype(float).values
cities = city_metrics_df["city"].astype(str).values

ax.scatter(x, y, s=85, facecolors="white", edgecolors="black", linewidths=1.8, zorder=3)

# Axis limits: data range plus padding, kept inside [0, 1].
xmin, xmax = np.nanmin(x), np.nanmax(x)
ymin, ymax = np.nanmin(y), np.nanmax(y)
xpad = max(0.02, 0.10 * (xmax - xmin))
ypad = max(0.02, 0.12 * (ymax - ymin))
ax.set_xlim(max(0.0, xmin - xpad), min(1.0, xmax + xpad))
ax.set_ylim(max(0.0, ymin - ypad), min(1.0, ymax + ypad))

# City names are shifted by a fixed offset in points (6 right, 3 up) from the marker.
text_offset = ScaledTranslation(6/72, 3/72, fig.dpi_scale_trans)
label_transform = ax.transData + text_offset
for xi, yi, city in zip(x, y, cities):
    ax.text(
        xi, yi, city,
        transform=label_transform,
        fontsize=13, fontfamily="serif",
        ha="left", va="center",
        clip_on=False, zorder=5,
    )

ax.set_title("v7_full (obstáculo) — Precision vs Recall por ciudad",
             fontsize=28, fontfamily="serif", pad=14)
ax.set_xlabel("recall", fontsize=20)
ax.set_ylabel("precision", fontsize=20)

ax.grid(True, linestyle=":", linewidth=0.9, alpha=0.35, zorder=0)

# Full frame around the axes.
for s in ax.spines.values():
    s.set_visible(True)
    s.set_linewidth(2.0)

ax.tick_params(axis="both", labelsize=16)

plt.show()

# Save with a tight bounding box so no stray margins are left around the figure.
for file_name in (
    "scatter_precision_vs_recall_cities_pretty.pdf",
    "scatter_precision_vs_recall_cities_pretty.png",
):
    fig.savefig(EVAL_CITY_ROOT / file_name, bbox_inches="tight", pad_inches=0.15, dpi=300)
plt.close(fig)

<Figure size 820x500 with 1 Axes>

In [None]:
import random
import numpy as np
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt

# Folder searched for the demo image.
# NOTE(review): this is an absolute, machine-specific path; it presumably
# corresponds to BASE_DIR / "data" / "images" / "annotated_images" — confirm
# before switching to a relative location.
IMG_DIR = r"C:\Users\joano\Desktop\trabajo_final_master\data\images\annotated_images"
# File name of the single image used to illustrate every augmentation recipe.
IMAGE_NAME = "gsv-chicago-86-Obstacle.png"

# Augmented previews are written here (relative to the working directory).
OUT_DIR = "augmented_outputs"
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

# Augmentation recipes keyed by name. Every recipe carries the same nine
# parameters: three HSV jitter gains, four geometric magnitudes and two flip
# probabilities.
AUGS = dict(
    # No colour jitter at all; geometric magnitudes equal to the baseline recipe.
    da_geom_only=dict(
        hsv_h=0.0, hsv_s=0.0, hsv_v=0.0,
        degrees=2, translate=0.08, scale=0.5, perspective=0.0005,
        fliplr=0.5, flipud=0.0,
    ),
    # Largest colour jitter of the three; smaller geometric magnitudes and
    # no perspective warp.
    da_color_only=dict(
        hsv_h=0.015, hsv_s=0.6, hsv_v=0.35,
        degrees=0.5, translate=0.02, scale=0.2, perspective=0.0,
        fliplr=0.5, flipud=0.0,
    ),
    # Geometry of da_geom_only combined with a milder colour jitter.
    da_baseline_soft_hsv=dict(
        hsv_h=0.01, hsv_s=0.35, hsv_v=0.20,
        degrees=2, translate=0.08, scale=0.5, perspective=0.0005,
        fliplr=0.5, flipud=0.0,
    ),
)

def find_image_path(img_dir: str, image_name: str) -> Path:
    """Locate an image file by name inside a directory tree.

    The file is first looked up directly under ``img_dir``; if it is not
    there, every subfolder is searched recursively.

    Args:
        img_dir: Root directory to search (a ``str`` or ``Path``).
        image_name: Exact file name to look for, e.g. ``"photo.png"``.

    Returns:
        Path to the image. When the name occurs in several subfolders the
        lexicographically smallest path is returned, so the choice is the
        same on every run and every platform.

    Raises:
        FileNotFoundError: If no regular file with that name exists under
            ``img_dir``.
    """
    root = Path(img_dir)

    direct = root / image_name
    # Only accept a regular file: a directory that happens to share the name
    # is not a usable image.
    if direct.is_file():
        return direct

    # rglob yields entries in filesystem order, which is not stable across
    # machines; sorting makes the pick reproducible when duplicates exist.
    candidates = sorted(p for p in root.rglob(image_name) if p.is_file())
    if candidates:
        return candidates[0]

    raise FileNotFoundError(f"Image '{image_name}' not found in '{img_dir}' (or its subfolders).")

def _perspective_coeffs(dst_pts, src_pts):
    A, B = [], []
    for (x, y), (u, v) in zip(dst_pts, src_pts):
        A.append([x, y, 1, 0, 0, 0, -u*x, -u*y])
        A.append([0, 0, 0, x, y, 1, -v*x, -v*y])
        B.append(u)
        B.append(v)
    A = np.array(A, dtype=np.float64)
    B = np.array(B, dtype=np.float64)
    coeffs = np.linalg.lstsq(A, B, rcond=None)[0]
    return coeffs.tolist()

def apply_hsv_jitter(img: Image.Image, hsv_h: float, hsv_s: float, hsv_v: float) -> Image.Image:
    """Randomly perturb hue, saturation and value of an image.

    Randomness comes from the module-level ``random`` generator, so the
    result is reproducible only if the caller seeds it beforehand.

    Args:
        img: Input image; it is converted to PIL's "HSV" mode internally.
        hsv_h: Maximum hue shift as a fraction of the 0..255 hue range. The
            shift is drawn uniformly from ``[-hsv_h, hsv_h]`` and the hue
            wraps around modulo 256.
        hsv_s: Saturation gain half-width; when positive the gain is drawn
            uniformly from ``[1 - hsv_s, 1 + hsv_s]``, otherwise it is 1.
        hsv_v: Value (brightness) gain half-width, same convention as ``hsv_s``.

    Returns:
        A new RGB image, or ``img`` itself (not a copy) when all three
        magnitudes are zero.
    """
    if all(gain == 0 for gain in (hsv_h, hsv_s, hsv_v)):
        return img

    hue_band, sat_band, val_band = img.convert("HSV").split()

    # Draw order matters for reproducibility: hue shift, then saturation
    # gain, then value gain.
    hue_shift = int(round(random.uniform(-hsv_h, hsv_h) * 255))
    if hsv_s > 0:
        sat_gain = random.uniform(1 - hsv_s, 1 + hsv_s)
    else:
        sat_gain = 1.0
    if hsv_v > 0:
        val_gain = random.uniform(1 - hsv_v, 1 + hsv_v)
    else:
        val_gain = 1.0

    # Hue is circular: widen to int16 so the shifted value cannot overflow,
    # then wrap back into 0..255.
    hue = ((np.array(hue_band, dtype=np.int16) + hue_shift) % 256).astype(np.uint8)
    # Saturation and value are scaled in float and saturate at the range ends.
    sat = np.clip(np.array(sat_band, dtype=np.float32) * sat_gain, 0, 255).astype(np.uint8)
    val = np.clip(np.array(val_band, dtype=np.float32) * val_gain, 0, 255).astype(np.uint8)

    bands = tuple(Image.fromarray(channel) for channel in (hue, sat, val))
    return Image.merge("HSV", bands).convert("RGB")

def apply_geometric(img: Image.Image, degrees: float, translate: float, scale: float,
                    perspective: float, fliplr: float, flipud: float,
                    fill=(114,114,114)) -> Image.Image:
    """Apply random flips, a rotate/scale/shift about the centre, and a corner warp.

    Randomness comes from the module-level ``random`` generator. Values are
    drawn in a fixed order (horizontal flip, vertical flip, angle, scale,
    x shift, y shift, then the four jittered corners), so seeding ``random``
    beforehand makes the result reproducible. The output keeps the input size.

    Args:
        img: Input image.
        degrees: Rotation angle is drawn uniformly from ``[-degrees, degrees]``.
        translate: Shift along each axis is drawn uniformly from
            ``[-translate, translate]`` and multiplied by the image width
            (x) or height (y).
        scale: Zoom factor is ``1 + u`` with ``u`` uniform in ``[-scale, scale]``.
        perspective: When positive, each corner is displaced by up to
            ``perspective * max(width, height)`` pixels per axis.
        fliplr: Probability of a left-right flip.
        flipud: Probability of a top-bottom flip.
        fill: Colour used for pixels that fall outside the source image.

    Returns:
        The transformed image.
    """
    w, h = img.size

    # --- flips: a random number is consumed only when the probability is positive
    if fliplr > 0:
        if random.random() < fliplr:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
    if flipud > 0:
        if random.random() < flipud:
            img = img.transpose(Image.FLIP_TOP_BOTTOM)

    # --- sample the affine parameters; a zero magnitude skips the draw entirely
    angle, zoom, shift_x, shift_y = 0.0, 1.0, 0.0, 0.0
    if degrees:
        angle = random.uniform(-degrees, degrees)
    if scale:
        zoom = 1.0 + random.uniform(-scale, scale)
    if translate:
        shift_x = random.uniform(-translate, translate) * w
        shift_y = random.uniform(-translate, translate) * h

    half_w, half_h = w / 2.0, h / 2.0
    theta = np.deg2rad(angle)
    cos_t, sin_t = np.cos(theta), np.sin(theta)

    # Move the image centre to the origin ...
    to_origin = np.array([[1, 0, -half_w],
                          [0, 1, -half_h],
                          [0, 0, 1]], dtype=np.float64)
    # ... rotate and zoom about it ...
    rotate_zoom = np.array([[zoom*cos_t, -zoom*sin_t, 0],
                            [zoom*sin_t,  zoom*cos_t, 0],
                            [0, 0, 1]], dtype=np.float64)
    # ... then move back to the centre, plus the random shift.
    to_target = np.array([[1, 0, half_w + shift_x],
                          [0, 1, half_h + shift_y],
                          [0, 0, 1]], dtype=np.float64)

    # Image.transform samples the input for every output pixel, so it needs
    # the output -> input mapping, i.e. the inverse of the forward transform.
    output_to_input = np.linalg.inv(to_target @ rotate_zoom @ to_origin)
    affine_coeffs = tuple(output_to_input[:2].ravel())

    img = img.transform((w, h), Image.AFFINE, affine_coeffs,
                        resample=Image.BICUBIC, fillcolor=fill)

    if not (perspective and perspective > 0):
        return img

    # --- corner warp: jitter each corner (x first, then y) and fit the
    # projective map that sends the jittered corners back to the originals.
    jitter = perspective * max(w, h)
    corners = [(0,0), (w,0), (w,h), (0,h)]
    jittered = [
        (corner_x + random.uniform(-jitter, jitter),
         corner_y + random.uniform(-jitter, jitter))
        for corner_x, corner_y in corners
    ]
    warp_coeffs = _perspective_coeffs(jittered, corners)
    return img.transform((w, h), Image.PERSPECTIVE, warp_coeffs,
                         resample=Image.BICUBIC, fillcolor=fill)

def apply_aug_variant(img: Image.Image, cfg: dict, seed: int) -> Image.Image:
    """Produce one reproducible augmented copy of ``img``.

    The geometric transform is applied first, then the HSV jitter.

    Args:
        img: Source image; it is copied, never modified.
        cfg: Augmentation recipe holding the keys ``degrees``, ``translate``,
            ``scale``, ``perspective``, ``fliplr``, ``flipud``, ``hsv_h``,
            ``hsv_s`` and ``hsv_v``.
        seed: Seed for the random draws.

    Returns:
        The augmented image.

    Raises:
        KeyError: If ``cfg`` lacks one of the keys listed above.
    """
    # Side effect: this reseeds the *global* generators of both `random` and
    # NumPy, so the same seed always reproduces the same variant.
    for reseed in (random.seed, np.random.seed):
        reseed(seed)

    geometric_keys = ("degrees", "translate", "scale", "perspective", "fliplr", "flipud")
    warped = apply_geometric(img.copy(), **{name: cfg[name] for name in geometric_keys})
    return apply_hsv_jitter(warped, cfg["hsv_h"], cfg["hsv_s"], cfg["hsv_v"])

def show_single(title: str, im: Image.Image):
    """Display one image in its own 8x6-inch figure, titled and without axes.

    Args:
        title: Text shown above the image.
        im: Image to display.
    """
    fig = plt.figure(figsize=(8, 6))
    ax = fig.gca()
    ax.imshow(im)
    ax.set_title(title)
    ax.axis("off")  # hide ticks, frame and grid
    plt.show()

# Load the reference image once; every variant starts from this RGB copy.
img_path = find_image_path(IMG_DIR, IMAGE_NAME)
img0 = Image.open(img_path).convert("RGB")

# (display tag, recipe key in AUGS, seed for the random draws)
variants = [
    ("GEOM", "da_geom_only", 13),
    ("COLOR", "da_color_only", 21),
    ("COMBINED", "da_baseline_soft_hsv", 37),
]

show_single("ORIGINAL", img0)

out_root = Path(OUT_DIR)
image_stem = Path(IMAGE_NAME).stem
saved_paths = []

for tag, key, seed in variants:
    img_aug = apply_aug_variant(img0, AUGS[key], seed=seed)

    # Written as <image stem>_<tag>_<recipe key>.png inside OUT_DIR.
    fname = f"{image_stem}_{tag}_{key}.png"
    out_path = out_root / fname
    img_aug.save(out_path)
    saved_paths.append(out_path)

    show_single(f"{tag} ({key})", img_aug)

# Report every file written, one per line.
print("\n".join(["Saved:"] + [f" - {p}" for p in saved_paths]))

<Figure size 800x600 with 1 Axes>

<Figure size 800x600 with 1 Axes>

<Figure size 800x600 with 1 Axes>

<Figure size 800x600 with 1 Axes>

Saved:
 - augmented_outputs\gsv-chicago-86-Obstacle_GEOM_da_geom_only.png
 - augmented_outputs\gsv-chicago-86-Obstacle_COLOR_da_color_only.png
 - augmented_outputs\gsv-chicago-86-Obstacle_COMBINED_da_baseline_soft_hsv.png
