In [None]:
import json
import random
from collections import defaultdict
from pathlib import Path

import cv2
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ipywidgets import (
    Button,
    Dropdown,
    HBox,
    IntSlider,
    VBox,
    interact,
    interactive_output,
)
from matplotlib.patches import Polygon

# Dataset locations. The images are looked up relative to the annotation file.
COCO_JSON = Path("../data/new/train/_annotations.coco.json")
IMAGE_DIR = COCO_JSON.parent
OUT_CSV = Path("../data/new/train_image_label_summary.csv")

# Stop early with a readable message instead of a traceback from the read below.
if not COCO_JSON.exists():
    raise SystemExit(f"COCO JSON not found: {COCO_JSON}")

with COCO_JSON.open(encoding="utf8") as coco_file:
    coco = json.load(coco_file)

# Index image records by their id for direct lookup.
images_info = {}
for image_record in coco.get("images", []):
    images_info[image_record["id"]] = image_record

# Group annotations under the id of the image they belong to.
anns_by_image = defaultdict(list)
for ann in coco.get("annotations", []):
    anns_by_image[ann["image_id"]].append(ann)

def ann_seg_type(ann):
    """Classify the ``segmentation`` field of one annotation dict.

    Returns:
        ``"rle"`` when the field is a dict, ``"poly"`` when it is a list
        holding at least one list of six or more numbers (three x/y pairs),
        and ``"none"`` in every other case (missing, ``None``, empty list,
        only short/non-list entries, or any other type).
    """
    segmentation = ann.get("segmentation")
    if isinstance(segmentation, dict):
        return "rle"
    if isinstance(segmentation, list):
        # One usable ring is enough; an empty list yields False here.
        has_polygon = any(
            isinstance(ring, list) and len(ring) >= 6 for ring in segmentation
        )
        return "poly" if has_polygon else "none"
    return "none"

def find_image_path(fname):
    """Locate the image file for a COCO ``file_name`` entry.

    A few fixed locations are tried first; if none of them holds the file,
    the directory containing the annotation JSON is searched recursively
    for a file with the same base name.

    Args:
        fname: The ``file_name`` value of an image record. May be ``None``
            or empty when the record lacks the field.

    Returns:
        A ``Path`` to an existing regular file, or ``None`` when ``fname``
        is missing/empty or no matching file is found.
    """
    # Without this guard None raises TypeError in the "/" operator, and ""
    # resolves to the dataset directory itself, which exists.
    if not fname:
        return None

    base_dir = COCO_JSON.parent
    base_name = Path(fname).name
    candidates = [
        base_dir / fname,
        base_dir / "images" / fname,
        base_dir / "images" / base_name,
        Path("data/new") / fname,
        Path("data/new") / "images" / fname,
    ]
    for p in candidates:
        # is_file() rather than exists(): a directory is never a valid image.
        if p.is_file():
            return p

    # Last resort: search the dataset tree by base name. rglob rejects an
    # empty pattern, which Path(fname).name produces for names like ".".
    if base_name:
        for p in base_dir.rglob(base_name):
            if p.is_file():
                return p
    return None

# Abort before building the table: with no images the DataFrame below has no
# columns, so df['type'] would raise KeyError and this message would never
# be reached if the check came afterwards.
if not images_info:
    raise SystemExit("No images in COCO JSON 'images' array.")

# One summary row per image: where it lives on disk and how its annotations
# are labelled (polygon, RLE, or neither).
rows = []
for img_id, info in images_info.items():
    fname = info.get("file_name")
    # A record without file_name cannot be resolved; leave its path empty
    # rather than failing the whole scan.
    img_path = find_image_path(fname) if fname else None
    anns = anns_by_image.get(img_id, [])
    seg_types = [ann_seg_type(a) for a in anns]
    n_anns = len(seg_types)
    n_poly = seg_types.count("poly")
    n_rle = seg_types.count("rle")
    # Every annotation that is neither "poly" nor "rle" counts as bbox-only.
    n_bbox_only = n_anns - n_poly - n_rle
    n_poly_like = n_poly + n_rle
    if n_anns == 0:
        img_type = "no-annotations"
    elif n_poly_like == 0:
        img_type = "bbox-only"
    elif n_bbox_only == 0:
        img_type = "poly-only"
    else:
        img_type = "mixed"
    rows.append({
        "image_id": img_id,
        "file_name": fname,
        "image_path": str(img_path) if img_path else "",
        "width": info.get("width"),
        "height": info.get("height"),
        "n_annotations": n_anns,
        "n_bbox_only": n_bbox_only,
        "n_poly": n_poly,
        "n_rle": n_rle,
        "type": img_type
    })

df = pd.DataFrame(rows)
total = len(df)
summary = df['type'].value_counts().to_dict()
print("Summary (train):")
for k in ["poly-only", "bbox-only", "mixed", "no-annotations"]:
    if k in summary:
        print(f"  {k:12s}: {summary[k]:6d}   ({summary[k]/total:.2%})")
print(f"Total images found in JSON: {total}")

# The CSV is a side artifact. A locked or unwritable target (for example the
# file being open in another program) should not stop the rest of the cell.
try:
    df.to_csv(OUT_CSV, index=False)
except OSError as exc:
    print(f"WARNING: could not write CSV summary to {OUT_CSV}: {exc}")
else:
    print(f"Saved CSV summary to: {OUT_CSV}")

def draw_image_by_index(idx, show_bbox=True, show_poly=True):
    """Show one image from ``df`` with its annotations drawn on top.

    Args:
        idx: Positional row index into ``df``.
        show_bbox: Draw each annotation's ``bbox`` rectangle and label.
        show_poly: Draw each polygon of a ``"poly"`` segmentation.

    Returns:
        None. Prints a message and returns early when the image file is
        missing or cannot be decoded; otherwise shows the figure and prints
        a one-line summary of the row.
    """
    row = df.iloc[idx]
    # Unresolved images are stored as "", and Path("") is ".", which exists.
    # Require a non-empty path to a real file so this branch actually fires.
    raw_path = row["image_path"]
    img_path = Path(raw_path) if raw_path else None
    if img_path is None or not img_path.is_file():
        print("Image not found:", row["file_name"], "expected at", row["image_path"])
        return
    img_bgr = cv2.imread(str(img_path))
    if img_bgr is None:
        print("Cannot read image:", img_path)
        return
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    fig, ax = plt.subplots(1, figsize=(12,8))
    ax.imshow(img_rgb)
    ax.axis("off")
    ann_list = anns_by_image.get(row["image_id"], [])
    colors = ['red','lime','cyan','yellow','magenta','orange','blue']
    for a in ann_list:
        cat_id = a.get("category_id", None)
        # The label is category_id - 1; a missing or non-int id falls back to 0.
        cls = (cat_id - 1) if isinstance(cat_id, int) else 0
        color = colors[cls % len(colors)]
        bbox = a.get("bbox", None)
        # Skip boxes that do not have exactly four values instead of letting
        # the unpacking below raise.
        if show_bbox and bbox and len(bbox) == 4:
            x, y, bw, bh = bbox
            rect = patches.Rectangle((x,y), bw, bh, linewidth=2, edgecolor=color, facecolor='none')
            ax.add_patch(rect)
            # Keep the label inside the axes when the box touches the top edge.
            ax.text(x, max(y-6,0), str(cls), bbox=dict(facecolor=color, alpha=0.6), color='white', fontsize=10)
        seg = a.get("segmentation", None)
        st = ann_seg_type(a)
        if st == "poly" and show_poly:
            for poly in seg:
                if not isinstance(poly, list) or len(poly) < 6:
                    continue
                # Pair x/y by slicing; a trailing unpaired value in an
                # odd-length list is dropped rather than raising IndexError.
                pts = list(zip(poly[0::2], poly[1::2]))
                poly_patch = Polygon(pts, closed=True, linewidth=1.5, edgecolor=color, facecolor=color, alpha=0.35)
                ax.add_patch(poly_patch)
        # RLE segmentations are counted in the summary but not drawn here.
    plt.show()
    print(f"{row['file_name']} — anns: {row['n_annotations']} — type: {row['type']} (bbox-only:{row['n_bbox_only']} poly:{row['n_poly']} rle:{row['n_rle']})")

# Viewer controls: an index slider, two show/hide selectors and a button that
# jumps to a random image.
idx_slider = IntSlider(min=0, max=total-1, step=1, value=0, description='index')
poly_toggle = Dropdown(options=[('Show poly', True), ('Hide poly', False)], value=True, description='poly')
bbox_toggle = Dropdown(options=[('Show bbox', True), ('Hide bbox', False)], value=True, description='bbox')
rand_btn = Button(description='Random', button_style='info')


def on_rand_clicked(b):
    """Move the slider to a random row; the slider change triggers a redraw."""
    idx_slider.value = random.randrange(total)


rand_btn.on_click(on_rand_clicked)
ui = VBox([HBox([idx_slider, poly_toggle, bbox_toggle, rand_btn])])

# interact() displays the widgets it is given, so combining it with
# display(ui) rendered the controls twice. interactive_output only produces
# the output area, which is shown once together with the custom layout.
out = interactive_output(
    draw_image_by_index,
    {"idx": idx_slider, "show_poly": poly_toggle, "show_bbox": bbox_toggle},
)
display(ui, out)

# Roll the per-image table up to one row per image type: how many images fall
# in each type and how their annotations split by segmentation kind.
type_groups = df.groupby("type")
counts = type_groups.agg(
    images=pd.NamedAgg(column="file_name", aggfunc="count"),
    total_annotations=pd.NamedAgg(column="n_annotations", aggfunc="sum"),
    bbox_only_ann=pd.NamedAgg(column="n_bbox_only", aggfunc="sum"),
    poly_ann=pd.NamedAgg(column="n_poly", aggfunc="sum"),
    rle_ann=pd.NamedAgg(column="n_rle", aggfunc="sum"),
).reset_index()
print("\nCounts by image type:")
display(counts)


Summary (train):
  poly-only   :  36075   (99.95%)
  no-annotations:     17   (0.05%)
Total images found in JSON: 36092


PermissionError: [Errno 13] Permission denied: '..\\data\\new\\train_image_label_summary.csv'