In [4]:
import os
import json
from collections import defaultdict
import numpy as np

# ============================================================
# CONFIG
# ============================================================
dataset_path = "./flatbug-dataset/cao2022"

GT_JSON = os.path.join(dataset_path, "instances_default.json")
PRED_JSON = os.path.join(dataset_path, "sam3_results_tiled.json")   # or SAM3 seg file

IOU_THRESHOLD = 0.2
USE_SEGMENTATION = True     # set True if evaluating segmentation IoU

# ============================================================
# IOU FUNCTIONS
# ============================================================
def bbox_iou(b1, b2):
    x1,y1,w1,h1=b1
    x2,y2,w2,h2=b2
    xa=max(x1,x2); ya=max(y1,y2)
    xb=min(x1+w1,x2+w2); yb=min(y1+h1,y2+h2)
    inter=max(0,xb-xa)*max(0,yb-ya)
    union=w1*h1 + w2*h2 - inter
    return inter/union if union>0 else 0

def seg_mask(seg, H, W):
    """Polygon -> mask or RLE -> mask. Same logic as your previous script."""
    if seg is None:
        return None
    if isinstance(seg, list):
        mask = np.zeros((H, W), dtype=np.uint8)
        for poly in seg:
            try:
                pts = np.array(poly).reshape(-1,2).astype(np.int32)
                cv2.fillPoly(mask, [pts], 1)
            except:
                continue
        return mask
    if isinstance(seg, dict):
        try:
            from pycocotools import mask as mu
            return mu.decode(seg)
        except:
            return None
    return None

def seg_iou(m1, m2):
    inter = np.logical_and(m1, m2).sum()
    union = np.logical_or(m1, m2).sum()
    return inter / union if union > 0 else 0


# ============================================================
# LOAD JSON
# ============================================================
gt = json.load(open(GT_JSON))
pred = json.load(open(PRED_JSON))

# group by image
gt_by_image = defaultdict(list)
pred_by_image = defaultdict(list)

for g in gt["annotations"]:
    gt_by_image[g["image_id"]].append(g)

for p in pred["annotations"]:
    pred_by_image[p["image_id"]].append(p)

# image sizes (needed for seg IoU only)
sizes = {im["id"]: (im["height"], im["width"]) for im in gt["images"]}

# ============================================================
# TRACK FN / FP per image
# ============================================================
FN_images = defaultdict(int)
FP_images = defaultdict(int)

FN_details = defaultdict(list)
FP_details = defaultdict(list)

TP = 0
FP = 0
FN = 0

# ============================================================
# MAIN LOOP
# ============================================================
for img_id in gt_by_image.keys():

    gt_objs = gt_by_image[img_id]
    pred_objs = sorted(pred_by_image.get(img_id, []), 
                       key=lambda x: x.get("score",1.0), reverse=True)

    matched_gt = set()

    # prepare segmentation masks if needed
    if USE_SEGMENTATION:
        H,W = sizes[img_id]

        gt_masks = [seg_mask(g["segmentation"], H, W) for g in gt_objs]
        pred_masks = [seg_mask(p["segmentation"], H, W) for p in pred_objs]

    # -----------------------------------------
    # match predictions to GT
    # -----------------------------------------
    for pi, p in enumerate(pred_objs):

        best_iou = 0
        best_gt = None

        for gi, g in enumerate(gt_objs):
            if gi in matched_gt:
                continue
            ##if p["category_id"] != g["category_id"]:
              ##  continue

            if USE_SEGMENTATION:
                iou_val = seg_iou(pred_masks[pi], gt_masks[gi])
            else:
                iou_val = bbox_iou(p["bbox"], g["bbox"])

            if iou_val > best_iou:
                best_iou = iou_val
                best_gt = gi

        if best_iou >= IOU_THRESHOLD:
            matched_gt.add(best_gt)
            TP += 1
        else:
            FP += 1
            FP_images[img_id] += 1
            FP_details[img_id].append({
                "pred_id": p["id"],
                "pred_bbox": p.get("bbox"),
                "score": p.get("score",1.0),
                "best_iou": best_iou
            })

    # -----------------------------------------
    # GT not matched → FN
    # -----------------------------------------
    for gi, g in enumerate(gt_objs):
        if gi not in matched_gt:
            FN += 1
            FN_images[img_id] += 1
            FN_details[img_id].append({
                "gt_id": g["id"],
                "gt_bbox": g.get("bbox"),
                "category_id": g["category_id"]
            })


# ============================================================
# DISPLAY SUMMARY
# ============================================================
print("\n=====================")
print("FINAL SUMMARY")
print("=====================")
print("TP =", TP)
print("FP =", FP)
print("FN =", FN)

print("\nTop images with most FN:")
sorted_FN = sorted(FN_images.items(), key=lambda x: x[1], reverse=True)
for img_id, count in sorted_FN[:10]:
    print(f" image {img_id}: {count} FN")

print("\nTop images with most FP:")
sorted_FP = sorted(FP_images.items(), key=lambda x: x[1], reverse=True)
for img_id, count in sorted_FP[:10]:
    print(f" image {img_id}: {count} FP")

# ============================================================
# SAVE DETAILS TO JSON
# ============================================================
output = {
    "TP": TP,
    "FP": FP,
    "FN": FN,
    "FN_images": FN_images,
    "FP_images": FP_images,
    "FN_details": FN_details,
    "FP_details": FP_details,
}

# convert defaultdict → normal dict
output = json.loads(json.dumps(output, default=lambda x: dict(x)))

with open("debug_failures.json", "w") as f:
    json.dump(output, f, indent=4)

print("\nSaved detailed debug info → debug_failures.json")


# ============================================================
# DISPLAY SUMMARY
# ============================================================
print("\n=====================")
print("FINAL SUMMARY")
print("=====================")
print("TP =", TP)
print("FP =", FP)
print("FN =", FN)

# ----------------------------
# METRICS
# ----------------------------
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

print("\n--- METRICS ---")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

print("\nTop images with most FN:")
sorted_FN = sorted(FN_images.items(), key=lambda x: x[1], reverse=True)
for img_id, count in sorted_FN[:10]:
    print(f" image {img_id}: {count} FN")

print("\nTop images with most FP:")
sorted_FP = sorted(FP_images.items(), key=lambda x: x[1], reverse=True)
for img_id, count in sorted_FP[:10]:
    print(f" image {img_id}: {count} FP")

# ============================================================
# SAVE DETAILS TO JSON
# ============================================================
output = {
    "TP": TP,
    "FP": FP,
    "FN": FN,
    "precision": precision,
    "recall": recall,
    "f1": f1,
    "FN_images": FN_images,
    "FP_images": FP_images,
    "FN_details": FN_details,
    "FP_details": FP_details,
}

# convert defaultdict → normal dict
output = json.loads(json.dumps(output, default=lambda x: dict(x)))

with open("debug_failures.json", "w") as f:
    json.dump(output, f, indent=4)

print("\nSaved detailed debug info → debug_failures.json")



FINAL SUMMARY
TP = 590
FP = 229
FN = 11

Top images with most FN:
 image 9: 3 FN
 image 45: 2 FN
 image 3: 1 FN
 image 16: 1 FN
 image 20: 1 FN
 image 24: 1 FN
 image 35: 1 FN
 image 40: 1 FN

Top images with most FP:
 image 8: 8 FP
 image 33: 8 FP
 image 59: 8 FP
 image 3: 7 FP
 image 57: 7 FP
 image 18: 6 FP
 image 24: 6 FP
 image 28: 6 FP
 image 40: 6 FP
 image 43: 6 FP

Saved detailed debug info → debug_failures.json

FINAL SUMMARY
TP = 590
FP = 229
FN = 11

--- METRICS ---
Precision: 0.7204
Recall:    0.9817
F1 Score:  0.8310

Top images with most FN:
 image 9: 3 FN
 image 45: 2 FN
 image 3: 1 FN
 image 16: 1 FN
 image 20: 1 FN
 image 24: 1 FN
 image 35: 1 FN
 image 40: 1 FN

Top images with most FP:
 image 8: 8 FP
 image 33: 8 FP
 image 59: 8 FP
 image 3: 7 FP
 image 57: 7 FP
 image 18: 6 FP
 image 24: 6 FP
 image 28: 6 FP
 image 40: 6 FP
 image 43: 6 FP

Saved detailed debug info → debug_failures.json


In [1]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
from pycocotools import mask as maskUtils

def debug_visualize_annotation(dataset_path, image_id, output_folder="debug_output"):
    """
    dataset_path: path to dataset folder (contains images/ and instances_default.json)
    image_id: integer or string (without extension)
    """
    print("===================================")
    print(f" Debug Visualizer")
    print(f" Dataset: {dataset_path}")
    print(f" Image ID: {image_id}")
    print("===================================")

    # -------------------------------
    # Load JSON
    # -------------------------------
    json_path = os.path.join(dataset_path, "instances_default.json")

    if not os.path.exists(json_path):
        print(f"ERROR: JSON not found: {json_path}")
        return
    
    with open(json_path, "r") as f:
        gt = json.load(f)

    # -------------------------------
    # Find the image entry
    # -------------------------------
    image_entry = None
    for img in gt["images"]:
        if str(img["id"]) == str(image_id):
            image_entry = img
            break

    if image_entry is None:
        print(f"ERROR: image_id {image_id} not found in JSON.")
        return
    
    img_path = os.path.join(dataset_path, image_entry["file_name"])

    if not os.path.exists(img_path):
        print(f"ERROR: image file missing: {img_path}")
        return

    # Load image
    img = Image.open(img_path).convert("RGBA")
    draw = ImageDraw.Draw(img, "RGBA")

    # -------------------------------
    # Find GT annotations for this image
    # -------------------------------
    ann_for_image = [ann for ann in gt["annotations"] if ann["image_id"] == image_entry["id"]]

    if len(ann_for_image) == 0:
        print("WARNING: No annotations found for this image.")
        return

    print(f"Found {len(ann_for_image)} annotations.")

    # -------------------------------
    # Create mask canvas
    # -------------------------------
    W, H = image_entry["width"], image_entry["height"]
    mask_canvas = Image.new("RGBA", (W, H), (0, 0, 0, 0))
    mask_draw = ImageDraw.Draw(mask_canvas, "RGBA")

    # -------------------------------
    # Draw annotations
    # -------------------------------
    for ann in ann_for_image:
        # Draw bounding box
        x, y, w, h = ann["bbox"]
        draw.rectangle([x, y, x + w, y + h], outline=(0, 255, 0, 255), width=3)

        # Draw segmentation mask if polygon
        seg = ann["segmentation"]
        
        if isinstance(seg, list):  # polygon
            for poly in seg:
                mask_draw.polygon(poly, outline=(255, 0, 0, 255), fill=(255, 0, 0, 80))
        else:
            # if RLE encoded
            rle = seg
            if isinstance(rle, dict) and "counts" in rle:
                m = maskUtils.decode(rle)
                colored_mask = Image.new("RGBA", (W, H), (255, 0, 0, 0))
                pixels = colored_mask.load()
                for i in range(W):
                    for j in range(H):
                        if m[j, i] == 1:
                            pixels[i, j] = (255, 0, 0, 80)
                img = Image.alpha_composite(img, colored_mask)

    # Combine mask with image
    img = Image.alpha_composite(img, mask_canvas)

    # -------------------------------
    # Save output
    # -------------------------------
    out_dir = os.path.join(output_folder, os.path.basename(dataset_path))
    os.makedirs(out_dir, exist_ok=True)

    out_path = os.path.join(out_dir, f"{image_id}.png")
    img.save(out_path)

    print(f"Saved visualization → {out_path}")
    print("Done.")



In [3]:
debug_visualize_annotation(
    dataset_path="./flatbug-dataset/AMT",
    image_id=104
)

 Debug Visualizer
 Dataset: ./flatbug-dataset/AMT
 Image ID: 104
Found 22 annotations.
Saved visualization → debug_output/AMT/104.png
Done.


In [1]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from pycocotools import mask as maskUtils


def visualize_gt_vs_pred(dataset_path,
                         sam3_image_id,
                         gt_json="instances_default.json",
                         pred_json="sam3_results.json",
                         output_folder="debug_output"):

    print("=============================================")
    print(" VISUALIZING GT vs SAM3 PREDICTIONS")
    print(f" Dataset: {dataset_path}")
    print(f" SAM3 Image ID: {sam3_image_id}")
    print("=============================================")

    # -----------------------
    # Load GT
    # -----------------------
    gt_path = os.path.join(dataset_path, gt_json)
    with open(gt_path, "r") as f:
        gt = json.load(f)

    # -----------------------
    # Load SAM3
    # -----------------------
    pred_path = os.path.join(dataset_path, pred_json)
    if not os.path.exists(pred_path):
        print("ERROR: SAM3 predictions file not found:", pred_path)
        return

    with open(pred_path, "r") as f:
        preds_raw = json.load(f)

    # Flexible SAM3 format
    if isinstance(preds_raw, list):
        preds = preds_raw
    elif "annotations" in preds_raw:
        preds = preds_raw["annotations"]
    elif "predictions" in preds_raw:
        preds = preds_raw["predictions"]
    else:
        raise ValueError("Cannot detect SAM3 format")

    # ---------------------------------------------------
    # NEW: Extract file_name from SAM3 first
    # ---------------------------------------------------
    sam3_items = [p for p in preds if str(p.get("image_id")) == str(sam3_image_id)]

    if len(sam3_items) == 0:
        print("ERROR: No SAM3 predictions found for image_id:", sam3_image_id)
        print("Available first few:",
              [p.get("image_id") for p in preds[:10]])
        return

    file_name = sam3_items[0].get("file_name")
    if not file_name:
        print("ERROR: SAM3 entries missing `file_name`")
        return

    print(f"→ SAM3 file_name found: {file_name}")

    # ---------------------------------------------------
    # Find matching GT image by file_name
    # ---------------------------------------------------
    img_entry = next((img for img in gt["images"]
                      if img["file_name"] == file_name), None)

    if img_entry is None:
        print("ERROR: GT image not found for file_name:", file_name)
        return

    W, H = img_entry["width"], img_entry["height"]
    img_path = os.path.join(dataset_path, file_name)

    if not os.path.exists(img_path):
        print("ERROR: Image missing:", img_path)
        return

    base_img = Image.open(img_path).convert("RGBA")
    draw = ImageDraw.Draw(base_img, "RGBA")

    mask_canvas = Image.new("RGBA", (W, H), (0, 0, 0, 0))
    mask_draw = ImageDraw.Draw(mask_canvas, "RGBA")

    GT_COLOR = (0, 255, 0, 180)
    PRED_COLOR = (0, 120, 255, 180)

    try:
        font = ImageFont.truetype("arial.ttf", 22)
    except:
        font = ImageFont.load_default()

    # ---------------------------------------------------
    # GT annotations matching correct GT image
    # ---------------------------------------------------
    gt_anns = [a for a in gt["annotations"]
               if a["image_id"] == img_entry["id"]]

    # ---------------------------------------------------
    # SAM3 annotations matching file_name (BEST)
    # ---------------------------------------------------
    pred_anns = [a for a in preds if a.get("file_name") == file_name]

    print(f"GT objects  : {len(gt_anns)}")
    print(f"SAM3 objects: {len(pred_anns)}")

    debug_info = {
        "file_name": file_name,
        "gt": [],
        "sam3": []
    }

    # ---------------------------------------------------
    # Draw GT
    # ---------------------------------------------------
    for ann in gt_anns:

        seg = ann["segmentation"]
        debug_info["gt"].append({
            "bbox": ann["bbox"],
            "segmentation": ann["segmentation"],
            "area": ann.get("area")
        })

        if isinstance(seg, list):
            # polygon
            for poly in seg:
                mask_draw.polygon(poly, fill=(0,255,0,80))
        else:
            # RLE
            mask = maskUtils.decode(seg)
            ys, xs = np.where(mask == 1)
            for x, y in zip(xs, ys):
                mask_canvas.putpixel((x, y), (0,255,0,80))

        x, y, w, h = ann["bbox"]
        draw.rectangle([x, y, x+w, y+h], outline=GT_COLOR, width=3)
        draw.text((x, y - 12), "GT", fill=(0,255,0), font=font)

    # ---------------------------------------------------
    # Draw SAM3
    # ---------------------------------------------------
    for ann in pred_anns:
        seg = ann["segmentation"]

        debug_info["sam3"].append({
            "bbox": ann["bbox"],
            "segmentation": ann["segmentation"],
            "score": ann.get("score"),
            "area": ann.get("area")
        })

        if isinstance(seg, list):
            for poly in seg:
                mask_draw.polygon(poly, fill=(0,120,255,80))
        else:
            if isinstance(seg, dict) and "counts" in seg:
                mask = maskUtils.decode(seg)
                ys, xs = np.where(mask == 1)
                for x, y in zip(xs, ys):
                    mask_canvas.putpixel((x, y), (0,120,255,80))

        x, y, w, h = ann["bbox"]
        draw.rectangle([x, y, x+w, y+h], outline=PRED_COLOR, width=3)
        draw.text((x, y - 12), "SAM3", fill=(0,120,255), font=font)

    # ---------------------------------------------------
    # Merge
    # ---------------------------------------------------
    final_img = Image.alpha_composite(base_img, mask_canvas)

    out_dir = os.path.join(output_folder, os.path.basename(dataset_path))
    os.makedirs(out_dir, exist_ok=True)

    png_path = os.path.join(out_dir, f"{sam3_image_id}.png")
    json_path = os.path.join(out_dir, f"{sam3_image_id}.json")

    final_img.save(png_path)
    with open(json_path, "w") as f:
        json.dump(debug_info, f, indent=2)

    print("Saved:", png_path)
    print("Saved:", json_path)
    print("Done.")


In [4]:
visualize_gt_vs_pred(
  "flatbug-dataset/cao2022/",
   13,
)

 VISUALIZING GT vs SAM3 PREDICTIONS
 Dataset: flatbug-dataset/cao2022/
 SAM3 Image ID: 13
→ SAM3 file_name found: 000100.jpg
GT objects  : 10
SAM3 objects: 10
Saved: debug_output/13.png
Saved: debug_output/13.json
Done.


In [6]:
import os
import json
from collections import defaultdict
import numpy as np
import cv2

# ============================================================
# CONFIG
# ============================================================
dataset_path = "./flatbug-dataset/cao2022"

GT_JSON = os.path.join(dataset_path, "instances_default.json")
PRED_JSON = os.path.join(dataset_path, "sam3_results.json")

IOU_THRESHOLD = 0.5


# ============================================================
# MASK UTILITIES
# ============================================================
def polygons_to_mask(polygons, height, width):
    mask = np.zeros((height, width), dtype=np.uint8)
    for poly in polygons:
        try:
            pts = np.array(poly, dtype=np.int32).reshape(-1, 2)
            cv2.fillPoly(mask, [pts], 1)
        except Exception:
            continue
    return mask


def seg_to_mask(segmentation, height, width):
    """Convert COCO segmentation (polygons or RLE) to binary mask."""
    if segmentation is None:
        return None

    # polygon format
    if isinstance(segmentation, list):
        if len(segmentation) == 0:
            return np.zeros((height, width), dtype=np.uint8)
        return polygons_to_mask(segmentation, height, width)

    # RLE
    if isinstance(segmentation, dict):
        try:
            from pycocotools import mask as mask_utils
            return mask_utils.decode(segmentation).astype(np.uint8)
        except Exception:
            return None

    return None


def mask_iou(m1, m2):
    inter = np.logical_and(m1, m2).sum()
    union = np.logical_or(m1, m2).sum()
    return float(inter) / float(union) if union > 0 else 0.0


# ============================================================
# LOAD JSON
# ============================================================
gt = json.load(open(GT_JSON))
pred = json.load(open(PRED_JSON))

print("Loaded GT:", len(gt["annotations"]))
print("Loaded Pred:", len(pred["annotations"]))


# ============================================================
# GROUP BY FILE NAME
# ============================================================
gt_by_file = defaultdict(list)
sizes = {}

for im in gt["images"]:
    sizes[im["file_name"]] = (im["height"], im["width"])
for g in gt["annotations"]:
    file_name = next(im["file_name"] for im in gt["images"] if im["id"] == g["image_id"])
    gt_by_file[file_name].append(g)

pred_by_file = defaultdict(list)
for p in pred["annotations"]:
    file_name = p.get("file_name")
    if file_name:
        pred_by_file[file_name].append(p)


# ============================================================
# DEBUG COUNTERS
# ============================================================
TP = FP = FN = 0
FN_images = defaultdict(int)
FP_images = defaultdict(int)

FN_details = defaultdict(list)
FP_details = defaultdict(list)


# ============================================================
# MAIN LOOP
# ============================================================
for file_name, gt_objs in gt_by_file.items():

    if file_name not in sizes:
        print(f"Skipping missing image size: {file_name}")
        continue

    H, W = sizes[file_name]
    pred_objs = pred_by_file.get(file_name, [])

    # Construct masks
    gt_masks = [seg_to_mask(g.get("segmentation"), H, W) for g in gt_objs]
    gt_cats = [g["category_id"] for g in gt_objs]

    pred_masks = [seg_to_mask(p.get("segmentation"), H, W) for p in pred_objs]
    pred_cats = [p["category_id"] for p in pred_objs]
    pred_scores = [p.get("score", 1.0) for p in pred_objs]

    matched_gt = set()

    # Sort predictions by confidence
    order = sorted(range(len(pred_objs)), key=lambda i: pred_scores[i], reverse=True)

    for pi in order:
        pmask = pred_masks[pi]
        if pmask is None:
            FP += 1
            FP_images[file_name] += 1
            FP_details[file_name].append({
                "pred_bbox": pred_objs[pi].get("bbox"),
                "score": pred_scores[pi],
                "reason": "mask_none"
            })
            continue

        pcat = pred_cats[pi]

        best_iou = 0.0
        best_gt = None

        for gi, (gmask, gcat) in enumerate(zip(gt_masks, gt_cats)):
            if gi in matched_gt:
                continue
            if gmask is None:
                continue
            ## if pcat != gcat:
            ##   continue

            iou = mask_iou(pmask, gmask)
            if iou > best_iou:
                best_iou = iou
                best_gt = gi

        if best_iou >= IOU_THRESHOLD and best_gt is not None:
            matched_gt.add(best_gt)
            TP += 1
        else:
            FP += 1
            FP_images[file_name] += 1
            FP_details[file_name].append({
                "pred_bbox": pred_objs[pi].get("bbox"),
                "score": pred_scores[pi],
                "best_iou": best_iou
            })

    # Remaining unmatched GT = FN
    for gi, g in enumerate(gt_objs):
        if gi not in matched_gt:
            FN += 1
            FN_images[file_name] += 1
            FN_details[file_name].append({
                "gt_bbox": g.get("bbox"),
                "category_id": g["category_id"]
            })


# ============================================================
# SUMMARY
# ============================================================
print("\n=====================")
print(" FINAL SUMMARY")
print("=====================")
print("TP =", TP)
print("FP =", FP)
print("FN =", FN)

print("\nTop images with most FN:")
for fn in sorted(FN_images.items(), key=lambda x: x[1], reverse=True)[:10]:
    print(" ", fn)

print("\nTop images with most FP:")
for fp in sorted(FP_images.items(), key=lambda x: x[1], reverse=True)[:10]:
    print(" ", fp)


# ============================================================
# SAVE TO JSON
# ============================================================
output = {
    "TP": TP,
    "FP": FP,
    "FN": FN,
    "FN_images": dict(FN_images),
    "FP_images": dict(FP_images),
    "FN_details": {k: v for k, v in FN_details.items()},
    "FP_details": {k: v for k, v in FP_details.items()},
}

with open("debug_failures_segmentation.json", "w") as f:
    json.dump(output, f, indent=4)

print("\nSaved → debug_failures_segmentation.json")


Loaded GT: 601
Loaded Pred: 628

 FINAL SUMMARY
TP = 280
FP = 348
FN = 321

Top images with most FN:
  ('000053.jpg', 10)
  ('000103.jpg', 10)
  ('000253.jpg', 10)
  ('000002.jpg', 9)
  ('000077.jpg', 9)
  ('000301.jpg', 9)
  ('000303.jpg', 9)
  ('000325.jpg', 9)
  ('000153.jpg', 8)
  ('000202.jpg', 8)

Top images with most FP:
  ('000002.jpg', 10)
  ('000053.jpg', 10)
  ('000253.jpg', 10)
  ('000325.jpg', 10)
  ('000077.jpg', 9)
  ('000103.jpg', 9)
  ('000275.jpg', 9)
  ('000303.jpg', 9)
  ('000928.jpg', 9)
  ('000003.jpg', 8)

Saved → debug_failures_segmentation.json


In [15]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from pycocotools import mask as maskUtils


# ===========================================================
# Decode segmentation mask
# ===========================================================
def decode_mask(segmentation, height, width):
    """Decode COCO RLE or polygon segmentation to binary mask."""
    if isinstance(segmentation, list):   # polygon
        rle = maskUtils.frPyObjects(segmentation, height, width)
        rle = maskUtils.merge(rle)
    else:                                # RLE
        rle = segmentation
    return maskUtils.decode(rle)


# ===========================================================
# Draw bounding box + label (Pillow 10+ compatible)
# ===========================================================
def draw_bbox(draw, bbox, outline_color, label_text, label_bg, font):
    x, y, w, h = bbox
    x2, y2 = x + w, y + h

    # rectangle
    draw.rectangle([x, y, x2, y2], outline=outline_color, width=3)

    # compute text size (new Pillow API)
    try:
        l, t, r, b = font.getbbox(label_text)
        tw, th = r - l, b - t
    except:
        # fallback old versions
        tw, th = draw.textsize(label_text, font=font)

    # label background
    draw.rectangle([x, y - th - 2, x + tw + 4, y], fill=label_bg)

    # label text
    draw.text((x + 2, y - th - 2), label_text, fill="white", font=font)


# ===========================================================
# Overlay segmentation mask
# ===========================================================
def overlay_segmentation(image, mask, color):
    rgba = Image.new("RGBA", image.size)
    overlay = ImageDraw.Draw(rgba)

    ys, xs = np.where(mask == 1)

    # semi-transparent
    for x, y in zip(xs, ys):
        overlay.point((x, y), fill=color + (90,))   # RGBA with alpha

    return Image.alpha_composite(image.convert("RGBA"), rgba)


# ===========================================================
# Main visualization function
# ===========================================================
def visualize_selected_images(
    dataset_path,
    gt_json_path,
    pred_json_path,
    output_folder,
    selected_images
):
    os.makedirs(output_folder, exist_ok=True)

    # Load JSONs
    with open(gt_json_path, "r") as f:
        gt_data = json.load(f)
    with open(pred_json_path, "r") as f:
        pred_data = json.load(f)

    # Build image_id → file_name dictionary
    gt_id_to_file = {img["id"]: img["file_name"] for img in gt_data["images"]}
    pred_id_to_file = {img["id"]: img["file_name"] for img in pred_data["images"]}

    # Build file_name → annotations map
    gt_by_file = {}
    pred_by_file = {}

    for ann in gt_data["annotations"]:
        fname = gt_id_to_file[ann["image_id"]]
        gt_by_file.setdefault(fname, []).append(ann)

    for ann in pred_data["annotations"]:
        fname = pred_id_to_file[ann["image_id"]]
        pred_by_file.setdefault(fname, []).append(ann)

    # Load font
    try:
        font = ImageFont.truetype("arial.ttf", 16)
    except:
        font = ImageFont.load_default()

    # Colors
    GT_COLOR = (0, 255, 0)      # green
    SAM_COLOR = (0, 100, 255)   # blue

    # Process each selected image
    for file_name in selected_images:
        print(f"Processing {file_name} ...")

        img_path = os.path.join(dataset_path, file_name)
        if not os.path.exists(img_path):
            print(" ❌ Image missing:", img_path)
            continue

        image = Image.open(img_path).convert("RGBA")
        H, W = image.height, image.width

        # all annotations for that file
        gt_anns = gt_by_file.get(file_name, [])
        pred_anns = pred_by_file.get(file_name, [])

        # JSON output
        debug_json = {
            "file_name": file_name,
            "gt": [],
            "sam3": []
        }

        draw = ImageDraw.Draw(image)

        # ---- GT Overlays ----
        for ann in gt_anns:
            mask = decode_mask(ann["segmentation"], H, W)
            image = overlay_segmentation(image, mask, GT_COLOR)
            draw_bbox(draw, ann["bbox"], GT_COLOR, "GT", (0, 150, 0), font)

            debug_json["gt"].append({
                "bbox": ann["bbox"],
                "segmentation": ann["segmentation"]
            })

        # ---- SAM3 Overlays ----
        for ann in pred_anns:
            mask = decode_mask(ann["segmentation"], H, W)
            image = overlay_segmentation(image, mask, SAM_COLOR)
            draw_bbox(draw, ann["bbox"], SAM_COLOR, "SAM3", (0, 60, 180), font)

            debug_json["sam3"].append({
                "bbox": ann["bbox"],
                "segmentation": ann["segmentation"]
            })

        # Save visualization
        out_img_path = os.path.join(output_folder, file_name)
        image.convert("RGB").save(out_img_path)

        # Save JSON
        debug_json_path = os.path.join(output_folder, file_name.replace(".jpg", ".json"))
        with open(debug_json_path, "w") as f:
            json.dump(debug_json, f, indent=2)

    print("\n✅ DONE — Output saved in:", output_folder)


In [1]:
## debug output with label of BB and seg mask of both sam3 and gt

import os
import json
import numpy as np
import cv2
from pycocotools import mask as maskUtils

# ---------- CONFIG ----------
dataset_path = "flatbug-dataset/Mothitor"
gt_json_path = os.path.join(dataset_path, "instances_default.json")
pred_json_path = os.path.join(dataset_path, "sam3_results_pyramid.json")
output_folder = "debug_output/Mothitor"
selected_images = ["2023-07-06_22_20_03.jpg", "2023-07-07_23_00_03.jpg", "2023-08-29_23_00_17.jpg"]

os.makedirs(output_folder, exist_ok=True)

# Colors (BGR) and alpha
GT_COLOR = (0, 200, 0)      # green
SAM_COLOR = (220, 100, 10)  # orange-ish blue alternative in BGR
ALPHA = 0.45                # mask transparency

FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.6
FONT_THICK = 1


# ---------- utils ----------
def decode_mask(seg, H, W):
    """
    seg: polygon list or RLE dict (COCO format)
    returns: binary mask (H,W) dtype=np.uint8 with 1 where object is
    """
    if seg is None:
        return None
    # polygon(s)
    if isinstance(seg, list):
        # pycocotools expects a list of polygons; frPyObjects handles lists of polys
        try:
            rles = maskUtils.frPyObjects(seg, H, W)
            if isinstance(rles, list):
                rle = maskUtils.merge(rles)
            else:
                rle = rles
            mask = maskUtils.decode(rle)
            # decode returns shape (H,W) or (H,W,1)
            if mask.ndim == 3:
                mask = mask[:, :, 0]
            return (mask > 0).astype(np.uint8)
        except Exception:
            # fallback: try rasterizing polygons with cv2
            mask = np.zeros((H, W), dtype=np.uint8)
            for poly in seg:
                try:
                    pts = np.array(poly, dtype=np.int32).reshape(-1, 2)
                    cv2.fillPoly(mask, [pts], 1)
                except Exception:
                    continue
            return mask
    # RLE dict
    if isinstance(seg, dict):
        try:
            mask = maskUtils.decode(seg)
            if mask.ndim == 3:
                mask = mask[:, :, 0]
            return (mask > 0).astype(np.uint8)
        except Exception:
            return None
    return None


def blend_mask_on_image(image, mask, color, alpha=0.45):
    """
    image: BGR uint8
    mask: binary uint8 (H,W)
    color: BGR tuple
    returns blended image (modified copy)
    """
    if mask is None:
        return image
    overlay = image.copy().astype(np.float32)
    colored = np.zeros_like(overlay, dtype=np.uint8)
    colored[:, :] = color
    # apply only where mask==1
    mask3 = np.stack([mask] * 3, axis=-1).astype(bool)
    overlay[mask3] = overlay[mask3] * (1 - alpha) + colored[mask3] * alpha
    return overlay.astype(np.uint8)


def draw_bbox_with_label(image, bbox, color, label):
    """
    image: BGR uint8
    bbox: [x,y,w,h]
    draws rectangle and label on image in-place
    """
    x, y, w, h = bbox
    x1, y1 = int(x), int(y)
    x2, y2 = int(x + w), int(y + h)

    # rectangle
    cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=2)

    # label background: compute text size
    ((tw, th), _) = cv2.getTextSize(label, FONT, FONT_SCALE, FONT_THICK)
    # ensure label sits above box; if not enough space, put inside top-left corner of box
    label_x1 = x1
    label_y1 = y1 - th - 6
    label_y2 = y1
    if label_y1 < 0:
        label_y1 = y1
        label_y2 = y1 + th + 6

    cv2.rectangle(image, (label_x1, label_y1), (label_x1 + tw + 8, label_y2), color, thickness=-1)
    # put white text
    text_x = label_x1 + 4
    text_y = label_y2 - 4 if label_y1 < label_y2 else label_y1 + th + 2
    cv2.putText(image, label, (text_x, text_y), FONT, FONT_SCALE, (255, 255, 255), FONT_THICK, lineType=cv2.LINE_AA)


# ---------- load JSONs and build lookups ----------
with open(gt_json_path, "r") as f:
    gt = json.load(f)
with open(pred_json_path, "r") as f:
    pred = json.load(f)

# Build id->file and file->image info
gt_id_to_file = {img["id"]: img["file_name"] for img in gt["images"]}
gt_file_to_size = {img["file_name"]: (img.get("height"), img.get("width")) for img in gt["images"]}

pred_id_to_file = {img["id"]: img["file_name"] for img in pred["images"]}
pred_file_to_size = {img["file_name"]: (img.get("height"), img.get("width")) for img in pred["images"]}

gt_by_file = {}
for ann in gt["annotations"]:
    fname = gt_id_to_file.get(ann["image_id"])
    if fname is None:
        continue
    gt_by_file.setdefault(fname, []).append(ann)

pred_by_file = {}
for ann in pred["annotations"]:
    fname = pred_id_to_file.get(ann["image_id"])
    if fname is None:
        continue
    pred_by_file.setdefault(fname, []).append(ann)


# ---------- process selected images ----------
for file_name in selected_images:
    img_path = os.path.join(dataset_path, file_name)
    print("Processing:", file_name)
    if not os.path.exists(img_path):
        print(" Missing image:", img_path)
        continue

    # read image (BGR)
    img = cv2.imread(img_path)
    if img is None:
        print(" Failed to read:", img_path)
        continue
    H, W = img.shape[:2]

    # copy for drawing
    vis = img.copy()

    # collect debug info
    debug_json = {"file_name": file_name, "gt": [], "sam3": []}

    # draw GT first (so SAM3 overlays on top if overlapping)
    for ann in gt_by_file.get(file_name, []):
        bbox = ann.get("bbox")
        seg = ann.get("segmentation")
        mask = decode_mask(seg, H, W) if seg is not None else None

        if mask is not None:
            vis = blend_mask_on_image(vis, mask, GT_COLOR, alpha=ALPHA)
        if bbox:
            draw_bbox_with_label(vis, bbox, GT_COLOR, "GT")

        debug_json["gt"].append({
            "id": ann.get("id"),
            "bbox": bbox,
            "category_id": ann.get("category_id"),
            "has_mask": mask is not None,
            "segmentation": seg
        })

    # draw SAM3 predictions
    for ann in pred_by_file.get(file_name, []):
        bbox = ann.get("bbox")
        seg = ann.get("segmentation")
        mask = decode_mask(seg, H, W) if seg is not None else None

        if mask is not None:
            vis = blend_mask_on_image(vis, mask, SAM_COLOR, alpha=ALPHA)
        if bbox:
            draw_bbox_with_label(vis, bbox, SAM_COLOR, "SAM3")

        debug_json["sam3"].append({
            "id": ann.get("id"),
            "bbox": bbox,
            "score": ann.get("score"),
            "category_id": ann.get("category_id"),
            "has_mask": mask is not None,
            "segmentation": seg
        })

    # write output image and JSON
    out_img_path = os.path.join(output_folder, file_name)
    cv2.imwrite(out_img_path, vis)

    out_json_path = os.path.join(output_folder, file_name.replace(".jpg", ".json"))
    with open(out_json_path, "w") as jf:
        json.dump(debug_json, jf, indent=2)

    print(" Saved:", out_img_path, "and", out_json_path)

print("Done.")


Processing: 2023-07-06_22_20_03.jpg.jpg
 Missing image: flatbug-dataset/Mothitor/2023-07-06_22_20_03.jpg.jpg
Processing: 2023-07-07_23_00_03.jpg
 Saved: debug_output/cao2022/2023-07-07_23_00_03.jpg and debug_output/cao2022/2023-07-07_23_00_03.json
Processing: 2023-08-29_23_00_17.jpg
 Saved: debug_output/cao2022/2023-08-29_23_00_17.jpg and debug_output/cao2022/2023-08-29_23_00_17.json
Done.


In [2]:
import os
import json
import cv2
import numpy as np
from collections import defaultdict

# ============================================================
# CONFIG
# ============================================================
root_dataset = "./flatbug-dataset"

# List of datasets to evaluate
datasets_to_eval = [
    "abram2023", "ALUS", "amarathunga2022", "AMI-traps", "AMT", "anTraX",
    "ArTaxOr", "biodiscover-arm", "BIOSCAN", "cao2022", "CollembolAI",
    "Diopsis", "DIRT", "DiversityScanner", "gernat2018", "Mothitor",
    "NHM-beetles-crops", "PeMaToEuroPep", "pinoy2023", "sittinger2023",
    "sticky-pi", "ubc-pitfall-traps", "ubc-scanned-sticky-cards"
]

IOU_THRESHOLD = 0.2
USE_SEGMENTATION = True      # set True if evaluating segmentation IoU

# ============================================================
# IOU HELPER FUNCTIONS
# ============================================================
def bbox_iou(b1, b2):
    x1, y1, w1, h1 = b1
    x2, y2, w2, h2 = b2
    xa = max(x1, x2); ya = max(y1, y2)
    xb = min(x1+w1, x2+w2); yb = min(y1+h1, y2+h2)
    inter = max(0, xb - xa) * max(0, yb - ya)
    union = w1*h1 + w2*h2 - inter
    return inter / union if union > 0 else 0

def seg_mask(seg, H, W):
    if seg is None: return None
    if isinstance(seg, list):
        mask = np.zeros((H, W), dtype=np.uint8)
        for poly in seg:
            try:
                pts = np.array(poly).reshape(-1, 2).astype(np.int32)
                cv2.fillPoly(mask, [pts], 1)
            except: continue
        return mask
    if isinstance(seg, dict):
        try:
            from pycocotools import mask as mu
            return mu.decode(seg)
        except: return None
    return None

def seg_iou(m1, m2):
    inter = np.logical_and(m1, m2).sum()
    union = np.logical_or(m1, m2).sum()
    return inter / union if union > 0 else 0

# ============================================================
# GLOBAL ACCUMULATORS
# ============================================================
global_fp_scores = []
dataset_stats = {}

print(f"{'DATASET':<30} | {'FP COUNT':<10} | {'MEAN FP SCORE':<15}")
print("-" * 65)

# ============================================================
# MAIN DATASET LOOP
# ============================================================
for dataset_name in datasets_to_eval:
    dataset_path = os.path.join(root_dataset, dataset_name)
    
    GT_JSON = os.path.join(dataset_path, "instances_default.json")
    PRED_JSON = os.path.join(dataset_path, "sam3_results_tiled.json")

    # Skip if files don't exist
    if not os.path.exists(GT_JSON) or not os.path.exists(PRED_JSON):
        # print(f"⚠️  Skipping {dataset_name} (files missing)")
        continue

    # Load JSONs
    with open(GT_JSON) as f: gt = json.load(f)
    with open(PRED_JSON) as f: pred = json.load(f)

    # Group by image
    gt_by_image = defaultdict(list)
    pred_by_image = defaultdict(list)
    sizes = {im["id"]: (im["height"], im["width"]) for im in gt["images"]}

    for g in gt["annotations"]:
        gt_by_image[g["image_id"]].append(g)

    for p in pred["annotations"]:
        pred_by_image[p["image_id"]].append(p)

    # Local accumulators for this dataset
    local_fp_scores = []

    # -----------------------------------------
    # PROCESS IMAGES IN CURRENT DATASET
    # -----------------------------------------
    for img_id in gt_by_image.keys():
        gt_objs = gt_by_image[img_id]
        # Sort predictions by score descending
        pred_objs = sorted(pred_by_image.get(img_id, []), 
                           key=lambda x: x.get("score", 0.0), reverse=True)

        matched_gt = set()

        # Prepare masks if segmentation is enabled
        if USE_SEGMENTATION:
            H, W = sizes[img_id]
            gt_masks = [seg_mask(g["segmentation"], H, W) for g in gt_objs]
            pred_masks = [seg_mask(p["segmentation"], H, W) for p in pred_objs]

        # Match predictions
        for pi, p in enumerate(pred_objs):
            best_iou = 0
            best_gt = None

            for gi, g in enumerate(gt_objs):
                if gi in matched_gt: continue
                # Optional: check category_id if strict class matching is needed
                # if p.get("category_id") != g.get("category_id"): continue

                if USE_SEGMENTATION:
                    iou_val = seg_iou(pred_masks[pi], gt_masks[gi])
                else:
                    iou_val = bbox_iou(p["bbox"], g["bbox"])

                if iou_val > best_iou:
                    best_iou = iou_val
                    best_gt = gi

            # ----------------------------------
            # DETERMINE TP / FP
            # ----------------------------------
            score = p.get("score", 0.0)

            if best_iou >= IOU_THRESHOLD:
                # True Positive
                matched_gt.add(best_gt)
            else:
                # False Positive
                local_fp_scores.append(score)
                global_fp_scores.append(score)

    # Calculate mean for this dataset
    if local_fp_scores:
        mean_score = np.mean(local_fp_scores)
        count = len(local_fp_scores)
    else:
        mean_score = 0.0
        count = 0

    dataset_stats[dataset_name] = {
        "count": count,
        "mean_score": mean_score
    }

    print(f"{dataset_name:<30} | {count:<10} | {mean_score:.4f}")

# ============================================================
# FINAL GLOBAL REPORT
# ============================================================
print("-" * 65)
print("\n==========================================")
print(" GLOBAL FP SCORE ANALYSIS ")
print("==========================================")

if global_fp_scores:
    global_mean = np.mean(global_fp_scores)
    global_median = np.median(global_fp_scores)
    global_min = np.min(global_fp_scores)
    global_max = np.max(global_fp_scores)
    
    print(f"Total FPs analyzed:   {len(global_fp_scores)}")
    print(f"Global Mean FP Score: {global_mean:.4f}")
    print(f"Global Median Score:  {global_median:.4f}")
    print(f"Score Range:          {global_min:.4f} - {global_max:.4f}")
else:
    print("No False Positives found across any dataset.")

# ============================================================
# OPTIONAL: SAVE RAW SCORES
# ============================================================
# If you want to plot a histogram later, saving the raw list is useful
with open("fp_scores_analysis.json", "w") as f:
    json.dump({
        "global_mean": global_mean if global_fp_scores else 0,
        "dataset_stats": dataset_stats,
        "all_fp_scores": global_fp_scores
    }, f, indent=4)

print("\nSaved detailed analysis to → fp_scores_analysis.json")

DATASET                        | FP COUNT   | MEAN FP SCORE  
-----------------------------------------------------------------
cao2022                        | 229        | 0.6248
-----------------------------------------------------------------

 GLOBAL FP SCORE ANALYSIS 
Total FPs analyzed:   229
Global Mean FP Score: 0.6248
Global Median Score:  0.5977
Score Range:          0.4531 - 0.9062

Saved detailed analysis to → fp_scores_analysis.json


In [None]:
# ============================================================
# find mean sam3 score of FP
# ============================================================

# List of datasets to evaluate
datasets_to_eval = [
    "amarathunga2022", "biodiscover-arm",
    "NHM-beetles-crops", "sittinger2023", "gernat2018",
    ## "PeMaToEuroPep", "ArTaxOr", "BIOSCAN",
    ##"sticky-pi", "pinoy2023", "cao2022", "Diopsis", "DIRT",  "Mothitor", "abram2023", "ALUS",
    ## "AMI-traps", "AMT", "anTraX",
    ## "CollembolAI", "DiversityScanner", "ubc-pitfall-traps", "ubc-scanned-sticky-cards",
]

In [6]:
import os
import json
import cv2
import numpy as np
from collections import defaultdict

# ============================================================
# CONFIGURATION
# ============================================================
root_dataset = "./flatbug-dataset"

datasets_to_eval = [
    ## "amarathunga2022",
    ## "NHM-beetles-crops", "sittinger2023", "gernat2018", "cao2022"
     "PeMaToEuroPep", "ArTaxOr", "BIOSCAN", "Mothitor", "DIRT", "abram2023",
     "Diopsis", "AMI-traps", "AMT", "anTraX", "biodiscover-arm",
    ## "ALUS",
    ##"sticky-pi", "pinoy2023",
    ## "CollembolAI", "DiversityScanner", "ubc-pitfall-traps", "ubc-scanned-sticky-cards",
]

IOU_THRESHOLD = 0.5 

# ============================================================
# UTILITIES
# ============================================================
def normalize_name(fname):
    """Strips paths to ensure matching works: 'data/img.jpg' -> 'img.jpg'"""
    return os.path.basename(fname) if fname else None

def polygons_to_mask(polygons, height, width):
    mask = np.zeros((height, width), dtype=np.uint8)
    for poly in polygons:
        if not poly: continue
        try:
            pts = np.array(poly, dtype=np.int32).reshape(-1, 2)
            cv2.fillPoly(mask, [pts], 1)
        except: continue
    return mask

def seg_to_mask(segmentation, height, width):
    if segmentation is None or not segmentation:
        return np.zeros((height, width), dtype=np.uint8)
    if isinstance(segmentation, list):
        if isinstance(segmentation[0], (int, float)): return np.zeros((height, width), dtype=np.uint8)
        return polygons_to_mask(segmentation, height, width)
    if isinstance(segmentation, dict):
        try:
            from pycocotools import mask as mask_utils
            return mask_utils.decode(segmentation).astype(np.uint8)
        except: return np.zeros((height, width), dtype=np.uint8)
    return np.zeros((height, width), dtype=np.uint8)

def mask_iou(mask1, mask2):
    inter = np.logical_and(mask1, mask2).sum()
    union = np.logical_or(mask1, mask2).sum()
    return float(inter) / float(union) if union > 0 else 0.0

# ============================================================
# MAIN LOOP
# ============================================================
global_fp_scores = []
dataset_stats = {}

print(f"{'DATASET':<25} | {'FP COUNT':<10} | {'MEAN SCORE':<10}")
print("-" * 60)

for dataset_name in datasets_to_eval:
    dataset_path = os.path.join(root_dataset, dataset_name)
    gt_file = os.path.join(dataset_path, "instances_default.json")
    
    # Attempt to find sam3 results
    pred_file = os.path.join(dataset_path, "sam3_results_tiled.json")
    if not os.path.exists(pred_file):
        pred_file = os.path.join(dataset_path, "sam3_results.json")

    if not os.path.exists(gt_file) or not os.path.exists(pred_file):
        # print(f"Skipping {dataset_name} (files missing)")
        continue

    # Load JSONs
    with open(gt_file, 'r') as f: gt = json.load(f)
    with open(pred_file, 'r') as f: pred = json.load(f)

    # -------------------------------------------------
    # DEBUG: CHECK FILE MAPPING
    # -------------------------------------------------
    # Map GT ID -> File Name (Normalized)
    gt_id_to_info = {}
    gt_filenames_set = set()
    
    for im in gt.get("images", []):
        norm_name = normalize_name(im["file_name"])
        gt_id_to_info[im["id"]] = {
            "file_name": norm_name,
            "height": im["height"],
            "width": im["width"]
        }
        gt_filenames_set.add(norm_name)

    # Map Pred ID -> File Name (Normalized)
    pred_id_to_filename = {}
    for im in pred.get("images", []):
        pred_id_to_filename[im["id"]] = normalize_name(im["file_name"])

    # -------------------------------------------------
    # ORGANIZE ANNOTATIONS BY FILE NAME
    # -------------------------------------------------
    gt_by_file = defaultdict(list)
    for ann in gt.get("annotations", []):
        img_id = ann.get("image_id")
        if img_id in gt_id_to_info:
            fname = gt_id_to_info[img_id]["file_name"]
            gt_by_file[fname].append(ann)

    pred_by_file = defaultdict(list)
    pred_filenames_set = set()
    
    for ann in pred.get("annotations", []):
        # Try to get filename from annotation, fallback to image_id lookup
        fname = normalize_name(ann.get("file_name"))
        if not fname:
            fname = pred_id_to_filename.get(ann.get("image_id"))
        
        if fname:
            pred_by_file[fname].append(ann)
            pred_filenames_set.add(fname)

    # -------------------------------------------------
    # LOUD DEBUGGING
    # -------------------------------------------------
    common_files = gt_filenames_set.intersection(pred_filenames_set)
    
    if len(common_files) == 0:
        print(f"\n[DEBUG] ⚠️  {dataset_name}: NO MATCHING FILES FOUND!")
        print(f"   GT Sample:   {list(gt_filenames_set)[:3]}")
        print(f"   Pred Sample: {list(pred_filenames_set)[:3]}")
        continue # Skip this dataset if no files match
        
    # Optional: Print match stats if suspicious
    # print(f"[DEBUG] {dataset_name}: Matched {len(common_files)} images.")

    # -------------------------------------------------
    # EVALUATION LOGIC
    # -------------------------------------------------
    local_fp_scores = []
    
    for file_name in common_files:
        gt_objs = gt_by_file[file_name]
        pred_objs = pred_by_file[file_name]
        
        if not pred_objs: continue

        H = gt_id_to_info[next(k for k,v in gt_id_to_info.items() if v["file_name"] == file_name)]["height"]
        W = gt_id_to_info[next(k for k,v in gt_id_to_info.items() if v["file_name"] == file_name)]["width"]

        # Convert GT to masks
        gt_masks = [seg_to_mask(g.get("segmentation"), H, W) for g in gt_objs]
        
        # Prepare Predictions
        # Sort by score desc
        pred_data = []
        for p in pred_objs:
            mask = seg_to_mask(p.get("segmentation"), H, W)
            score = p.get("score", 0.0)
            pred_data.append({"mask": mask, "score": score})
        
        pred_data.sort(key=lambda x: x["score"], reverse=True)

        matched_gt_indices = set()

        for p in pred_data:
            p_mask = p["mask"]
            best_iou = 0.0
            best_gt_idx = None

            for i, g_mask in enumerate(gt_masks):
                if i in matched_gt_indices: continue
                iou = mask_iou(p_mask, g_mask)
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = i
            
            if best_iou >= IOU_THRESHOLD:
                matched_gt_indices.add(best_gt_idx)
            else:
                # This is a FP
                local_fp_scores.append(p["score"])
                global_fp_scores.append(p["score"])

    # Stats
    count = len(local_fp_scores)
    mean_score = np.mean(local_fp_scores) if count > 0 else 0.0
    
    print(f"{dataset_name:<25} | {count:<10} | {mean_score:.4f}")

# ============================================================
# FINAL SUMMARY
# ============================================================
print("-" * 60)
if global_fp_scores:
    print(f"Global Mean FP Score: {np.mean(global_fp_scores):.4f}")
    print(f"Total FPs: {len(global_fp_scores)}")
else:
    print("No False Positives found.")

DATASET                   | FP COUNT   | MEAN SCORE
------------------------------------------------------------
amarathunga2022           | 23         | 0.6038
biodiscover-arm           | 510        | 0.6246
NHM-beetles-crops         | 848        | 0.7231
sittinger2023             | 65         | 0.6662
gernat2018                | 417        | 0.7228
------------------------------------------------------------
Global Mean FP Score: 0.6926
Total FPs: 1863


In [7]:
# ============================================================
# this script focuses on True Positives (TP). It collects the confidence scores of all successfully matched predictions (TPs) and counts how many of them fall 
## below Mean FP Score threshold calculated (0.6926). 
## These are the detections we would lose (turning them into False Negatives) if we filter by that score.
# ============================================================

import os
import json
import cv2
import numpy as np
from collections import defaultdict

# ============================================================
# CONFIGURATION
# ============================================================
root_dataset = "./flatbug-dataset"

datasets_to_eval = [
    ## "amarathunga2022",
    ## "NHM-beetles-crops", "sittinger2023", "gernat2018", "cao2022"
     "PeMaToEuroPep", "ArTaxOr", "BIOSCAN", "Mothitor", "DIRT", "abram2023",
     "Diopsis", "AMI-traps", "AMT", "anTraX", "biodiscover-arm",
    ## "ALUS",
    ##"sticky-pi", "pinoy2023",
    ## "CollembolAI", "DiversityScanner", "ubc-pitfall-traps", "ubc-scanned-sticky-cards",
]
# The threshold calculated from your previous run (Mean FP Score)
CUTOFF_SCORE = 0.6926

IOU_THRESHOLD = 0.5  # IoU required to call it a TP

# ============================================================
# UTILITIES
# ============================================================
def normalize_name(fname):
    """Strips paths to ensure matching works."""
    return os.path.basename(fname) if fname else None

def polygons_to_mask(polygons, height, width):
    mask = np.zeros((height, width), dtype=np.uint8)
    for poly in polygons:
        if not poly: continue
        try:
            pts = np.array(poly, dtype=np.int32).reshape(-1, 2)
            cv2.fillPoly(mask, [pts], 1)
        except: continue
    return mask

def seg_to_mask(segmentation, height, width):
    if segmentation is None or not segmentation:
        return np.zeros((height, width), dtype=np.uint8)
    if isinstance(segmentation, list):
        if isinstance(segmentation[0], (int, float)): return np.zeros((height, width), dtype=np.uint8)
        return polygons_to_mask(segmentation, height, width)
    if isinstance(segmentation, dict):
        try:
            from pycocotools import mask as mask_utils
            return mask_utils.decode(segmentation).astype(np.uint8)
        except: return np.zeros((height, width), dtype=np.uint8)
    return np.zeros((height, width), dtype=np.uint8)

def mask_iou(mask1, mask2):
    inter = np.logical_and(mask1, mask2).sum()
    union = np.logical_or(mask1, mask2).sum()
    return float(inter) / float(union) if union > 0 else 0.0

# ============================================================
# MAIN LOOP
# ============================================================
print(f"Analyzing impact of raising Confidence Threshold to: {CUTOFF_SCORE}\n")
print(f"{'DATASET':<25} | {'TOTAL TPs':<10} | {'LOST TPs':<10} | {'% LOST':<10}")
print("-" * 65)

total_tps_global = 0
lost_tps_global = 0

for dataset_name in datasets_to_eval:
    dataset_path = os.path.join(root_dataset, dataset_name)
    gt_file = os.path.join(dataset_path, "instances_default.json")
    
    # Try finding sam3 results
    pred_file = os.path.join(dataset_path, "sam3_results_tiled.json")
    if not os.path.exists(pred_file):
        pred_file = os.path.join(dataset_path, "sam3_results.json")

    if not os.path.exists(gt_file) or not os.path.exists(pred_file):
        continue

    # Load JSONs
    with open(gt_file, 'r') as f: gt = json.load(f)
    with open(pred_file, 'r') as f: pred = json.load(f)

    # -------------------------------------------------
    # MAPPING LOGIC (Same as before)
    # -------------------------------------------------
    gt_id_to_info = {}
    for im in gt.get("images", []):
        norm_name = normalize_name(im["file_name"])
        gt_id_to_info[im["id"]] = {
            "file_name": norm_name,
            "height": im["height"],
            "width": im["width"]
        }

    pred_id_to_filename = {im["id"]: normalize_name(im["file_name"]) for im in pred.get("images", [])}

    # Group by File
    gt_by_file = defaultdict(list)
    for ann in gt.get("annotations", []):
        img_id = ann.get("image_id")
        if img_id in gt_id_to_info:
            gt_by_file[gt_id_to_info[img_id]["file_name"]].append(ann)

    pred_by_file = defaultdict(list)
    for ann in pred.get("annotations", []):
        fname = normalize_name(ann.get("file_name"))
        if not fname:
            fname = pred_id_to_filename.get(ann.get("image_id"))
        if fname:
            pred_by_file[fname].append(ann)

    # -------------------------------------------------
    # CALCULATE TP & LOST TP
    # -------------------------------------------------
    dataset_tp_count = 0
    dataset_lost_tp_count = 0
    
    # Only process files that exist in both
    common_files = set(gt_by_file.keys()).intersection(pred_by_file.keys())

    for file_name in common_files:
        gt_objs = gt_by_file[file_name]
        pred_objs = pred_by_file[file_name]
        
        if not pred_objs: continue

        # Get dims
        # (A bit hacky lookup but works given logic above)
        img_info = next(v for v in gt_id_to_info.values() if v["file_name"] == file_name)
        H, W = img_info["height"], img_info["width"]

        # GT Masks
        gt_masks = [seg_to_mask(g.get("segmentation"), H, W) for g in gt_objs]
        
        # Preds (Sorted by score)
        pred_data = []
        for p in pred_objs:
            mask = seg_to_mask(p.get("segmentation"), H, W)
            score = p.get("score", 0.0)
            pred_data.append({"mask": mask, "score": score})
        
        pred_data.sort(key=lambda x: x["score"], reverse=True)

        matched_gt_indices = set()

        # Match TPs
        for p in pred_data:
            p_mask = p["mask"]
            best_iou = 0.0
            best_gt_idx = None

            for i, g_mask in enumerate(gt_masks):
                if i in matched_gt_indices: continue
                iou = mask_iou(p_mask, g_mask)
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = i
            
            # CHECK IF TP
            if best_iou >= IOU_THRESHOLD:
                matched_gt_indices.add(best_gt_idx)
                dataset_tp_count += 1
                
                # KEY CHECK: Is this TP score below our cutoff?
                if p["score"] <= CUTOFF_SCORE:
                    dataset_lost_tp_count += 1

    # Stats for this dataset
    pct_lost = (dataset_lost_tp_count / dataset_tp_count * 100) if dataset_tp_count > 0 else 0.0
    
    print(f"{dataset_name:<25} | {dataset_tp_count:<10} | {dataset_lost_tp_count:<10} | {pct_lost:.1f}%")

    total_tps_global += dataset_tp_count
    lost_tps_global += dataset_lost_tp_count

# ============================================================
# FINAL SUMMARY
# ============================================================
print("-" * 65)
if total_tps_global > 0:
    global_pct = (lost_tps_global / total_tps_global * 100)
    print(f"TOTAL TPs FOUND:     {total_tps_global}")
    print(f"TPs LOST (<= {CUTOFF_SCORE}): {lost_tps_global}")
    print(f"GLOBAL LOSS RATE:    {global_pct:.2f}%")
else:
    print("No True Positives found in any dataset.")

Analyzing impact of raising Confidence Threshold to: 0.6926

DATASET                   | TOTAL TPs  | LOST TPs   | % LOST    
-----------------------------------------------------------------
ArTaxOr                   | 1101       | 81         | 7.4%
BIOSCAN                   | 494        | 9          | 1.8%
biodiscover-arm           | 2541       | 1551       | 61.0%
-----------------------------------------------------------------
TOTAL TPs FOUND:     4136
TPs LOST (<= 0.6926): 1641
GLOBAL LOSS RATE:    39.68%
