In [None]:
import pandas as pd
import numpy as np
import random

def compute_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2 : sequence of 4 numbers
        Each box is unpacked as ``(x_min, y_min, x_max, y_max)``.

    Returns
    -------
    number
        ``intersection / union`` when the union area is positive,
        otherwise ``0``.
    """
    ax_lo, ay_lo, ax_hi, ay_hi = box1
    bx_lo, by_lo, bx_hi, by_hi = box2

    # Overlap extent along each axis; clamped to zero when the boxes
    # do not overlap on that axis.
    overlap_w = max(0, min(ax_hi, bx_hi) - max(ax_lo, bx_lo))
    overlap_h = max(0, min(ay_hi, by_hi) - max(ay_lo, by_lo))
    overlap_area = overlap_w * overlap_h

    area_a = (ax_hi - ax_lo) * (ay_hi - ay_lo)
    area_b = (bx_hi - bx_lo) * (by_hi - by_lo)
    union_area = area_a + area_b - overlap_area

    # Guard against division by zero (or a non-positive union).
    if union_area > 0:
        return overlap_area / union_area
    return 0

def fuse_bounding_boxes(df, iou_thresh=0.3):
    """Merge overlapping boxes that share an image and a class.

    Rows are grouped by ``(image_id, class_name)``. Within a group, the first
    remaining box becomes an anchor; every other box whose IoU with the anchor
    is at least ``iou_thresh`` is absorbed by replacing the anchor with the
    smallest box enclosing both. The sweep repeats until a full pass absorbs
    nothing, then the anchor is emitted and the next remaining box is taken.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``image_id``, ``class_name``, ``x_min``,
        ``y_min``, ``x_max`` and ``y_max``.
    iou_thresh : float, default 0.3
        Minimum IoU (inclusive) for two boxes to be fused.

    Returns
    -------
    pandas.DataFrame
        One row per fused box with the columns ``image_id``, ``class_name``,
        ``x_min``, ``y_min``, ``x_max``, ``y_max``.
    """
    coord_cols = ["x_min", "y_min", "x_max", "y_max"]
    out_rows = []

    for (image_id, class_name), group in df.groupby(["image_id", "class_name"]):
        pending = group[coord_cols].values.tolist()

        while pending:
            anchor = pending.pop(0)

            # Keep sweeping: once the anchor grows it may overlap boxes that
            # were left untouched on an earlier pass.
            while True:
                grew = False
                untouched = []
                for candidate in pending:
                    if compute_iou(anchor, candidate) >= iou_thresh:
                        # Replace the anchor with the enclosing box of both.
                        lower = [min(a, c) for a, c in zip(anchor[:2], candidate[:2])]
                        upper = [max(a, c) for a, c in zip(anchor[2:], candidate[2:])]
                        anchor = lower + upper
                        grew = True
                    else:
                        untouched.append(candidate)
                pending = untouched
                if not grew:
                    break

            out_rows.append([image_id, class_name] + list(anchor))

    return pd.DataFrame(out_rows, columns=["image_id", "class_name"] + coord_cols)


# --- Configuration: everything a reader might want to tune -----------------
RAW_CSV_PATH  = "/content/drive/MyDrive/testing6.csv"
FUSE_IOU      = 0.3   # IoU threshold handed to fuse_bounding_boxes
MISS_FRACTION = 0.3   # fraction of images that should lose one box
SHUFFLE_SEED  = 42    # fixes the order in which images are visited
BOX_COLS      = ["x_min", "y_min", "x_max", "y_max"]

# --- Load the raw annotations and fuse overlapping boxes -------------------
df_raw   = pd.read_csv(RAW_CSV_PATH)
df_fused = fuse_bounding_boxes(df_raw, iou_thresh=FUSE_IOU)

# --- Visit images in a reproducible random order ---------------------------
all_imgs = df_fused["image_id"].unique().tolist()
target_n = int(len(all_imgs) * MISS_FRACTION)
random.seed(SHUFFLE_SEED)
random.shuffle(all_imgs)

visual_misses = []                    # held-out rows, one Series per picked image
remain_idx    = set(df_fused.index)   # index labels of the rows that are kept
picked_imgs   = set()

for img in all_imgs:
    if len(picked_imgs) >= target_n:
        break

    sub = df_fused[df_fused["image_id"] == img]
    idx_list = sub.index.tolist()
    # Extract the coordinates once per image rather than re-querying
    # df_fused with .loc for every pair of boxes.
    boxes = sub[BOX_COLS].values.tolist()

    # First box (in row order) that has zero IoU with every other box of the
    # same image. An image with a single box trivially qualifies.
    pick = next(
        (
            idx
            for pos, idx in enumerate(idx_list)
            if all(
                compute_iou(boxes[pos], boxes[other]) == 0
                for other in range(len(boxes))
                if other != pos
            )
        ),
        None,
    )
    if pick is None:
        continue

    visual_misses.append(df_fused.loc[pick])
    remain_idx.remove(pick)
    picked_imgs.add(img)

# Passing the columns explicitly keeps the schema (and hence the CSV header)
# intact even when no box was picked and `visual_misses` is empty.
vm_df  = pd.DataFrame(visual_misses, columns=df_fused.columns).reset_index(drop=True)
rem_df = df_fused.loc[sorted(remain_idx)].reset_index(drop=True)

# The two outputs must partition the fused annotations exactly.
assert len(vm_df) + len(rem_df) == len(df_fused), "held-out + remaining rows != fused rows"

print(f"Removed one box from {len(picked_imgs)} images (target was {target_n}).")


In [None]:
# Write the held-out boxes and the remaining annotations to Drive,
# without the DataFrame index column.
for frame, out_path in (
    (vm_df,  "/content/drive/MyDrive/visual_misses.csv"),
    (rem_df, "/content/drive/MyDrive/remaining_annotations.csv"),
):
    frame.to_csv(out_path, index=False)