In [1]:
from pathlib import Path

In [2]:
ref_dir = Path("atypical_examples")
grouped_imgs = {}
for category_dir in ref_dir.iterdir():
    if not category_dir.is_dir() or category_dir.name.startswith("."):
        continue
    img = []
    for img_path in category_dir.iterdir():
        if not img_path.is_file() or img_path.name.startswith("."):
            continue
        img.append(img_path)
    grouped_imgs[category_dir.name] = img

In [None]:
import os
import cv2
import numpy as np
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

# --- CONFIG ---
IMAGES_DIR = "images"         # folder with your 10 images
OUTPUT_DIR = "output"         # will be created, annotated images saved here
REFERENCE_INDEX = 0           # which image to use as reference (0 = first)
ORB_N_FEATURES = 3000
MATCH_DISTANCE_THRESHOLD = 60  # lower stricter; tune if needed
DBSCAN_EPS = 30               # pixels, tune with image resolution
DBSCAN_MIN_SAMPLES = 3
BOX_MARGIN = 10               # pixels to expand bounding box
# ----------------

# Convert to grayscale
grays = [cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) for im in images]
h, w = grays[0].shape[:2]

# Create ORB and detect keypoints/descriptors for reference image
orb = cv2.ORB_create(nfeatures=ORB_N_FEATURES)
kp_ref, des_ref = orb.detectAndCompute(grays[REFERENCE_INDEX], None)
if des_ref is None or len(kp_ref) == 0:
    raise SystemExit("No features found in reference image. Try increasing ORB_N_FEATURES.")

# Match each other image to reference and collect reference keypoint locations for good matches
all_ref_pts = []
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

for i, g in enumerate(grays):
    if i == REFERENCE_INDEX:
        continue
    kp, des = orb.detectAndCompute(g, None)
    if des is None or len(kp) == 0:
        print(f"Warning: no features in image {image_files[i]} - skipping.")
        continue
    # match descriptors: query = reference descriptors, train = current image descriptors
    matches = bf.match(des_ref, des)
    # keep good matches by distance
    good = [m for m in matches if m.distance < MATCH_DISTANCE_THRESHOLD]
    # optionally take top-K if too many
    good = sorted(good, key=lambda x: x.distance)[:1000]

    # For each good match, get the location of the keypoint in the reference image
    for m in good:
        q_idx = m.queryIdx  # index in des_ref / kp_ref
        pt = kp_ref[q_idx].pt  # (x, y) in reference image coordinates (float)
        all_ref_pts.append(pt)

all_ref_pts = np.array(all_ref_pts)  # shape (N, 2)
if all_ref_pts.shape[0] == 0:
    raise SystemExit("No good matches found across images. Try relaxing MATCH_DISTANCE_THRESHOLD or ORB_N_FEATURES.")

# Cluster the reference points to find the densest region (object location)
clust = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES).fit(all_ref_pts)
labels = clust.labels_
unique, counts = np.unique(labels[labels >= 0], return_counts=True)  # ignore noise label -1

if len(unique) == 0:
    # fallback: use bounding box around all matches
    print("No DBSCAN clusters found - falling back to bounding box around all matched points.")
    x_min = int(np.min(all_ref_pts[:, 0]))
    y_min = int(np.min(all_ref_pts[:, 1]))
    x_max = int(np.max(all_ref_pts[:, 0]))
    y_max = int(np.max(all_ref_pts[:, 1]))
else:
    # choose the largest cluster
    largest_label = unique[np.argmax(counts)]
    mask = labels == largest_label
    cluster_pts = all_ref_pts[mask]
    x_min = int(np.min(cluster_pts[:, 0]))
    y_min = int(np.min(cluster_pts[:, 1]))
    x_max = int(np.max(cluster_pts[:, 0]))
    y_max = int(np.max(cluster_pts[:, 1]))

# Expand box a bit
x_min = max(0, x_min - BOX_MARGIN)
y_min = max(0, y_min - BOX_MARGIN)
x_max = min(w - 1, x_max + BOX_MARGIN)
y_max = min(h - 1, y_max + BOX_MARGIN)
box = (x_min, y_min, x_max, y_max)
print("Detected bounding box (x_min, y_min, x_max, y_max):", box)

# Save visualization: plot matched points density and cluster
plt.figure(figsize=(6, 6))
plt.imshow(cv2.cvtColor(images[REFERENCE_INDEX], cv2.COLOR_BGR2RGB))
if all_ref_pts.size:
    plt.scatter(all_ref_pts[:, 0], all_ref_pts[:, 1], s=6, c='yellow', alpha=0.6)
if 'cluster_pts' in locals():
    plt.scatter(cluster_pts[:, 0], cluster_pts[:, 1], s=12, c='red', alpha=0.8)
rect = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                     linewidth=2, edgecolor='cyan', facecolor='none')
plt.gca().add_patch(rect)
plt.title('Reference image with matched points and detected box')
plt.axis('off')
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, 'reference_matched_points_box.png'), dpi=150)
plt.close()

# Apply the same box to all images and save
for fname, im in zip(image_files, images):
    im_out = im.copy()
    cv2.rectangle(im_out, (x_min, y_min), (x_max, y_max), (0, 255, 255), 2)
    # optionally crop & save cropped object
    cropped = im[y_min:y_max, x_min:x_max]
    cv2.imwrite(os.path.join(OUTPUT_DIR, f"boxed_{fname}"), im_out)
    cv2.imwrite(os.path.join(OUTPUT_DIR, f"crop_{fname}"), cropped)

print("Saved annotated images and crops to:", OUTPUT_DIR)


In [3]:
# Cross correlate each image within each category to find the most common part of the images
from skimage import io, feature, color
import numpy as np
import matplotlib.pyplot as plt

def cross_correlate_images(grouped_imgs: dict) -> dict:
    correlated_images = {}
    for category, img_paths in grouped_imgs.items():
        if not img_paths:
            continue
        # Load the first image as a reference
        ref_img = io.imread(img_paths[0])
        ref_img_gray = color.rgb2gray(ref_img)
        # Compute features for the reference image
        ref_features = feature.canny(ref_img_gray)
        # Cross-correlate with other images
        for img_path in img_paths[1:]:
            img = io.imread(img_path)
            img_gray = color.rgb2gray(img)
            img_features = feature.canny(img_gray)
            correlation = np.sum(ref_features * img_features)
            correlated_images[(category, img_path.name)] = correlation
    return correlated_images
correlated_results = cross_correlate_images(grouped_imgs)

ValueError: operands could not be broadcast together with shapes (2983,3961) (2736,3648) 