In [1]:
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os

In [2]:
def extract_objects(image_path, min_area=1000, padding=0):
    """
    Extract objects from an image using contours.

    The image is converted to grayscale, Canny edges (thresholds 50/150) are
    dilated twice with a 3x3 kernel to close small gaps, and the bounding
    rectangle of every sufficiently large external contour is cropped out.

    Args:
        image_path (str): Path to the image.
        min_area (int): Minimum area of the contour to be considered an object.
        padding (int): Non-negative number of pixels added on every side of
            each bounding rectangle. Crops are clipped to the image bounds.
            The default of 0 yields the tight bounding rectangle.

    Returns:
        list: List of object images (BGR crops). The crops are NumPy slices
        of the loaded image, i.e. views rather than copies.

    Raises:
        ValueError: If the image cannot be read.
    """
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Unable to read the image")

    # Convert to grayscale and detect edges using Canny
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)

    # Dilate edges to connect broken lines
    kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(edges, kernel, iterations=2)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list in both.
    contours = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    img_h, img_w = image.shape[:2]
    objects = []
    for contour in contours:
        # Filter out small contours
        if cv2.contourArea(contour) < min_area:
            continue

        # Bounding rectangle, grown by `padding` and clipped to the image
        x, y, w, h = cv2.boundingRect(contour)
        left = max(0, x - padding)
        top = max(0, y - padding)
        right = min(img_w, x + w + padding)
        bottom = min(img_h, y + h + padding)

        # Extract object image
        objects.append(image[top:bottom, left:right])

    return objects

In [13]:
def template_matching_with_mask(main_image_path, objects, mask_threshold=230, scales=np.linspace(0.4, 1.0, 20)):
    """
    Perform template matching with mask on multiple object images.

    Each object is converted to grayscale and matched against the grayscale
    main image at every scale. Pixels of the object that are at least as
    bright as ``mask_threshold`` are excluded from the comparison. The best
    match of every object is drawn on the main image, which is then written
    to ``result_<index>.jpg`` in the current working directory.

    Args:
    main_image_path (str): Path to the main image.
    objects (list): List of object images (BGR).
    mask_threshold (int): Threshold for creating the mask; object pixels with
        a gray value below it are kept.
    scales (numpy.ndarray): Array of scale factors for resizing.

    Returns:
    list: One entry per object: ``(position, (height, width), score)`` of the
    best match, where ``score`` is 1 minus the normalized squared difference,
    or ``None`` when no scale produced a match.

    Raises:
    ValueError: If the main image cannot be read.
    """
    import warnings

    main_image = cv2.imread(main_image_path)
    if main_image is None:
        raise ValueError("Unable to read the main image")

    main_gray = cv2.cvtColor(main_image, cv2.COLOR_BGR2GRAY)
    best_matches = []

    for i, obj in enumerate(objects):
        object_gray = cv2.cvtColor(obj, cv2.COLOR_BGR2GRAY)
        object_mask = (object_gray < mask_threshold).astype(np.uint8)

        best_match = find_best_match(main_gray, object_gray, object_mask, scales)
        best_matches.append(best_match)
        # Keep the None placeholder so indices still line up with `objects`,
        # but only draw when there is something to draw.
        if best_match is not None:
            draw_result(main_image, best_match, i)

    # Derive the task index from the trailing digits of the file name
    # ('main_image_12.jpg' -> '12'). Indexing path[-5] only worked for a
    # single-digit index followed by a four-character extension.
    stem = os.path.splitext(os.path.basename(main_image_path))[0]
    trailing_digits = stem[len(stem.rstrip('0123456789')):]
    task_index = trailing_digits or stem
    result_name = f'result_{task_index}.jpg'

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(result_name, main_image):
        warnings.warn(f"Could not write annotated image to {result_name}")
    return best_matches

def find_best_match(main_gray, object_gray, object_mask, scales):
    """
    Find the best match for an object in the main image across different scales.

    For every scale the object and its mask are resized and matched with
    ``cv2.TM_SQDIFF_NORMED`` (lower is better). Scales that cannot be matched
    are skipped instead of aborting the search: non-positive scales, scales
    that shrink the object below one pixel, and scales at which the object is
    larger than the main image.

    Args:
    main_gray (numpy.ndarray): Grayscale main image.
    object_gray (numpy.ndarray): Grayscale object image.
    object_mask (numpy.ndarray): Binary mask of the object.
    scales (numpy.ndarray): Array of scale factors for resizing.

    Returns:
    tuple: ``(position, (height, width), score)`` of the best match, where
    ``position`` is the location reported by ``cv2.minMaxLoc``, the size is
    that of the resized object, and ``score`` is ``1 - min_val``. ``None`` if
    no scale could be evaluated.
    """
    main_h, main_w = main_gray.shape[:2]
    obj_h, obj_w = object_gray.shape[:2]

    best_val = float('inf')
    best_match = None

    for scale in scales:
        # cv2.resize rejects an empty output size, so skip degenerate scales.
        if scale <= 0 or round(obj_h * scale) < 1 or round(obj_w * scale) < 1:
            continue

        resized_object = cv2.resize(object_gray, None, fx=scale, fy=scale)
        resized_mask = cv2.resize(object_mask, None, fx=scale, fy=scale)
        h, w = resized_object.shape

        # cv2.matchTemplate requires the template to fit inside the image.
        if h > main_h or w > main_w:
            continue

        result = cv2.matchTemplate(main_gray, resized_object, cv2.TM_SQDIFF_NORMED, mask=resized_mask)
        min_val, _, min_loc, _ = cv2.minMaxLoc(result)

        if min_val < best_val:
            best_val = min_val
            best_match = (min_loc, (h, w), 1 - min_val)

    return best_match

def draw_result(main_image, match, i):
    """
    Draw the best match result on the main image.

    Draws a black rectangle around the match and a black
    ``object_<i> (<score>)`` label above it, modifying ``main_image`` in
    place. Does nothing when ``match`` is ``None``.

    Args:
    main_image (numpy.ndarray): Main image to draw on.
    match (tuple): Match details (position, size, score), or None.
    i (int): Object index.
    """
    if match is None:
        return

    pt, (h, w), score = match
    label = f"object_{i} ({score:.2f})"
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    text_thickness = 3

    cv2.rectangle(main_image, pt, (pt[0] + w, pt[1] + h), (0, 0, 0), 5)

    # putText anchors the text at its bottom-left corner; for a match at the
    # top edge "10 px above" would push the label outside the image, so keep
    # at least one text height of room.
    (_, text_h), _ = cv2.getTextSize(label, font, font_scale, text_thickness)
    label_y = max(pt[1] - 10, text_h)
    cv2.putText(main_image, label, (pt[0], label_y),
                font, font_scale, (0, 0, 0), text_thickness)

def overlaps(match1, match2, overlap_threshold=0.5):
    """
    Check if two matches overlap.

    The overlap ratio is the intersection area of the two boxes divided by
    the area of the smaller box.

    Args:
    match1, match2 (tuple): Match details. Either the 3-tuple
        ``((x, y), (height, width), score)`` or a 4-tuple with one extra
        leading element, ``(extra, (x, y), (height, width), score)``.
    overlap_threshold (float): Threshold for considering overlap.

    Returns:
    bool: True if the overlap ratio exceeds the threshold, False otherwise
    (including when either box has zero area).
    """
    # Position and size are always the third- and second-to-last elements,
    # which covers both the 3-tuple and the 4-tuple layout.
    (x1, y1), (h1, w1) = match1[-3], match1[-2]
    (x2, y2), (h2, w2) = match2[-3], match2[-2]

    intersection_width = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
    intersection_height = max(0, min(y1 + h1, y2 + h2) - max(y1, y2))
    intersection_area = intersection_width * intersection_height

    smaller_area = min(w1 * h1, w2 * h2)
    if smaller_area <= 0:
        # A degenerate box cannot overlap anything; avoids division by zero.
        return False

    return intersection_area / smaller_area > overlap_threshold



In [14]:
# Task 1: crop the objects out of 'objects_to_find_1.jpg', then locate each
# of them in 'main_image_1.jpg' (the annotated image is saved as result_1.jpg).
objects_1 = extract_objects(image_path='objects_to_find_1.jpg')
main_image_path_1 = 'main_image_1.jpg'
results = template_matching_with_mask(
    main_image_path=main_image_path_1,
    objects=objects_1,
)

In [15]:
# Task 2: crop the objects out of 'objects_to_find_2.jpg', then locate each
# of them in 'main_image_2.jpg' (the annotated image is saved as result_2.jpg).
objects_2 = extract_objects(image_path='objects_to_find_2.jpg')
main_image_path_2 = 'main_image_2.jpg'
results = template_matching_with_mask(
    main_image_path=main_image_path_2,
    objects=objects_2,
)