In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import os

## Template matching y descriptores
Objetivos:
1. Obtener una detección del logo en cada imagen sin falsos positivos
2. Plantear y validar un algoritmo para múltiples detecciones en la imagen `coca_multi.png` con el mismo template del ítem 1
3. Generalizar el algoritmo del ítem 2 para todas las imágenes

Visualizar los resultados con bounding boxes en cada imagen mostrando el nivel de confianza de la detección.

### Template e imágenes a analizar

In [None]:
TEMPLATE_PATH = "./template/pattern.png"
IMAGES_PATH = "./images/"

# cv2.imread does not raise on a missing or unreadable file: it returns None,
# which would otherwise only surface later as an obscure cvtColor error.
template_color = cv2.imread(TEMPLATE_PATH, cv2.IMREAD_COLOR)
if template_color is None:
    raise FileNotFoundError(f"Could not read template image: {TEMPLATE_PATH}")

# Single-logo test images: every entry in IMAGES_PATH whose name does not
# contain "multi". Entries that OpenCV cannot decode (hidden files,
# sub-directories, ...) are dropped instead of being kept as None.
# NOTE(review): os.listdir order is filesystem-dependent; it is deliberately
# left unsorted here so that positions in this list are unchanged for any code
# that relies on them.
images_color = [
    image
    for image in (
        cv2.imread(os.path.join(IMAGES_PATH, image_name), cv2.IMREAD_COLOR)
        for image_name in os.listdir(path=IMAGES_PATH)
        if "multi" not in image_name
    )
    if image is not None
]
if not images_color:
    raise FileNotFoundError(f"No readable images found in: {IMAGES_PATH}")

### Pre-procesamiento de imágenes
<!-- - Se normaliza la imagen mediante una ecualización del histograma -->
- Conversión a escala de grises
- Aplicación de Gaussian Blur para reducción de ruido
- Detección de límites con Canny (explorada; no se aplica en la versión final de `preprocess_image`)

In [None]:
def preprocess_image(input_image: np.ndarray) -> np.ndarray:
    """Convert an image to grayscale and smooth it with a Gaussian blur.

    Args:
        input_image: Image handed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``,
            i.e. expected in BGR channel order.

    Returns:
        The grayscale image after a 3x3 Gaussian blur with sigma 1.
        No edge detection is applied.
    """
    grayscale = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)
    return cv2.GaussianBlur(grayscale, (3, 3), 1)

In [None]:
# Candidate templates derived from the colour template:
#   template1: pre-processed (grayscale + blur)
#   template2: photometric negative of template1
#   template3 / template4: template1 / template2 cropped to columns 40..349
template1 = preprocess_image(template_color)
# Unary minus on a uint8 array wraps modulo 256 (0 stays 0, 255 becomes 1), so
# it is not a true negative; bitwise_not yields 255 - value for every pixel.
template2 = cv2.bitwise_not(template1)
template3 = template1[:, 40:350]
template4 = template2[:, 40:350]

fig, axs = plt.subplots(nrows=1, ncols=5)
fig.set_size_inches(20, 5)
axs[0].imshow(cv2.cvtColor(template_color, cv2.COLOR_BGR2RGB))
axs[0].set_title("Original template")
for template_number, (ax, template_variant) in enumerate(
    zip(axs[1:], (template1, template2, template3, template4)), start=1
):
    ax.imshow(template_variant, cmap="gray")
    ax.set_title(f"Pre-processed template {template_number}")
plt.show()

In [None]:
input_images = [preprocess_image(image) for image in images_color]

# Top row: original images; bottom row: their pre-processed versions.
# squeeze=False keeps `axs` two-dimensional even when there is a single image,
# so the row indexing below always works.
fig, axs = plt.subplots(nrows=2, ncols=len(images_color), squeeze=False)
fig.set_size_inches(25, 10)
for ax_color, ax_preprocess, image_color, input_image in zip(
    axs[0], axs[1], images_color, input_images
):
    ax_color.imshow(cv2.cvtColor(image_color, cv2.COLOR_BGR2RGB))
    ax_color.set_title("Original")
    ax_preprocess.imshow(input_image, cmap="gray")
    ax_preprocess.set_title("Pre-processed")

### Aplicación de pirámides en el template
Se exploró su uso, pero no se utiliza en el algoritmo final.

Pirámide gaussiana:

In [None]:
# Size limits for the pyramid: largest test-image dimensions (upper bound for
# up-sampling) and the width at which down-sampling stops.
min_width = 70
max_height = max(image.shape[0] for image in input_images)
max_width = max(image.shape[1] for image in input_images)

# Up-sampling: keep doubling while the largest level is still smaller than the
# biggest test image in both dimensions. New levels are inserted at the front,
# so the list ends up ordered from largest to the original size.
template_oversapling = [template2.copy()]
while not (
    template_oversapling[0].shape[0] >= max_height
    or template_oversapling[0].shape[1] >= max_width
):
    template_oversapling.insert(0, cv2.pyrUp(template_oversapling[0]))

# Down-sampling: keep halving while the smallest level is wider than min_width.
template_subsapling = [template2.copy()]
while min_width < template_subsapling[-1].shape[1]:
    template_subsapling += [cv2.pyrDown(template_subsapling[-1])]

# Both lists contain the original-size template; drop it from the first one so
# it appears only once in the combined pyramid.
template_gauss_pyramid = [*template_oversapling[:-1], *template_subsapling]

In [None]:
print("NUMBER OF TEMPLATES:", len(template_gauss_pyramid))

# squeeze=False keeps `axs` indexable even when the pyramid has one level.
fig, axs = plt.subplots(nrows=1, ncols=len(template_gauss_pyramid), squeeze=False)
fig.set_size_inches(20, 5)
for ax, template in zip(axs[0], template_gauss_pyramid):
    # The pyramid levels are single-channel (built from the pre-processed
    # template), so they are shown with a gray colormap; COLOR_BGR2RGB needs a
    # 3- or 4-channel input and raises on these arrays.
    ax.imshow(template, cmap="gray")
    ax.set_title(f"{template.shape[1]}x{template.shape[0]}")

Para tener mayor granularidad se generó una pirámide con `resize` e interpolación INTER_NEAREST.

In [None]:
def generate_template_pyramid(template, min_scale=0.2, max_scale=2.5, step=0.1):
    """Generate resized copies of a template at evenly spaced scale factors.

    Args:
        template: Image array; only ``template.shape[:2]`` (height, width) is
            read here before it is handed to ``cv2.resize``.
        min_scale: Smallest scale factor.
        max_scale: Largest scale factor; no generated scale exceeds it.
        step: Increment between consecutive scale factors. Must be positive.

    Returns:
        List of templates resized with nearest-neighbour interpolation, ordered
        from ``min_scale`` upwards. Scales that would give a zero-pixel side
        are skipped; the list is empty when ``max_scale < min_scale``.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")
    height, width = template.shape[:2]
    # Count the whole steps that fit in [min_scale, max_scale] explicitly
    # instead of np.arange(min, max + step, step): with a float step arange can
    # emit an extra element beyond max_scale. The epsilon absorbs the rounding
    # error of the division.
    n_scales = max(int(np.floor((max_scale - min_scale) / step + 1e-9)) + 1, 0)
    pyramid = []
    for scale in min_scale + step * np.arange(n_scales):
        # round() rather than truncation so that a scale such as 0.7999999
        # (float accumulation) still maps to the intended pixel size.
        new_width = int(round(width * scale))
        new_height = int(round(height * scale))
        if new_width < 1 or new_height < 1:
            continue  # cv2.resize rejects empty target sizes
        pyramid.append(
            cv2.resize(template, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
        )
    return pyramid

In [None]:
# Multi-scale pyramid over both cropped templates: all scales of template3
# first, followed by all scales of template4.
template_pyramid = [
    scaled_template
    for base_template in (template3, template4)
    for scaled_template in generate_template_pyramid(
        base_template, min_scale=0.2, max_scale=5, step=0.1
    )
]
print("NUMBER OF TEMPLATES:", len(template_pyramid))

### Template matching
Consideraciones al comparar los resultados obtenidos para diferentes templates:
- Las métricas TM_CCOEFF y TM_SQDIFF dependen del tamaño del template (por eso se utilizan las variantes normalizadas)

In [None]:
def single_template_matching(
    input_image,
    template_pyramid,
    metric_fn=cv2.TM_CCOEFF_NORMED,
    threshold=0.5,
    k_targets=1,
    nms_threshold=0.4,
):
    """Match every template of a pyramid against an image and keep the best boxes.

    The image is pre-processed with ``preprocess_image``. For each template
    that fits inside the image, only the single best location of the matching
    map is kept as a candidate; candidates from all templates are then
    filtered with ``cv2.dnn.NMSBoxes``.

    Args:
        input_image: Colour image, as accepted by ``preprocess_image``.
        template_pyramid: Iterable of pre-processed templates at different sizes.
        metric_fn: OpenCV template-matching mode (``cv2.TM_*``).
        threshold: Score threshold forwarded to ``cv2.dnn.NMSBoxes``.
        k_targets: Forwarded to ``cv2.dnn.NMSBoxes`` as ``top_k``.
        nms_threshold: Overlap threshold forwarded to ``cv2.dnn.NMSBoxes``.

    Returns:
        List of ``[x, y, width, height]`` boxes, or ``[(-1, -1, -1, -1)]`` when
        nothing is detected.

    Notes:
        For ``TM_SQDIFF`` / ``TM_SQDIFF_NORMED`` the best match is the minimum
        of the matching map. Scores are mapped so that larger is better
        (``1 - min`` for the normalised mode, ``-min`` otherwise); ``threshold``
        is compared against that mapped score.
    """
    no_detection = [(-1, -1, -1, -1)]
    lower_is_better = metric_fn in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED)

    preprocess_input = preprocess_image(input_image)
    image_height, image_width = preprocess_input.shape[:2]
    detected_boxes = []
    detected_scores = []

    for template in template_pyramid:
        template_height, template_width = template.shape[:2]
        # matchTemplate requires the template to fit inside the image.
        if template_height > image_height or template_width > image_width:
            continue
        match = cv2.matchTemplate(preprocess_input, template, metric_fn)
        min_value, max_value, min_loc, max_loc = cv2.minMaxLoc(match)
        if lower_is_better:
            best_loc = min_loc
            score = 1.0 - min_value if metric_fn == cv2.TM_SQDIFF_NORMED else -min_value
        else:
            best_loc = max_loc
            score = max_value
        if not np.isfinite(score):
            continue  # a NaN/inf score would corrupt the NMS ranking
        detected_boxes.append([best_loc[0], best_loc[1], template_width, template_height])
        detected_scores.append(float(score))

    if not detected_scores:
        return no_detection

    # Apply Non Maximum Suppression
    indices = cv2.dnn.NMSBoxes(
        detected_boxes, detected_scores, threshold, nms_threshold, top_k=k_targets
    )
    # Depending on the OpenCV version the result is an empty tuple, a flat
    # array or an (N, 1) array; normalise it to a flat integer array.
    indices = np.asarray(indices, dtype=int).reshape(-1)
    if indices.size == 0:
        return no_detection
    return [detected_boxes[index] for index in indices]

Comparación de métodos de template matching haciendo un barrido por la pirámide de templates:

In [None]:
# (name, OpenCV mode, score threshold) for each metric being compared.
# Only the normalised correlation modes are kept; TM_CCOEFF, TM_CCORR,
# TM_SQDIFF and TM_SQDIFF_NORMED were tried and discarded
# (the SQDIFF variants gave no good results).
metrics = [
    ("TM_CCOEFF_NORMED", cv2.TM_CCOEFF_NORMED, 0.37),
    ("TM_CCORR_NORMED", cv2.TM_CCORR_NORMED, 0.95),
]

# One row per image, one column per metric. squeeze=False keeps `axs`
# two-dimensional even with a single image.
fig, axs = plt.subplots(nrows=len(images_color), ncols=len(metrics), squeeze=False)
fig.set_size_inches(20, 20)

for input_image_idx, input_image in enumerate(images_color):
    for ax, (metric_name, metric_fn, threshold) in zip(axs[input_image_idx], metrics):
        x, y, w, h = single_template_matching(
            input_image.copy(), template_pyramid, metric_fn, threshold, k_targets=1
        )[0]
        plot_image = input_image.copy()
        if w > 0 and h > 0:
            cv2.rectangle(plot_image, (x, y), (x + w, y + h), (0, 255, 0), 3)
            ax.set_title(metric_name)
        else:
            # (-1, -1, -1, -1) is the "no detection" sentinel: draw nothing.
            ax.set_title(f"{metric_name} (no detection)")
        ax.imshow(cv2.cvtColor(plot_image, cv2.COLOR_BGR2RGB))

In [None]:
# Multi-detection attempt with template matching on the multi-logo image.
input_image = cv2.imread(IMAGES_PATH + "coca_multi.png", cv2.IMREAD_COLOR)
if input_image is None:
    raise FileNotFoundError("Could not read image: " + IMAGES_PATH + "coca_multi.png")
detections = single_template_matching(input_image.copy(), template_pyramid, threshold=0.5, k_targets=10)
print(detections)
plot_image = input_image.copy()
# Each detection carries its own width; the loop previously unpacked it into an
# unused name and drew every box with a stale `w` left over from another cell.
for x, y, w, h in detections:
    if w <= 0 or h <= 0:
        continue  # (-1, -1, -1, -1) is the "no detection" sentinel
    cv2.rectangle(plot_image, (x, y), (x + w, y + h), (0, 255, 0), 3)
plt.imshow(cv2.cvtColor(plot_image, cv2.COLOR_BGR2RGB))


### Features matching con SIFT
Objetivo 1 cumplido: Obtener una detección del logo en cada imagen sin falsos positivos

In [None]:
def _sift_homography(sift, matcher, template, image, ratio=0.8, ransac_threshold=3.0):
    """Match ``template`` against ``image`` with SIFT and fit a RANSAC homography.

    Returns ``None`` when no homography can be estimated (no descriptors on
    either side, fewer than 4 matches surviving the ratio test, or RANSAC
    failure). Otherwise returns a dict with the template, both keypoint lists,
    the ratio-test matches sorted by distance, the homography, the RANSAC
    inlier mask as a flat list and the inlier count.
    """
    keypoints_template, descriptors_template = sift.detectAndCompute(template, None)
    keypoints_input, descriptors_input = sift.detectAndCompute(image, None)
    if descriptors_template is None or descriptors_input is None:
        return None
    matches = matcher.knnMatch(descriptors_template, descriptors_input, k=2)

    # Lowe's ratio test; entries with fewer than two neighbours are skipped.
    good_matches = sorted(
        (
            pair[0]
            for pair in matches
            if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
        ),
        key=lambda match: match.distance,
    )
    if len(good_matches) < 4:
        return None  # a homography needs at least 4 correspondences

    template_points = np.float32(
        [keypoints_template[m.queryIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)
    input_points = np.float32(
        [keypoints_input[m.trainIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)
    homography, mask = cv2.findHomography(
        template_points, input_points, cv2.RANSAC, ransac_threshold
    )
    if homography is None:
        return None

    inlier_mask = mask.ravel().tolist()
    return dict(
        template=template,
        keypoints_template=keypoints_template,
        keypoints_input=keypoints_input,
        good_matches=good_matches,
        homography=homography,
        inlier_mask=inlier_mask,
        inliers=sum(inlier_mask),
    )


# squeeze=False keeps `axs` two-dimensional even with a single image.
fig, axs = plt.subplots(nrows=len(input_images), ncols=1, squeeze=False)
fig.set_size_inches(10, 20)

sift = cv2.SIFT_create()
bf_matcher = cv2.BFMatcher()

for ax, color_image, input_image in zip(axs[:, 0], images_color, input_images):
    # Try both template polarities and keep the one with more RANSAC inliers,
    # instead of picking the polarity from the image's position in the list
    # (which depends on the directory listing order). On ties template2 wins.
    candidates = [
        result
        for result in (
            _sift_homography(sift, bf_matcher, candidate_template, input_image)
            for candidate_template in (template2, template1)
        )
        if result is not None
    ]
    if not candidates:
        ax.imshow(cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB))
        ax.set_title("No detection")
        continue
    best = max(candidates, key=lambda result: result["inliers"])

    # Project the template outline into the image with the homography.
    height, width = best["template"].shape[:2]
    template_corners = np.float32(
        [[0, 0], [width, 0], [width, height], [0, height]]
    ).reshape(-1, 1, 2)
    transformed_corners = cv2.perspectiveTransform(template_corners, best["homography"])

    plot_image = cv2.polylines(
        color_image.copy(),
        [np.int32(transformed_corners)],
        isClosed=True,
        color=(0, 255, 0),
        thickness=3,
        lineType=cv2.LINE_AA,
    )
    plot_image = cv2.drawMatches(
        best["template"],
        best["keypoints_template"],
        plot_image,
        best["keypoints_input"],
        best["good_matches"],
        None,
        matchColor=(0, 255, 255),
        singlePointColor=None,
        matchesMask=best["inlier_mask"],
        flags=2,  # 2 == NOT_DRAW_SINGLE_POINTS: unmatched keypoints are not drawn
    )
    ax.imshow(cv2.cvtColor(plot_image, cv2.COLOR_BGR2RGB))
    # The inlier ratio is used as a simple confidence proxy for the detection.
    n_matches = len(best["good_matches"])
    ax.set_title(
        f"Confidence (inlier ratio): {best['inliers'] / n_matches:.2f} "
        f"({best['inliers']}/{n_matches} matches)"
    )

In [None]:
# SIFT matching between the inverted template and the multi-logo image.
# The keypoints and matches computed here are reused by the clustering cell.
sift = cv2.SIFT_create()
bf_matcher = cv2.BFMatcher()
template = template2

color_image = cv2.imread(IMAGES_PATH + "coca_multi.png", cv2.IMREAD_COLOR)
if color_image is None:
    raise FileNotFoundError("Could not read image: " + IMAGES_PATH + "coca_multi.png")
input_image = preprocess_image(color_image)

keypoints_template, descriptors_template = sift.detectAndCompute(template, None)
keypoints_input, descriptors_input = sift.detectAndCompute(input_image, None)
# detectAndCompute returns None descriptors when no keypoints are found.
if descriptors_template is None or descriptors_input is None:
    raise RuntimeError("SIFT found no keypoints in the template or the input image")
matches = bf_matcher.knnMatch(descriptors_template, descriptors_input, k=2)
print(len(matches))

# Keep every best match that is strictly closer than the second-best one,
# i.e. no Lowe ratio is applied here.
# NOTE(review): presumably the ratio test is relaxed because, with repeated
# logos, the second-best match is often another true instance - confirm.
good_matches = sorted(
    [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < pair[1].distance
    ],
    key=lambda match: match.distance,
)

template_points = np.float32(
    [keypoints_template[m.queryIdx].pt for m in good_matches]
).reshape(-1, 1, 2)
input_points = np.float32(
    [keypoints_input[m.trainIdx].pt for m in good_matches]
).reshape(-1, 1, 2)

# findHomography needs at least 4 correspondences and yields no mask when it
# fails; in both cases all matches are drawn unfiltered.
homography, mask = None, None
if len(good_matches) >= 4:
    homography, mask = cv2.findHomography(
        template_points, input_points, cv2.RANSAC, 5.0
    )
draw_params = dict(
    matchColor=(0, 255, 255),
    singlePointColor=None,
    matchesMask=mask.ravel().tolist() if mask is not None else None,
    flags=2,  # 2 == NOT_DRAW_SINGLE_POINTS: unmatched keypoints are not drawn
)

plot_image = color_image.copy()
plot_image = cv2.drawMatches(
    template,
    keypoints_template,
    plot_image,
    keypoints_input,
    good_matches,
    None,
    **draw_params
)
plt.imshow(cv2.cvtColor(plot_image, cv2.COLOR_BGR2RGB))


In [None]:

# Image-side coordinates of every good match, one row per match.
input_points = np.float32(
    [keypoints_input[m.trainIdx].pt for m in good_matches]
).reshape(-1, 2)

# Cluster matches by their position in the image; each cluster is treated as
# one candidate logo instance. DBSCAN cannot be fitted on zero samples.
if len(input_points) > 0:
    dbscan = DBSCAN(eps=30, min_samples=4).fit(input_points)
    labels = dbscan.labels_
else:
    labels = np.empty(0, dtype=int)

# Separate matches into groups (label -1 marks DBSCAN noise points).
unique_labels = set(labels)
clusters = {label: [] for label in unique_labels if label != -1}
for match, label in zip(good_matches, labels):
    if label != -1:
        clusters[label].append(match)

# Template outline, projected into the image once per cluster.
height, width = template.shape[:2]
template_corners = np.float32(
    [[0, 0], [width, 0], [width, height], [0, height]]
).reshape(-1, 1, 2)

detections = []
# RANSAC inlier ratio per detection, used as a simple confidence proxy.
detection_scores = []
for label, cluster_matches in clusters.items():
    if len(cluster_matches) < 4:
        continue  # findHomography needs at least 4 correspondences
    cluster_template_points = np.float32(
        [keypoints_template[m.queryIdx].pt for m in cluster_matches]
    ).reshape(-1, 1, 2)
    cluster_input_points = np.float32(
        [keypoints_input[m.trainIdx].pt for m in cluster_matches]
    ).reshape(-1, 1, 2)

    # Compute homography for this group
    homography, mask = cv2.findHomography(
        cluster_template_points, cluster_input_points, cv2.RANSAC, 5.0
    )
    if homography is None:
        continue
    detections.append(cv2.perspectiveTransform(template_corners, homography))
    detection_scores.append(float(mask.sum()) / len(cluster_matches))

plot_image = color_image.copy()
for corners, score in zip(detections, detection_scores):
    polygon = np.int32(corners)
    cv2.polylines(
        plot_image,
        [polygon],
        isClosed=True,
        color=(0, 255, 0),
        thickness=3,
        lineType=cv2.LINE_AA,
    )
    # Write the confidence just above the top-left of the polygon's extent.
    text_x, text_y = polygon.reshape(-1, 2).min(axis=0)
    cv2.putText(
        plot_image,
        f"{score:.2f}",
        (int(text_x), max(int(text_y) - 5, 15)),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        2,
        cv2.LINE_AA,
    )

plt.imshow(cv2.cvtColor(plot_image, cv2.COLOR_BGR2RGB))

### Resources
- [Image Processing in OpenCV: Template matching](https://docs.opencv.org/4.x/d4/dc6/tutorial_py_template_matching.html)
- [Image Processing: Image Pyramids](https://docs.opencv.org/4.x/d4/d1f/tutorial_pyramids.html)
- [2D Features framework: Basic concepts of the homography explained with code](https://docs.opencv.org/4.x/d9/dab/tutorial_homography.html)