In [None]:
import cv2
import numpy as np
import os

In [None]:
!ls data/near_dup

In [None]:
!ls data/near_dup/wo0001/

In [None]:
!ls data/near_dup/wo0002/

In [None]:
from IPython.display import display, HTML
from IPython.display import Image as IPImage
from math import ceil

def show_images_in(image_folder, images_per_row=4):
    """Render every image file in a folder as an HTML table grid.

    Files whose lower-cased name ends in .png, .jpg, .jpeg or .gif are listed
    in sorted order, laid out ``images_per_row`` per table row, and each cell
    shows the image followed by its file name. The table is rendered through
    IPython's ``display(HTML(...))``.

    Args:
        image_folder: Directory to list; it is also used as the prefix of each
            ``<img src>`` path.
        images_per_row: Number of table cells per row (must be >= 1).

    Raises:
        ValueError: If ``images_per_row`` is smaller than 1.
    """
    import html  # local import: only this function needs HTML escaping

    if images_per_row < 1:
        raise ValueError("images_per_row must be a positive integer")

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif')
    image_files = sorted(
        f for f in os.listdir(image_folder) if f.lower().endswith(image_extensions)
    )

    rows = []
    # Walk the sorted file list in chunks of `images_per_row`; each chunk is one <tr>.
    for start in range(0, len(image_files), images_per_row):
        cells = []
        for file_name in image_files[start:start + images_per_row]:
            # Escape both the attribute value and the caption so file names
            # containing quotes, '<' or '&' cannot break the generated markup.
            safe_src = html.escape(os.path.join(image_folder, file_name), quote=True)
            safe_name = html.escape(file_name)
            cells.append(f"<td><img src='{safe_src}'><br>{safe_name}</td>")
        rows.append("<tr>" + "".join(cells) + "</tr>")

    display(HTML("<table>" + "".join(rows) + "</table>"))

In [None]:
# Render the image grid for the data/near_dup/wo0001 folder.
show_images_in("data/near_dup/wo0001")

In [None]:
# Render the image grid for the data/near_dup/wo0002 folder.
show_images_in("data/near_dup/wo0002")

In [None]:
def show_matches(image_path1, image_path2, print_distances=True):
    """Match ORB features between two images and plot the 60 best matches.

    Both files are read as grayscale, ORB keypoints and descriptors are
    computed, and the descriptors are brute-force matched on Hamming distance
    with cross-checking. The 60 closest matches are drawn side by side with
    matplotlib, and the fraction of all matches whose distance is below
    ``max_dist`` is printed as ``MatchRatio``.

    Two module-level names are resolved at call time and must exist before the
    first call: ``plt`` (``matplotlib.pyplot``) and ``max_dist``.

    Args:
        image_path1: Path of the first (query) image.
        image_path2: Path of the second (train) image.
        print_distances: When true, print the distances of the 60 best matches.

    Raises:
        FileNotFoundError: If either image cannot be read by ``cv2.imread``.
    """
    # Load the two images
    img1 = cv2.imread(image_path1, cv2.IMREAD_GRAYSCALE)
    img2 = cv2.imread(image_path2, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising.
    for path, img in ((image_path1, img1), (image_path2, img2)):
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path}")

    # Detect ORB keypoints and descriptors for both images
    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(img1, None)
    kp2, des2 = orb.detectAndCompute(img2, None)

    if des1 is None or des2 is None:
        # No descriptors were produced for at least one image (no keypoints
        # found), so there is nothing to match.
        matches = []
    else:
        # Brute-force matcher; crossCheck keeps only mutual best matches
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        # Best (smallest distance) matches first
        matches = sorted(bf.match(des1, des2), key=lambda x: x.distance)

    # flags=2 asks drawMatches not to draw keypoints that have no match
    result_img = cv2.drawMatches(img1, kp1, img2, kp2, matches[:60], None, flags=2)

    matching_distances = [match.distance for match in matches]

    if print_distances:
        print("Matching Distances:", matching_distances[:60])

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB))
    plt.show()

    # Share of matches closer than the threshold; 0.0 when nothing matched at
    # all (avoids dividing by zero).
    close_count = sum(match.distance < max_dist for match in matches)
    match_ratio = close_count / len(matches) if matches else 0.0
    print(f"MatchRatio:{match_ratio}")

In [None]:
import matplotlib.pyplot as plt
# Distance threshold for the Hamming-distance matcher: matches closer than this
# are counted as good ones.
max_dist = 30
# Minimum number of good matches for an image pair to be recorded as matched.
min_match_count = 40

In [None]:
# Compare one image from wo0001 against one image from wo0002.
show_matches("data/near_dup/wo0001/wo0001003_1.jpg", "data/near_dup/wo0002/wo0002003_1.jpg")

In [None]:
import itertools as it

# NOTE(review): machine-specific absolute path, whereas the earlier cells read
# from the relative "data/near_dup" folder -- confirm which dataset is intended.
base_path = "/mnt/hdd/__Docencia/DataAnalysisWithPython/!!2023SepUH/challenges/NearDupImgDet/Images"

# Initialize the ORB detector
orb = cv2.ORB_create()
# Create a Brute Force Matcher
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# One record per readable image: where it came from, its pixels and ORB features.
image_data = []

# Listings are sorted because os.listdir returns entries in arbitrary order and
# later cells pick results by position; sorting keeps re-runs reproducible.
for folder_name in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder_name)
    if not os.path.isdir(folder_path):
        continue
    for img_file in sorted(os.listdir(folder_path)):
        file_name = os.path.join(folder_path, img_file)
        img = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None for files it cannot decode
            print(f"Skipping unreadable file: {file_name}")
            continue
        kp, des = orb.detectAndCompute(img, None)
        if des is None:
            # No descriptors means the image can never be matched
            print(f"Skipping image without ORB features: {file_name}")
            continue
        image_data.append({
            'folder': folder_name,
            'file': img_file,
            'file_name': file_name,
            'image': img,
            'keypoints': kp,
            'descriptors': des
        })

In [None]:
# Number of image records collected in image_data.
len(image_data)

In [None]:
# Compare every unordered pair of images and keep the pairs that have at least
# min_match_count matches closer than max_dist.
matched_images = []

for d1, d2 in it.combinations(image_data, 2):
    # A record without descriptors cannot be matched (and bf.match would fail on None).
    if d1['descriptors'] is None or d2['descriptors'] is None:
        continue
    matches = bf.match(d1['descriptors'], d2['descriptors'])
    # Count of matches strictly below the distance threshold
    match_in_dist = sum(m.distance < max_dist for m in matches)
    if match_in_dist >= min_match_count:
        matched_images.append((d1, d2, matches))

In [None]:
# Number of image pairs that passed the match-count threshold.
len(matched_images)

In [None]:
# Visualise the first matched pair (the stored matches are ignored; show_matches recomputes them from the files).
d1, d2, _ = matched_images[0]
show_matches(d1['file_name'], d2['file_name'])

In [None]:
# Visualise the second matched pair.
d1, d2, _ = matched_images[1]
show_matches(d1['file_name'], d2['file_name'])

In [None]:
# Visualise the third matched pair.
d1, d2, _ = matched_images[2]
show_matches(d1['file_name'], d2['file_name'])

In [None]:
# Take the third matched pair and keep only its matches within the distance
# threshold. The bound is inclusive (<=) here, whereas the pair-selection loop
# counts matches with a strict `<`.
d1, d2, matches = matched_images[2]
match_in_dist = [m for m in matches if m.distance <= max_dist]

In [None]:
# Pixel coordinates of the matched keypoints in each image, as float32 arrays
# reshaped to (N, 1, 2): queryIdx indexes d1's keypoints, trainIdx indexes d2's.
src_pts = np.float32([d1['keypoints'][m.queryIdx].pt for m in match_in_dist]).reshape(-1, 1, 2)
dst_pts = np.float32([d2['keypoints'][m.trainIdx].pt for m in match_in_dist]).reshape(-1, 1, 2)
# Estimate the transform from the d1 points to the d2 points; the second return value is discarded.
trans_mat, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)

In [None]:
# Read individual entries of the estimated transform matrix and print them.
# NOTE(review): per the OpenCV docs, estimateAffinePartial2D returns
# [[s*cos(t), -s*sin(t), tx], [s*sin(t), s*cos(t), ty]], so the values labelled
# "Scale" and "Shear" below are raw matrix entries and only equal the true
# scale/shear when the rotation is close to zero -- confirm the intended report.
scale_x = trans_mat[0,0]
scale_y = trans_mat[1,1]
shear_x = trans_mat[0,1]
shear_y = trans_mat[1,0]
trans_x = trans_mat[0,2]
trans_y = trans_mat[1,2]
# Rotation is taken from the first matrix column and negated before converting to degrees.
rotation_angle_rad = -np.arctan2(trans_mat[1, 0], trans_mat[0, 0])
rotation_angle_deg = np.degrees(rotation_angle_rad)
print(f'Scale: {scale_x:.2f}, {scale_y:.2f}')
print(f'Shear: {shear_x:.2f}, {shear_y:.2f}')
print(f'Translation: {trans_x:.2f}, {trans_y:.2f}')
print(f'Rotation: {rotation_angle_deg}')

In [None]:
def print_transforms(match):
    """Print the 2D transform estimated between a matched image pair.

    Keeps the matches whose distance is at most the module-level ``max_dist``,
    estimates a transform from the first image's keypoints to the second's
    with ``cv2.estimateAffinePartial2D``, and prints entries of the resulting
    matrix as scale, shear, translation and rotation.

    NOTE(review): per the OpenCV docs the estimated matrix is
    [[s*cos(t), -s*sin(t), tx], [s*sin(t), s*cos(t), ty]], so the printed
    "Scale" and "Shear" values are raw matrix entries and only equal the true
    scale/shear when the rotation is close to zero -- confirm the intended
    report.

    Args:
        match: Tuple ``(d1, d2, matches)`` where ``d1`` and ``d2`` are image
            records with a ``'keypoints'`` entry and ``matches`` holds the
            matcher results between them.
    """
    d1, d2, matches = match
    match_in_dist = [m for m in matches if m.distance <= max_dist]
    if not match_in_dist:
        # Nothing to fit the transform on
        print('Transform: no matches within max_dist')
        return

    src_pts = np.float32([d1['keypoints'][m.queryIdx].pt for m in match_in_dist]).reshape(-1, 1, 2)
    dst_pts = np.float32([d2['keypoints'][m.trainIdx].pt for m in match_in_dist]).reshape(-1, 1, 2)
    trans_mat, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
    if trans_mat is None:
        # The estimator returns no matrix when it cannot fit a transform;
        # indexing None below would raise a TypeError.
        print('Transform: could not be estimated')
        return

    scale_x = trans_mat[0, 0]
    scale_y = trans_mat[1, 1]
    shear_x = trans_mat[0, 1]
    shear_y = trans_mat[1, 0]
    trans_x = trans_mat[0, 2]
    trans_y = trans_mat[1, 2]
    # Rotation is taken from the first matrix column and negated before converting to degrees.
    rotation_angle_rad = -np.arctan2(trans_mat[1, 0], trans_mat[0, 0])
    rotation_angle_deg = np.degrees(rotation_angle_rad)
    print(f'Scale: {scale_x:.2f}, {scale_y:.2f}')
    print(f'Shear: {shear_x:.2f}, {shear_y:.2f}')
    print(f'Translation: {trans_x:.2f}, {trans_y:.2f}')
    print(f'Rotation: {rotation_angle_deg}')

In [None]:
# For every matched pair: draw the matches (without the distance list) and
# print the estimated transform.
for m in matched_images:
    d1, d2, _ = m
    show_matches(d1['file_name'], d2['file_name'], print_distances=False)
    print_transforms(m)