In [1]:
import cv2
import os
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

In [2]:
def sift_similarity(img1, img2):
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    if des1 is None or des2 is None:
        return 0.0

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)
    if len(good_matches) < 10:
        return 0.0

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 2)

    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 10.0)
    matches_mask = mask.ravel().tolist()

    num_inliers = np.sum(matches_mask)
    num_matches = len(good_matches)
    
    similarity_percentage = (num_inliers / num_matches) * 100
    return similarity_percentage

In [3]:
def get_table(sims):
    """
    This function takes the output produced by either the compute_similarities \ 
    or compute_similarities_testsets function, and returns a pandas dataframe/table \
    and also saves it in excel.
    """
    
    data = {}
    rows = []

    for key, value in sims.items():
        if key[0] not in data:
            data[key[0]] = []
        if key[1] not in rows:
            rows.append(key[1])
        data[key[0]].append(value)
        
    data = {key[:key.rfind(".")]:value for key, value in data.items()}
    rows = [row[:row.rfind(".")] for row in rows]
        
    df = pd.DataFrame(data, index=rows)
    #df.to_excel('output.xlsx')
    return df.T

def get_table1(sims):
    """
    This function takes the output produced by either the compute_similarities 
    or compute_similarities_testsets function, and returns a pandas dataframe/table 
    and also saves it in excel.
    """
    
    data = {}
    rows = []
    
    mapping = {
        "kast_nk.jpg": "closet_nk.jpg",
        "speeltafel_nk.png": "card_table_nk.png",
        "tafel_nk.jpg": "table_nk.jpg",
        "dressoir_nk.jpg": "dresser_nk.jpg",
        "stoel_nk.jpg": "chair_nk.jpg",
        "kast_mccp.jpg": "closet_mccp.jpg",
        "speeltafel_mccp.png": "card_table_mccp.png",
        "tafel_mccp.jpg": "table_mccp.jpg",
        "dressoir_mccp.jpg": "dresser_mccp.jpg",
        "stoel_mccp.jpg": "chair_mccp.jpg",
    }

    for (key1, key2), value in sims.items():
        file1 = os.path.basename(key1)
        file2 = os.path.basename(key2)
        
        if mapping[file1] not in data:
            data[mapping[file1]] = []
        if mapping[file2] not in rows:
            rows.append(mapping[file2])
        
        value = np.round(value, 3)
        data[mapping[file1]].append(value)
        
    data = {key[:key.rfind(".")]: value for key, value in sorted(data.items())}
    rows = [row[:row.rfind(".")] for row in rows]
    rows_indices_begin = list(range(len(rows)))
    data_values = list(data.values())
    rows_indices_end = sorted(rows_indices_begin, key=lambda i: rows[i])
    rows = sorted(rows)
    
    for key, value in data.items():
        new_value = []
        for i in rows_indices_end:
            new_value.append(value[i])
            
        data[key] = new_value
    
    df = pd.DataFrame(data, index=rows)
    #df.to_excel('output.xlsx')
    return df.T

  """


In [4]:
def compute_similarities(nk_img_path, munich_imgs, path):
    """
    This function takes three arguments: 
    - nk_img, which is a single image from the nk collection. 
    - munich_imgs, this contains all images from the Munich Database. 
    - path, this is the path to the gray scaled Munich Database.
    
    It then computes the feature descriptor for the nk collection image and all the images in the \
    Munich Database. Afterwards takes the dot-product to get the dot-product similiarity. It then \
    saves the similarity and the two images as key-value pairs in a dictionary. 
    """
    
    nk_img = cv2.imread(nk_img_path, cv2.IMREAD_GRAYSCALE)
    similarities = {}
    i = 0

    for img_name in munich_imgs:
        if i == 50:
            break
        print(i)
        i += 1
        img_path = os.path.join(path, img_name)
        munich_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        if nk_img is not None and munich_img is not None:
            similarity = sift_similarity(nk_img, munich_img)
            nk_img_name = os.path.basename(nk_img_path)
            munich_img_name = os.path.basename(img_path)
            similarities[(nk_img_name, munich_img_name)] = similarity

    return similarities

# Example usage
nk_img = "test_dataset_gray/kast_nk.jpg"
munich_imgs = os.listdir("scraped_images_grayscaled_big")
path = "scraped_images_grayscaled_big"

similarities = compute_similarities(nk_img, munich_imgs, path)
print(similarities)
get_table(similarities)

0
1
2
3
4
5
6
7
8
9
10
11
12
13


KeyboardInterrupt: 

In [21]:
def get_top_10_similarities(similarities):
    """
    Get the top 10 highest similarity values for each NK image from the similarities dictionary.
    
    Parameters:
    - similarities: A dictionary where keys are (NK image name, Munich image name) tuples
                    and values are similarity scores.
                    
    Returns:
    - A dictionary where keys are NK image names and values are lists of tuples
      (Munich image name, similarity) sorted by similarity in descending order.
    """
    top_10_similarities = {}
    
    for nk_img_name in set(key[0] for key in similarities.keys()):
        # Filter similarities for current NK image
        nk_similarities = [(munich_img_name, similarity) for (nk, munich_img_name), similarity in similarities.items() if nk == nk_img_name]
        
        # Sort by similarity in descending order
        sorted_similarities = sorted(nk_similarities, key=lambda x: x[1], reverse=True)
        
        # Get top 10 similarities
        top_10_similarities[nk_img_name] = sorted_similarities[:10]
    
    return top_10_similarities


get_top_10_similarities(similarities)


{'kast_nk.jpg': [('0166_940-328_id=cp176430_badv.jpg', 80.95238095238095),
  ('0497_2394-34_id=cp150371_badv.jpg', 80.0),
  ('1503_13062_id=cp139612_badv.jpg', 73.19587628865979),
  ('2052_38461_id=cp158127_badv.jpg', 71.42857142857143),
  ('1245_9632_id=cp130764_linz.jpg', 69.0909090909091),
  ('0709_4251_id=cp161012_badv.jpg', 67.64705882352942),
  ('1825_26305-86_id=cp151953_badv.jpg', 67.02412868632707),
  ('1905_32195_id=cp154721_badv.jpg', 65.45454545454545),
  ('0697_4244-1_id=cp160763_badv.jpg', 61.53846153846154),
  ('0327_1595-5_id=cp143235_badv.jpg', 61.53846153846154)]}

In [6]:
def sift_similarity(img1, img2):
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    if des1 is None or des2 is None:
        return 0.0

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)

    if len(good_matches) < 10:
        return 0.0

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 2)

    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 10.0)
    matches_mask = mask.ravel().tolist()

    num_inliers = np.sum(matches_mask)
    num_matches = len(good_matches)
    
    similarity_percentage = (num_inliers / num_matches) * 100
    return similarity_percentage

def compute_similarities_testsets(munich_testset, nk_testset, 
                                  munich_path="munich_testset", 
                                  nk_path="nk_testset"):
    """
    Computes similarities between images from Munich and NK test sets using ORB features.
    """
    similarities = {}
    for nk_img in nk_testset:
        nk_img_path = os.path.join(nk_path, nk_img)
        try:
            nk_img = cv2.imread(nk_img_path, cv2.IMREAD_GRAYSCALE)
            if nk_img is None:
                raise ValueError(f"Image at {nk_img_path} could not be loaded.")
        except ValueError as e:
            print(e)
            continue
        for munich_img in munich_testset:
            munich_img_path = os.path.join(munich_path, munich_img)
            try:
                munich_img = cv2.imread(munich_img_path, cv2.IMREAD_GRAYSCALE)
                if munich_img is None:
                    raise ValueError(f"Image at {munich_img_path} could not be loaded.")
            except ValueError as e:
                print(e)
                continue
            similarity = sift_similarity(nk_img, munich_img)
            similarities[(nk_img_path, munich_img_path)] = similarity
        
    return similarities

# Usage
nk_path = "nk_testset"
munich_path = "munich_testset"

if not os.path.exists(nk_path):
    raise FileNotFoundError(f"The path {nk_path} does not exist.")
if not os.path.exists(munich_path):
    raise FileNotFoundError(f"The path {munich_path} does not exist.")

nk_testset = os.listdir(nk_path)
munich_testset = os.listdir(munich_path)

sims = compute_similarities_testsets(munich_testset, nk_testset, munich_path, nk_path)

get_table1(sims)




Unnamed: 0,card_table_mccp,chair_mccp,closet_mccp,dresser_mccp,table_mccp
card_table_nk,33.333,0.0,0.0,0.0,0.0
chair_nk,36.842,36.842,22.222,41.667,41.085
closet_nk,26.667,48.768,32.203,38.462,52.023
dresser_nk,32.609,28.571,25.641,17.778,42.105
table_nk,38.462,32.812,36.364,33.333,47.458
