In [16]:
import cv2
import os
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

In [17]:
def sift_similarity(img1, img2):
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    if des1 is None or des2 is None:
        return 0.0

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)
    if len(good_matches) < 10:
        return 0.0

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 2)

    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 10.0)
    matches_mask = mask.ravel().tolist()

    num_inliers = np.sum(matches_mask)
    num_matches = len(good_matches)
    
    similarity_percentage = (num_inliers / num_matches) * 100
    return similarity_percentage

In [18]:
def get_table(sims):
    """
    This function takes the output produced by either the compute_similarities \ 
    or compute_similarities_testsets function, and returns a pandas dataframe/table \
    and also saves it in excel.
    """
    
    data = {}
    rows = []

    for key, value in sims.items():
        if key[0] not in data:
            data[key[0]] = []
        if key[1] not in rows:
            rows.append(key[1])
        data[key[0]].append(value)
        
    data = {key[:key.rfind(".")]:value for key, value in data.items()}
    rows = [row[:row.rfind(".")] for row in rows]
        
    df = pd.DataFrame(data, index=rows)
    #df.to_excel('output.xlsx')
    return df.T

In [19]:
def compute_similarities(nk_img_path, munich_imgs, path):
    """
    This function takes three arguments: 
    - nk_img, which is a single image from the nk collection. 
    - munich_imgs, this contains all images from the Munich Database. 
    - path, this is the path to the gray scaled Munich Database.
    
    It then computes the feature descriptor for the nk collection image and all the images in the \
    Munich Database. Afterwards takes the dot-product to get the dot-product similiarity. It then \
    saves the similarity and the two images as key-value pairs in a dictionary. 
    """
    
    nk_img = cv2.imread(nk_img_path, cv2.IMREAD_GRAYSCALE)
    similarities = {}
    i = 0

    for img_name in munich_imgs:
        if i == 50:
            break
        print(i)
        i += 1
        img_path = os.path.join(path, img_name)
        munich_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        if nk_img is not None and munich_img is not None:
            similarity = sift_similarity(nk_img, munich_img)
            nk_img_name = os.path.basename(nk_img_path)
            munich_img_name = os.path.basename(img_path)
            similarities[(nk_img_name, munich_img_name)] = similarity

    return similarities

# Example usage
nk_img = "test_dataset_gray/kast_nk.jpg"
munich_imgs = os.listdir("scraped_images_grayscaled_big")
path = "scraped_images_grayscaled_big"

similarities = compute_similarities(nk_img, munich_imgs, path)
print(similarities)
get_table(similarities)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
{('kast_nk.jpg', '1245_9632_id=cp130764_linz.jpg'): 67.32673267326733, ('kast_nk.jpg', '0719_4326_id=cp161541_badv.jpg'): 54.06976744186046, ('kast_nk.jpg', '0571_2865-1_id=cp153185_badv.jpg'): 48.64864864864865, ('kast_nk.jpg', '0101_562_id=cp168851_badv.jpg'): 45.76271186440678, ('kast_nk.jpg', '2081_41379_id=cp159297_badv.jpg'): 51.61290322580645, ('kast_nk.jpg', '0697_4244-1_id=cp160763_badv.jpg'): 59.25925925925925, ('kast_nk.jpg', '0327_1595-5_id=cp143235_badv.jpg'): 61.53846153846154, ('kast_nk.jpg', '0497_2394-34_id=cp150371_badv.jpg'): 85.71428571428571, ('kast_nk.jpg', '1732_21836-28_id=cp147429_badv.jpg'): 27.27272727272727, ('kast_nk.jpg', '0642_3848_id=cp132737_linz.jpg'): 35.714285714285715, ('kast_nk.jpg', '2351_48651_id=cp166751_badv.jpg'): 47.16981132075472, ('kast_nk.jpg', '1893_31894_id=cp154490_badv.jpg'): 36.76470588235294, ('k

Unnamed: 0,1245_9632_id=cp130764_linz,0719_4326_id=cp161541_badv,0571_2865-1_id=cp153185_badv,0101_562_id=cp168851_badv,2081_41379_id=cp159297_badv,0697_4244-1_id=cp160763_badv,0327_1595-5_id=cp143235_badv,0497_2394-34_id=cp150371_badv,1732_21836-28_id=cp147429_badv,0642_3848_id=cp132737_linz,...,2236_45681_id=cp163945_badv,1052_7928-5_id=cp172928_badv,2177_44340-2_id=cp162651_badv,1506_13100_id=cp139686_badv,1705_21035-1_id=cp146834_badv,0595_2874-16_id=cp153578_badv,1399_11515-6_id=cp137203_badv,2052_38461_id=cp158127_badv,1905_32195_id=cp154721_badv,1956_35215-16_id=cp155855_badv
kast_nk,67.326733,54.069767,48.648649,45.762712,51.612903,59.259259,61.538462,85.714286,27.272727,35.714286,...,49.57265,56.363636,46.341463,36.842105,71.942446,43.478261,52.777778,71.428571,64.912281,31.818182


In [21]:
def get_top_10_similarities(similarities):
    """
    Get the top 10 highest similarity values for each NK image from the similarities dictionary.
    
    Parameters:
    - similarities: A dictionary where keys are (NK image name, Munich image name) tuples
                    and values are similarity scores.
                    
    Returns:
    - A dictionary where keys are NK image names and values are lists of tuples
      (Munich image name, similarity) sorted by similarity in descending order.
    """
    top_10_similarities = {}
    
    for nk_img_name in set(key[0] for key in similarities.keys()):
        # Filter similarities for current NK image
        nk_similarities = [(munich_img_name, similarity) for (nk, munich_img_name), similarity in similarities.items() if nk == nk_img_name]
        
        # Sort by similarity in descending order
        sorted_similarities = sorted(nk_similarities, key=lambda x: x[1], reverse=True)
        
        # Get top 10 similarities
        top_10_similarities[nk_img_name] = sorted_similarities[:10]
    
    return top_10_similarities


get_top_10_similarities(similarities)


{'kast_nk.jpg': [('0166_940-328_id=cp176430_badv.jpg', 80.95238095238095),
  ('0497_2394-34_id=cp150371_badv.jpg', 80.0),
  ('1503_13062_id=cp139612_badv.jpg', 73.19587628865979),
  ('2052_38461_id=cp158127_badv.jpg', 71.42857142857143),
  ('1245_9632_id=cp130764_linz.jpg', 69.0909090909091),
  ('0709_4251_id=cp161012_badv.jpg', 67.64705882352942),
  ('1825_26305-86_id=cp151953_badv.jpg', 67.02412868632707),
  ('1905_32195_id=cp154721_badv.jpg', 65.45454545454545),
  ('0697_4244-1_id=cp160763_badv.jpg', 61.53846153846154),
  ('0327_1595-5_id=cp143235_badv.jpg', 61.53846153846154)]}