In [2]:
# pip install opencv-python

import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

In [3]:
def extract_orb_features(image_path):
    """
    Extracts ORB features from an image.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")
    # Initialize the ORB detector
    orb = cv2.ORB_create()
    # Find the keypoints and descriptors with ORB
    kp, des = orb.detectAndCompute(img, None)
    return kp, des

def visualize_keypoints(image_path):
    # Load the image
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("The image path is invalid.")

    # Initialize the AKAZE detector
    ORB = cv2.ORB_create()

    # Detect keypoints and descriptors with AKAZE
    kp, des = ORB.detectAndCompute(img, None)
    kp = kp[:100]
    len(kp)
    # Draw keypoints on the image
    img_with_keypoints = cv2.drawKeypoints(img, kp, None, color=(255, 0, 0), flags=cv2.DrawMatchesFlags_DEFAULT)

    # Display the image with keypoints
    plt.imshow(img_with_keypoints, cmap='gray')
    plt.title(f'Image: {image_name(image_path)} Keypoints')
    plt.axis('off')
    plt.show()

def image_name(image_path):
    return os.path.splitext(os.path.basename(image_path))[0]


image1_path = 'nk_collection_meubels_cleaned/meubel_1.jpg'
image2_path = 'nk_collection_meubels_cleaned/meubel_2.jpg'

import pandas as pd

def get_table(sims):
    """
    This function takes the output produced by either the compute_similarities \ 
    or compute_similarities_testsets function, and returns a pandas dataframe/table \
    and also saves it in excel.
    """
    
    data = {}
    rows = []

    for key, value in sims.items():
        if key[0] not in data:
            data[key[0]] = []
        if key[1] not in rows:
            rows.append(key[1])
        data[key[0]].append(value)
        
    data = {key[:key.rfind(".")]:value for key, value in data.items()}
    rows = [row[:row.rfind(".")] for row in rows]
        
    df = pd.DataFrame(data, index=rows)
    #df.to_excel('output.xlsx')
    return df.T
    


  """


In [4]:
import cv2
import numpy as np
import os
import pandas as pd

def orb_similarity(img1, img2):
    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(img1, None)
    kp2, des2 = orb.detectAndCompute(img2, None)
    if des1 is None or des2 is None:
        return 0.0

    # Use BFMatcher with default params
    # bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    # # Match descriptors
    # matches = bf.match(des1, des2)
    # matches = sorted(matches, key=lambda x: x.distance)

    # # Apply RANSAC to find the best transformation
    # src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    # dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 15)
    # matches_mask = mask.ravel().tolist()

    # num_inliers = np.sum(matches_mask)
    # num_matches = len(matches)
    
    # similarity_percentage = (num_inliers / num_matches) * 100
    # return similarity_percentage
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    good_matches = []
    for match in matches:
        if len(match) == 2:
            m, n = match
            if m.distance < 0.75 * n.distance:
                good_matches.append(m)

    if len(good_matches) < 10:
        return 0.0

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 2)

    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    matches_mask = mask.ravel().tolist()

    num_inliers = np.sum(matches_mask)
    num_matches = len(good_matches)
    
    similarity_percentage = (num_inliers / num_matches) * 100
    return similarity_percentage

def compute_similarities_testsets(munich_testset, nk_testset, 
                                  munich_path="munich_testset", 
                                  nk_path="nk_testset"):
    """
    Computes similarities between images from Munich and NK test sets using ORB features.
    """
    similarities = {}
    for nk_img in nk_testset:
        nk_img_path = os.path.join(nk_path, nk_img)
        try:
            nk_img = cv2.imread(nk_img_path, cv2.IMREAD_GRAYSCALE)
            if nk_img is None:
                raise ValueError(f"Image at {nk_img_path} could not be loaded.")
        except ValueError as e:
            print(e)
            continue
        for munich_img in munich_testset:
            munich_img_path = os.path.join(munich_path, munich_img)
            try:
                munich_img = cv2.imread(munich_img_path, cv2.IMREAD_GRAYSCALE)
                if munich_img is None:
                    raise ValueError(f"Image at {munich_img_path} could not be loaded.")
            except ValueError as e:
                print(e)
                continue
            similarity = orb_similarity(nk_img, munich_img)
            similarities[(nk_img_path, munich_img_path)] = similarity
        
    return similarities

# Example usage:
nk_path = "nk_testset"
munich_path = "munich_testset"

if not os.path.exists(nk_path):
    raise FileNotFoundError(f"The path {nk_path} does not exist.")
if not os.path.exists(munich_path):
    raise FileNotFoundError(f"The path {munich_path} does not exist.")

nk_testset = os.listdir(nk_path)
munich_testset = os.listdir(munich_path)

sims = compute_similarities_testsets(munich_testset, nk_testset, munich_path, nk_path)
# Assuming you have a function to visualize or save the results:
get_table(sims)




Unnamed: 0,munich_testset\dressoir_mccp,munich_testset\kast_mccp,munich_testset\speeltafel_mccp,munich_testset\stoel_mccp,munich_testset\tafel_mccp
nk_testset\dressoir_nk,12.94964,10.56338,17.346939,13.669065,14.285714
nk_testset\kast_nk,14.173228,10.447761,14.130435,13.636364,13.385827
nk_testset\speeltafel_nk,15.873016,8.62069,17.582418,16.504854,18.367347
nk_testset\stoel_nk,13.28125,12.676056,21.428571,14.516129,13.157895
nk_testset\tafel_nk,15.602837,7.594937,14.285714,14.685315,12.195122


In [5]:
def compute_similarities(nk_img_path, munich_imgs, path):
    """
    This function takes three arguments: 
    - nk_img, which is a single image from the nk collection. 
    - munich_imgs, this contains all images from the Munich Database. 
    - path, this is the path to the gray scaled Munich Database.
    
    It then computes the feature descriptor for the nk collection image and all the images in the \
    Munich Database. Afterwards takes the dot-product to get the dot-product similiarity. It then \
    saves the similarity and the two images as key-value pairs in a dictionary. 
    """
    
    nk_img = cv2.imread(nk_img_path, cv2.IMREAD_GRAYSCALE)
    similarities = {}
    i = 0

    for img_name in munich_imgs:
        if i == 1000:
            break
        print(i)
        i += 1
        img_path = os.path.join(path, img_name)
        munich_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        if nk_img is not None and munich_img is not None:
            similarity = orb_similarity(nk_img, munich_img)
            nk_img_name = os.path.basename(nk_img_path)
            munich_img_name = os.path.basename(img_path)
            similarities[(nk_img_name, munich_img_name)] = similarity

    return similarities

# Example usage
nk_img = "test_dataset_gray/kast_nk.jpg"
munich_imgs = os.listdir("scraped_images_grayscaled_big")
path = "scraped_images_grayscaled_big"

similarities = compute_similarities(nk_img, munich_imgs, path)
print(similarities)
get_table(similarities)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

Unnamed: 0,0001_3991-3998_id=cp134449_linz,00100_524_id=cp168205_badv,00100_527_id=cp168251_badv,00100_530-2_id=cp168296_badv,00100_531-1_id=cp168313_badv,00100_532-1_id=cp168329_badv,00100_533-2_id=cp168360_badv,00100_535-1_id=cp168395_badv,00100_535-2_id=cp168396_badv,00100_536-1_id=cp168424_badv,...,0073_387-6_id=cp158225_badv,0073_387-7_id=cp158226_badv,0073_387-7_id=cp158227_badv,0073_387-7_id=cp158228_badv,0073_387-8_id=cp158229_badv,0073_387-9_id=cp158230_badv,0073_387-9_id=cp158231_badv,0073_388-1_id=cp158247_badv,0073_388-2_id=cp158248_badv,0073_388-3_id=cp158249_badv
kast_nk,9.243697,19.090909,24.324324,24.761905,21.875,25.714286,34.343434,15.671642,14.184397,30.0,...,47.058824,28.712871,25.0,23.423423,22.413793,22.807018,24.271845,20.512821,13.432836,14.393939


In [6]:
def get_top_10_similarities(similarities):
    """
    Get the top 10 highest similarity values for each NK image from the similarities dictionary.
    
    Parameters:
    - similarities: A dictionary where keys are (NK image name, Munich image name) tuples
                    and values are similarity scores.
                    
    Returns:
    - A dictionary where keys are NK image names and values are lists of tuples
      (Munich image name, similarity) sorted by similarity in descending order.
    """
    top_10_similarities = {}
    
    for nk_img_name in set(key[0] for key in similarities.keys()):
        # Filter similarities for current NK image
        nk_similarities = [(munich_img_name, similarity) for (nk, munich_img_name), similarity in similarities.items() if nk == nk_img_name]
        
        # Sort by similarity in descending order
        sorted_similarities = sorted(nk_similarities, key=lambda x: x[1], reverse=True)
        
        # Get top 10 similarities
        top_10_similarities[nk_img_name] = sorted_similarities[:10]
    
    return top_10_similarities


get_top_10_similarities(similarities)


{'kast_nk.jpg': [('0038_130-11_id=cp139528_badv.jpg', 100.0),
  ('0051_196-26_id=cp146003_badv.jpg', 100.0),
  ('0036_85-14_id=cp174748_badv.jpg', 93.75),
  ('0033_65-9_id=cp170230_badv.jpg', 92.85714285714286),
  ('0036_85-13_id=cp174747_badv.jpg', 91.66666666666666),
  ('0035_85-7_id=cp174754_badv.jpg', 88.88888888888889),
  ('0025_15-7_id=cp141992_badv.jpg', 88.23529411764706),
  ('0051_196-26_id=cp146002_badv.jpg', 85.71428571428571),
  ('0073_387-15_id=cp158218_badv.jpg', 85.71428571428571),
  ('0051_196-24_id=cp146000_badv.jpg', 83.33333333333334)]}