##### Copyright 2018 The TensorFlow Hub Authors.

Licensed under the Apache License, Version 2.0 (the "License");

In [6]:
from absl import logging

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageOps
from scipy.spatial import cKDTree
from skimage.feature import plot_matches
from skimage.measure import ransac
from skimage.transform import AffineTransform
from six import BytesIO
import time
from sklearn.metrics.pairwise import cosine_similarity
import glob
from multiprocessing import Pool
from six.moves.urllib.request import urlopen
import pickle
import json
import os
import gzip

In the next cell, we specify the URLs of two images we would like to process with DELF in order to match and compare them.

In [7]:
with open('landmark_ids_greater_than_5_list.json') as json_file:
    landmark_ids_greater_than_5_list = json.load(json_file)
type(landmark_ids_greater_than_5_list)
print(len(landmark_ids_greater_than_5_list))
print(landmark_ids_greater_than_5_list[:5])

125729
[177870, 176528, 192931, 126637, 83144]


In [8]:
# creating dictionary of landmark_ids and delf features for all landmarks that has >5 images to later use to compare features
landmark_ids_images_delf_features_dict = {}
    
# Create a dict of images/files, max_inliers to filter out after DELF
images_to_filter_out_after_delf = {}

# Create a dictionary {'landmark_id' : (tuples with image1 path, image2path, inliers). 
# we will use this list of tuples later to only use the top 100 images with the most similar inliers within each class for modeling
top_pairs_landmark_id_dict = {}
tuple_list = []

Download, resize, save and display the images.

In [9]:
# print(len(results))
# print(results[0].keys())
# print(type(results))
# print(type(results[0]))

26
dict_keys(['scores', 'features', 'scales', 'locations', 'boxes', 'attention', 'descriptors'])
<class 'list'>
<class 'dict'>

In [24]:
landmark_id = 177870
filename = "landmark_ids_delf_features_dict_pickle_files\\{}.pkl.gz".format(landmark_id)
with gzip.open(filename,'rb') as f:
     landmark_ids_results_dict = pickle.load(f)
print(len(landmark_ids_results_dict))
print(list(landmark_ids_results_dict.keys()))
print(len(landmark_ids_results_dict[landmark_id]))
print(landmark_ids_results_dict)

1
[177870]
500
{177870: [{'locations': array([[240.      ,   0.      ],
       [ 32.      , 224.      ],
       [ 16.      , 240.      ],
       [224.      ,  32.      ],
       [ 32.      , 240.      ],
       [ 90.51054 ,   0.      ],
       [240.      , 160.      ],
       [ 45.255264, 226.27635 ],
       [248.90399 ,  67.882904]], dtype=float32), 'descriptors': array([[ 1.48597881e-01, -3.58392037e-02, -1.21899776e-01,
        -1.86202154e-01, -2.71297712e-02, -3.54816839e-02,
        -4.03299145e-02, -7.62504190e-02, -1.33471444e-01,
         1.84990704e-01, -2.72398889e-01,  4.82967384e-02,
        -2.93049663e-01,  3.78010362e-01, -2.88409349e-02,
         2.83388793e-01,  3.75699788e-01, -7.89024606e-02,
        -7.21395761e-02, -9.78576243e-02, -3.71259265e-02,
         3.24220747e-01, -2.47455817e-02, -6.84538996e-03,
        -6.72551394e-02, -9.98965725e-02, -9.70296785e-02,
        -7.98313618e-02,  6.59641698e-02, -9.98013020e-02,
        -4.45939042e-02,  1.10246497e-03, 

## Use the locations and description vectors to match the images

In [26]:
results = landmark_ids_results_dict[landmark_id]

In [27]:
#@title TensorFlow is not needed for this post-processing and visualization
def match_images3(multiprocessing_results_list_item):
    result1 = multiprocessing_results_list_item[0]
    
    list_results = multiprocessing_results_list_item[1]
    max_inliers_reached = 10
    max_inlier = 0
    for j in range(len(list_results)):
        result2 = list_results[j]
        
        distance_threshold = 0.8
        # Read features.
        num_features_1 = result1['locations'].shape[0]
        num_features_2 = result2['locations'].shape[0]

        # Find nearest-neighbor matches using a KD tree.
        d1_tree = cKDTree(result1['descriptors'])
        _, indices = d1_tree.query(
            result2['descriptors'],
            distance_upper_bound=distance_threshold)

        # Select feature locations for putative matches.
        locations_2_to_use = np.array([
            result2['locations'][i,]
            for i in range(num_features_2)
            if indices[i] != num_features_1
        ])

        locations_1_to_use = np.array([
            result1['locations'][indices[i],]
            for i in range(num_features_2)
            if indices[i] != num_features_1
        ])

        # Perform geometric verification using RANSAC.
        try:
            _, inliers = ransac(
                (locations_1_to_use, locations_2_to_use),
                AffineTransform,
                min_samples=3,
                residual_threshold=20,
                max_trials=1000)

            sum_inliers = sum(inliers)
        except:
            sum_inliers = 0
            
        if num_features_1 != sum_inliers:
            if max_inlier < sum_inliers:
                max_inlier = sum_inliers
        
            if sum_inliers > max_inliers_reached:
                break

In [28]:
# the multiprocessing_results_list is of type list of tuples, each tuple has position 0 as dictionary, position 1 as list of dictionaries
multiprocessing_results_list = []
for i in range(0, len(results)):
    multiprocessing_results_list.append((results[i], results))

In [29]:
print(type(multiprocessing_results_list))
print(type(multiprocessing_results_list[0]))
print(type(multiprocessing_results_list[0][0]))
print(type(multiprocessing_results_list[0][1]))
# print(len(multiprocessing_results_list))
print(multiprocessing_results_list[0][0]['locations'])
print("\n\n\n\n\n\n\n\n\n\n")
print(multiprocessing_results_list[0][1])

<class 'list'>
<class 'tuple'>
<class 'dict'>
<class 'list'>
[[240.         0.      ]
 [ 32.       224.      ]
 [ 16.       240.      ]
 [224.        32.      ]
 [ 32.       240.      ]
 [ 90.51054    0.      ]
 [240.       160.      ]
 [ 45.255264 226.27635 ]
 [248.90399   67.882904]]











[{'locations': array([[240.      ,   0.      ],
       [ 32.      , 224.      ],
       [ 16.      , 240.      ],
       [224.      ,  32.      ],
       [ 32.      , 240.      ],
       [ 90.51054 ,   0.      ],
       [240.      , 160.      ],
       [ 45.255264, 226.27635 ],
       [248.90399 ,  67.882904]], dtype=float32), 'descriptors': array([[ 1.48597881e-01, -3.58392037e-02, -1.21899776e-01,
        -1.86202154e-01, -2.71297712e-02, -3.54816839e-02,
        -4.03299145e-02, -7.62504190e-02, -1.33471444e-01,
         1.84990704e-01, -2.72398889e-01,  4.82967384e-02,
        -2.93049663e-01,  3.78010362e-01, -2.88409349e-02,
         2.83388793e-01,  3.75699788e-01, -7.89024606e-02,
    

In [None]:
# Trying with multiprocessing
time_to_compare_all_images_in_class = time.time()

if __name__ == '__main__':
    max_inliers_reached = 10

    images_to_filter_out = {}
    for i in range(0, len(results)):
        max_inliers = 0
        p = Pool(4)
        inliers = p.map(match_images3, (multiprocessing_results_list))
        p.close()
        p.join()
    #     If max inliers for an image is <= 10 then add the image to dict to filter out later.
#         if max_inliers <= max_inliers_reached:
        images_to_filter_out[images_paths[i]] = inliers

# images_to_filter_out_after_delf.update(delf_compare_images_multiprocessing.compare_images(images_paths, images, results))
# if __name__ == '__main__':
    
#     p = Pool(8)
#     temp_dict = p.map(delf_compare_images_multiprocessing.compare_images, (images_paths, results))
#     p.close()
#     p.join()
    
print("Total time to compare all " + str(len(images_paths)) + " images in the landmark_id " + landmark_id + " : ", time.time() - time_to_compare_all_images_in_class)

In [None]:
images_to_filter_out_after_delf.update(temp_dict[0])

In [None]:
print(temp_dict)
print(images_to_filter_out_after_delf)

{'datasets\\raw_data\\train\\0\\0ddc92c6353c019a.jpg': 0, 'datasets\\raw_data\\train\\0\\2c8acb7c9958da08.jpg': 7, 'datasets\\raw_data\\train\\0\\b5d8eb502833a58c.jpg': 5}

In [None]:
print(top_pairs_landmark_id_dict)
print(top_pairs_landmark_id_dict.keys())