In [None]:
import warnings
warnings.simplefilter("ignore")

import sys
import time
import glob
import pickle
import hashlib
import PIL
from os.path import splitext, basename, join, isfile
from collections import defaultdict, Counter
from PIL import Image
from io import BytesIO
import pandas as pd
import numpy as np

from scipy.spatial import cKDTree
from skimage.feature import plot_matches
from skimage.measure import ransac
from skimage.transform import AffineTransform

from delf import feature_io
sys.path.append('/home/alexandrearaujo/library/faiss/')
import faiss

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import pandas as pd

In [None]:
def sanitize(x):
    """Return `x` as a C-contiguous float32 array.

    Args:
        x: a numpy array or any array-like (nested lists, tuples, ...).

    Returns:
        A new float32 array holding the values of `x` in C-contiguous layout.
    """
    # np.asarray lets plain Python sequences through; `.astype` alone only
    # exists on ndarrays.  astype always copies, so the input is never aliased.
    return np.ascontiguousarray(np.asarray(x).astype(np.float32))

In [None]:
# Identifiers of the images used as queries.
query_filenames = [
    '4f4b811a065c1423',  # big ben
    'b580027e974cb582',  # mountain
]

# For every query: path of the image and path of its DELF feature file.
query_image_path = [
    '/media/hdd1/kaggle/landmark-retrieval-challenge/index_256x256/{}.jpg'.format(name)
    for name in query_filenames
]
query_feature_path = [
    '/media/hdd1/kaggle/landmark-retrieval-challenge/feature_index_256x256/{}.delf'.format(name)
    for name in query_filenames
]

# Read the feature file of every query image.
n_images_query = len(query_image_path)
loc_query = []                   # per query image: first field returned by ReadFromFile
desc_query = []                  # per query image: third field returned by ReadFromFile
n_feature_query = []             # per query image: number of descriptor rows
filenames_query_ix = []          # one entry per descriptor row: position of its query image
filenames_query_ix_counter = []  # (query position, number of descriptor rows)
for query_pos, feature_file in enumerate(query_feature_path):
    locations, _, descriptors, _, _ = feature_io.ReadFromFile(feature_file)
    n_rows = descriptors.shape[0]
    loc_query.append(locations)
    desc_query.append(descriptors)
    n_feature_query.append(n_rows)
    filenames_query_ix += [query_pos] * n_rows
    filenames_query_ix_counter.append((query_pos, n_rows))
    # print('query shape', i, desc.shape)
    

# Identifiers of the images placed in the index (position = image id).
images_filenames = [
    '4e899444d83ca6b2',  # 0 big ben
    '1b0a2ccecabea7cc',  # 1 big ben
    '693326a5738aefcd',  # 2 mountain
    '34cc88b6714be7a5',  # 3 mountain with plain
    'c1307ac9c38ecbc5',  # 4 nature
    'c2168fc9c087f805',  # 5 vault

    '3f1b45fada10fa9e',  # 6 distractor
    '71ff7f6b9d3b7f08',  # 7 distractor
    'c1ac55b3f8daa72b',  # 8 distractor
    '69384190c57e1dc8',  # 9 distractor
    '601263c554e8f4d9',  # 10 distractor
    'c727199e10419a46',  # 11 distractor
    'efa8f5ec4954ea45',  # 12 distractor
]

# For every indexed image: path of the image and path of its DELF feature file.
index_image_path = [
    '/media/hdd1/kaggle/landmark-retrieval-challenge/index_256x256/{}.jpg'.format(name)
    for name in images_filenames
]
index_feature_path = [
    '/media/hdd1/kaggle/landmark-retrieval-challenge/feature_index_256x256/{}.delf'.format(name)
    for name in images_filenames
]

# Read the feature file of every indexed image.
n_images_index = len(index_image_path)
loc_index = []                    # per indexed image: first field returned by ReadFromFile
desc_index = []                   # per indexed image: third field returned by ReadFromFile
n_feature_index = []              # per indexed image: number of descriptor rows
filenames_index_ix = []           # one entry per descriptor row: position of its image
filenames_index_ix_counter = {}   # image position -> number of descriptor rows
# image position -> number of descriptor rows stored before that image
# (key i + 1 holds the running total after image i).
filenames_index_ix_counter_cummul = {0: 0}
for image_pos, feature_file in enumerate(index_feature_path):
    locations, _, descriptors, _, _ = feature_io.ReadFromFile(feature_file)
    n_rows = descriptors.shape[0]
    loc_index.append(locations)
    desc_index.append(descriptors)
    n_feature_index.append(n_rows)
    filenames_index_ix += [image_pos] * n_rows
    filenames_index_ix_counter[image_pos] = n_rows
    filenames_index_ix_counter_cummul[image_pos + 1] = (
        filenames_index_ix_counter_cummul[image_pos] + n_rows
    )
    # print('index shape', i, desc.shape)


# Stack the per-image descriptor arrays row-wise and convert each stack to a
# C-contiguous float32 matrix.  The names are rebound from lists to arrays, so
# this cell expects the lists produced by the loading code above.
desc_query = sanitize(np.concatenate(desc_query, axis=0))
desc_index = sanitize(np.concatenate(desc_index, axis=0))
print('query shape final', desc_query.shape)
print('index shape final', desc_index.shape)

In [None]:
# Approximate nearest-neighbour search of every query descriptor among the
# index descriptors, using a Faiss IVF + product-quantization index.
#
# From the Faiss documentation:
#   See the chapter about IndexIVFFlat for the setting of ncentroids.
#   The code_size is typically a power of two between 4 and 64.
#   Like for IndexPQ, d should be a multiple of m.

d = desc_index.shape[1]  # descriptor dimension, read from the data rather than hard-coded
nlist = 2**5
m = 8  # number of subquantizers
n_bits = 8  # should be 8 # bits allocated per subquantizer
n_neighbors = 60  # neighbours retrieved per query descriptor
assert d % m == 0, 'descriptor dimension must be a multiple of the number of subquantizers'

quantizer = faiss.IndexFlatL2(d)  # this remains the same
# The Faiss object gets its own name so that it is not lost when `index` is
# bound to the search result below.
faiss_index = faiss.IndexIVFPQ(quantizer, d, nlist, m, n_bits)
faiss_index.train(desc_index)
faiss_index.add(desc_index)

# `index[i, j]` is the row in `desc_index` of the j-th neighbour of query
# descriptor i; `distances[i, j]` is the matching distance.
distances, index = faiss_index.search(desc_query, n_neighbors)

# Alternatives kept for reference:
#   exact search : faiss_index = faiss.IndexFlatL2(d); faiss_index.add(desc_index)
#   GPU search   : res = faiss.StandardGpuResources()
#                  gpu_index = faiss.index_cpu_to_gpu(res, 0, faiss_index)
#                  distances, index = gpu_index.search(desc_query, n_neighbors)

In [None]:
# Remove neighbours whose Euclidean distance to the query descriptor exceeds 0.8.
# Faiss L2 indexes report *squared* L2 distances, so the threshold is squared
# before comparing (0.8 is the Euclidean bound also used with the KD-tree).
max_descriptor_distance = 0.8
index[distances > max_descriptor_distance ** 2] = -1
# Release the distance matrix.  This cell must run exactly once after each
# search: it mutates `index` in place and drops `distances`.
distances = None

In [None]:
# Map every retrieved descriptor id to the id of the image it belongs to
# (-1 stays -1).  Done with one fancy-indexing lookup instead of a Python loop
# over every (query descriptor, neighbour) pair.
print('mapped_index', flush=True)
descriptor_to_image = np.asarray(filenames_index_ix, dtype=index.dtype)
# -1 entries temporarily read the last element of the lookup table; they are
# overwritten on the next line.
mapped_index = descriptor_to_image[index]
mapped_index[index == -1] = -1

In [None]:
# Voting stage: for each query image, count how many retrieved descriptors
# fall in each indexed image and keep the 100 most voted images.
count_img_by_query = {}
row_offset = 0
for query_ix, n_descriptors in filenames_query_ix_counter:
    # Rows of `mapped_index` produced by this query's descriptors.
    query_rows = mapped_index[row_offset:row_offset + n_descriptors]
    row_offset += n_descriptors
    image_ids, votes = np.unique(query_rows[query_rows != -1], return_counts=True)
    ranking = Counter(dict(zip(image_ids, votes)))
    count_img_by_query[query_ix] = ranking.most_common(100)
    
# Geometric verification: for each query, re-score every voted candidate image
# by the number of RANSAC inliers among its putative descriptor matches.
#
# The matched locations are gathered with array indexing only.  An earlier
# revision of this cell also built them with a Python loop, asserted both
# results were equal and timed the two variants with time.clock(), which no
# longer exists on Python >= 3.8.
ransac_min_samples = 5  # putative matches needed to fit one affine model

count_img_by_query_final = defaultdict(list)
start_subset = 0
for query_ix, n_descriptors_query in filenames_query_ix_counter:
    # Rows of `index` / `mapped_index` that belong to this query image.
    end_subset = start_subset + n_descriptors_query
    subset = index[start_subset:end_subset]
    subset_mapped = mapped_index[start_subset:end_subset]
    start_subset = end_subset

    for img_ix, _ in count_img_by_query[query_ix]:
        # Neighbours of this query that belong to candidate image `img_ix`.
        cond = (subset_mapped == img_ix) & (subset != -1)
        # Row of each selected entry = position of the query descriptor.
        index_for_query_loc = np.nonzero(cond)[0]
        # Descriptor ids are offsets into the concatenated matrix; shift them
        # so they address rows of this image's own location array.
        index_for_index_loc = subset[cond] - filenames_index_ix_counter_cummul[img_ix]
        assert len(index_for_query_loc) == len(index_for_index_loc)

        # ransac cannot draw `min_samples` points from fewer matches; such a
        # candidate is treated as having no inliers.
        if len(index_for_query_loc) < ransac_min_samples:
            continue

        loc_query_to_use = loc_query[query_ix][index_for_query_loc]
        loc_index_to_use = loc_index[img_ix][index_for_index_loc]

        # Perform geometric verification using RANSAC.
        _, inliers = ransac(
            (loc_index_to_use, loc_query_to_use),
            AffineTransform,
            min_samples=ransac_min_samples,
            residual_threshold=20,
            max_trials=2000)
        nb_inliers = 0 if inliers is None else int(np.sum(inliers))
        if nb_inliers != 0:
            count_img_by_query_final[query_ix].append((img_ix, nb_inliers))

# Best verified candidates first.
for verified_matches in count_img_by_query_final.values():
    verified_matches.sort(key=lambda match: match[1], reverse=True)

In [None]:
# Per query: candidate images with their descriptor vote counts.
for query_id, voted_images in count_img_by_query.items():
    print(query_id, voted_images)

In [None]:
# Per query: candidate images with their RANSAC inlier counts.
for query_id, verified_images in count_img_by_query_final.items():
    print(query_id, verified_images)

In [None]:
from skimage.measure import ransac

In [None]:
# Keep the locations of the query features that received a neighbour
# (`indices[i] != -1`) together with the locations of those neighbours.
# NOTE(review): relies on `indices` and `n_features_query` being defined by an
# earlier cell, and on `loc_index` / `loc_query` being single arrays — confirm
# before running.
matched_positions = [i for i in range(n_features_query) if indices[i] != -1]

loc_index_to_use = np.array([loc_index[indices[i],] for i in matched_positions])
loc_query_to_use = np.array([loc_query[i,] for i in matched_positions])
print(len(loc_index_to_use), len(loc_query_to_use))

In [None]:
# Fit an affine transform between the matched locations with RANSAC and
# report how many matches agree with it.
matched_locations = (loc_index_to_use, loc_query_to_use)
_, inliers = ransac(
    matched_locations,
    AffineTransform,
    min_samples=3,
    residual_threshold=20,
    max_trials=1000,
)
if inliers is None:
    nb_inliers = 0
else:
    nb_inliers = np.sum(inliers)
print(nb_inliers)

In [None]:
# Draw the inlier correspondences between the query and the index image.
# NOTE(review): relies on `image_query_path` / `image_index_path` being defined
# by an earlier cell — confirm before running.
_, ax = plt.subplots()
img_1 = mpimg.imread(image_query_path)
img_2 = mpimg.imread(image_index_path)
inlier_idxs = np.nonzero(inliers)[0]
# Row k of both location arrays describes the same match, so every inlier is
# paired with itself.
inlier_pairs = np.column_stack((inlier_idxs, inlier_idxs))
plot_matches(ax, img_1, img_2, loc_query_to_use, loc_index_to_use, inlier_pairs,
             matches_color='b')
ax.axis('off')
ax.set_title('DELF correspondences')
plt.show()

In [None]:
# Single-pair check: match one query image against one indexed image with a
# KD-tree, verify the matches with RANSAC and plot the inliers.
#
# The pair is selected explicitly so the cell runs from a fresh kernel.
pair_query_ix = 0  # position in `query_filenames`
pair_index_ix = 0  # position in `images_filenames`

image_query_path = query_image_path[pair_query_ix]
image_index_path = index_image_path[pair_index_ix]

# `desc_query` / `desc_index` hold the descriptors of all images stacked
# row-wise; slice out the rows of the selected pair.
query_start = sum(n_feature_query[:pair_query_ix])
query_stop = query_start + n_feature_query[pair_query_ix]
index_start = filenames_index_ix_counter_cummul[pair_index_ix]
index_stop = filenames_index_ix_counter_cummul[pair_index_ix + 1]
pair_desc_query = desc_query[query_start:query_stop]
pair_desc_index = desc_index[index_start:index_stop]
pair_loc_query = loc_query[pair_query_ix]
pair_loc_index = loc_index[pair_index_ix]
n_features_query = pair_desc_query.shape[0]
n_features_index = pair_desc_index.shape[0]

# Find nearest-neighbor matches using a KD tree.
d1_tree = cKDTree(pair_desc_index)
_, indices = d1_tree.query(pair_desc_query, distance_upper_bound=0.8)

print('indices', len(indices))

# Select feature locations for putative matches.  cKDTree marks a query point
# without neighbour inside the bound with the tree size, i.e. n_features_index.
has_match = indices != n_features_index
loc_index_to_use = pair_loc_index[indices[has_match]]
loc_query_to_use = pair_loc_query[has_match]

print(len(loc_index_to_use))
print(len(loc_query_to_use))

# Perform geometric verification using RANSAC.  It is skipped when there are
# fewer matches than `min_samples`, and a failed fit (inliers is None) is
# treated as "no inliers" so the rest of the cell still runs.
pair_min_samples = 3
inliers = None
if len(loc_index_to_use) >= pair_min_samples:
    _, inliers = ransac(
        (loc_index_to_use, loc_query_to_use),
        AffineTransform,
        min_samples=pair_min_samples,
        residual_threshold=20,
        max_trials=1000)
if inliers is None:
    inliers = np.zeros(len(loc_index_to_use), dtype=bool)

print('Found %d inliers' % np.sum(inliers))


# Visualize correspondences.
_, ax = plt.subplots()
img_1 = mpimg.imread(image_query_path)
img_2 = mpimg.imread(image_index_path)
inlier_idxs = np.nonzero(inliers)[0]
plot_matches(
    ax,
    img_1,
    img_2,
    loc_query_to_use,
    loc_index_to_use,
    np.column_stack((inlier_idxs, inlier_idxs)),
    matches_color='b')
ax.axis('off')
ax.set_title('DELF correspondences')
plt.show()

In [None]:
# Display the index-side locations kept as putative matches.
loc_index_to_use

In [None]:
# Display the query-side locations kept as putative matches.
loc_query_to_use

In [None]:
def resize_image(image, target_size=800, resample=None):
  """Resize `image` so its longer side equals `target_size`, keeping the aspect ratio.

  Args:
    image: object exposing `.size` as (width, height) and
      `.resize((w, h), resample)`, e.g. a PIL image.
    target_size: length in pixels given to the longer side.
    resample: resampling filter passed to `image.resize`.  Defaults to
      `Image.LANCZOS`.

  Returns:
    Whatever `image.resize` returns for the computed (width, height).
  """
  if resample is None:
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    resample = Image.LANCZOS

  def calc_by_ratio(a, b):
    # Scale the shorter side; never let truncation produce a 0-pixel side.
    return max(1, int(a * target_size / float(b)))

  size = image.size
  if size[0] < size[1]:
    # Portrait: the height is the longer side.
    w = calc_by_ratio(size[0], size[1])
    h = target_size
  else:
    # Landscape or square: the width is the longer side.
    w = target_size
    h = calc_by_ratio(size[1], size[0])

  return image.resize((w, h), resample)

# Rescale the query image and the index image of the inspected pair.
# NOTE(review): relies on `image_query_path` / `image_index_path` being defined
# by an earlier cell — confirm before running.

# Query image: the output path is only printed, saving is disabled.
img = resize_image(Image.open(image_query_path))
filename = splitext(basename(image_query_path))[0]
out = '/media/hdd1/kaggle/landmark-retrieval-challenge/test_rescale/{}.jpg'.format(filename)
print(out)
# img.save(out)

# Index image: the resized image is written to disk.
img = resize_image(Image.open(image_index_path))
filename = splitext(basename(image_index_path))[0]
out = '/media/hdd1/kaggle/landmark-retrieval-challenge/index_rescale/{}.jpg'.format(filename)
print(out)
img.save(out)