In [None]:
# Environment setup: install the system package libomp-dev and the faiss
# Python packages used by the index-building cell further down.
# NOTE(review): both `faiss` and `faiss-gpu` are installed, unpinned —
# presumably only one of them is needed; confirm and pin a version.
!apt install libomp-dev
!pip install faiss
!pip install faiss-gpu

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

from sklearn.neighbors import NearestNeighbors

from PIL import Image
import math
import numpy as np
from sklearn.utils import shuffle
import os
import json
from sklearn.model_selection import train_test_split
import cv2
from google.colab.patches import cv2_imshow
from scipy import spatial

In [None]:
import matplotlib.pyplot as plt

def draw_image(path):
    """Display the image stored at ``path`` in the notebook output.

    Args:
        path: Filesystem path of the image to show.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``path``.
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising, so
    # fail here with the offending path rather than inside cv2_imshow.
    if img is None:
        raise FileNotFoundError("Could not read image: " + str(path))
    cv2_imshow(img)
def draw_list_image(paths, titles=None,
                    image_dir="/content/drive/MyDrive/Yonin-IR/data/paris_data/",
                    grid_size=5):
    """Plot a list of images in a grid, one subplot per image.

    Each entry of ``paths`` is reduced to its last ``/``-separated component,
    and the image is loaded from ``image_dir + <component> + '.jpg'``.

    Args:
        paths: Sequence of image names or paths (without the ``.jpg`` suffix).
        titles: Optional sequence of numbers, parallel to ``paths``; each is
            rounded to 5 decimals and used as the subplot title.
        image_dir: Directory prefix (with trailing slash) the images are read
            from. Defaults to the previously hard-coded Paris data folder;
            pass another folder to visualise a different dataset.
        grid_size: Number of columns in the grid (and minimum number of rows).
    """
    k = len(paths)
    if k == 0:
        # Nothing to draw; avoid creating an empty figure.
        return
    # Keep the original grid_size x grid_size layout for small lists, but add
    # rows when there are more images than cells so add_subplot never fails.
    n_rows = max(grid_size, math.ceil(k / grid_size))
    fig = plt.figure(figsize=(20, 4 * n_rows))
    for idx, item in enumerate(paths):
        path = image_dir + item.split('/')[-1] + '.jpg'
        print(path)
        ax = fig.add_subplot(n_rows, grid_size, idx + 1)
        if titles is not None:
            ax.set_title(round(titles[idx], 5))
        # Close the file handle once the pixel data has been handed to matplotlib.
        with Image.open(path) as img:
            ax.imshow(img)
    fig.tight_layout()
    plt.show()

## Load Data

In [None]:
# Read the JSON file holding the database features; the cell below reads its
# 'vectors', 'paths' and 'names' entries.
data_path = "/content/drive/MyDrive/Yonin-IR/data/data_oxbuild_vgg16_pretrain.json"
with open(data_path) as data_file:
    data = json.loads(data_file.read())

In [None]:
# Split the loaded JSON into the feature matrix and its parallel
# 'paths' / 'names' lists.
vectors = np.array(data['vectors'])
paths, names = (data[key] for key in ('paths', 'names'))

In [None]:
# Cast the feature matrix to float32 before it is added to the faiss index
# below (presumably because faiss expects float32 input — confirm).
vectors = vectors.astype(np.float32)

In [None]:
# Sanity check of the feature matrix; the second dimension is the one used as
# the index dimensionality when the faiss index is built.
vectors.shape

(5063, 512)

## Faiss

In [None]:
import faiss

# Build a flat L2 index sized to the feature dimension and add every
# database vector to it.
feature_dim = vectors.shape[1]
flower_index = faiss.IndexFlatL2(feature_dim)
flower_index.add(vectors)

## Load query

In [None]:
def extract_file_names(path):
    """Return the query names found in a ground-truth directory.

    A query is identified by the presence of a ``<query>_good.txt`` file; the
    returned name is the file name with that suffix removed.

    Args:
        path: Directory containing the ``*_good.txt`` ground-truth files.

    Returns:
        Sorted list of query names. Sorting makes the order reproducible,
        since ``os.listdir`` returns entries in arbitrary order.
    """
    suffix = '_good.txt'
    return sorted(
        file[:-len(suffix)]
        for file in os.listdir(path)
        if file.endswith(suffix)
    )

In [None]:
# Collect the query names from the ground-truth directory (one name per
# "*_good" file found there).
query_file_path = "/content/drive/MyDrive/Yonin-IR/data/query_oxbuild/"
query_file = extract_file_names(query_file_path)

In [None]:
# Number of queries discovered in the ground-truth directory.
len(query_file)

55

In [None]:
# Peek at the first few query names.
query_file[:5]

['all_souls_4', 'all_souls_2', 'all_souls_1', 'all_souls_3', 'balliol_3']

In [None]:
# Read the JSON file holding the query features; the next cell reads its
# 'names' and 'vectors' entries.
path = "/content/drive/MyDrive/Yonin-IR/data/data_oxbuild_vgg16_pretrain_query.json"
with open(path) as query_json:
    query = json.loads(query_json.read())

In [None]:
# Map each query name to the feature vector stored at the same position.
vectors_query = {
    query_name: query['vectors'][position]
    for position, query_name in enumerate(query['names'])
}

## List rank for each query

In [None]:
def search(index, names, query_vector, k):
    """Return the ``k`` nearest database entries for one query vector.

    Defined here because the notebook calls ``search`` without defining it
    anywhere, which fails on a fresh kernel.

    Args:
        index: A populated faiss index.
        names: Sequence whose i-th entry labels the i-th vector in ``index``.
        query_vector: 1-D feature vector of the query.
        k: Maximum number of neighbours to return.

    Returns:
        ``(nearest_names, nearest_distances)`` — two parallel lists ordered
        from closest to farthest.
    """
    query_matrix = np.ascontiguousarray(query_vector, dtype=np.float32).reshape(1, -1)
    # Asking for more neighbours than stored vectors yields -1 padding ids.
    k = min(k, index.ntotal)
    distances, indices = index.search(query_matrix, k)
    nearest_names = []
    nearest_distances = []
    for idx, dist in zip(indices[0], distances[0]):
        if idx < 0:
            continue
        nearest_names.append(names[idx])
        nearest_distances.append(float(dist))
    return nearest_names, nearest_distances


image_path = "/content/drive/MyDrive/Yonin-IR/data/oxbuild_data/"
# The former `model = get_extract_model()` call was dropped: the function is
# not defined in this notebook and the model was never used, because the
# query features are loaded precomputed from JSON.
list_rank_dir = "/content/drive/MyDrive/Yonin-IR/data/list_rank/"
os.makedirs(list_rank_dir, exist_ok=True)
for name in query_file:
    query_vector = np.array(vectors_query[name])
    query_vector = query_vector.astype(np.float32)
    nearest_paths, nearest_distances = search(flower_index, names, query_vector, 5000)
    # One JSON file per query, holding the ranked list of database names.
    list_rank_path = list_rank_dir + name + ".json"
    with open(list_rank_path, "w") as fp:
        json.dump(nearest_paths, fp, indent=4)

## Evaluation

### MAP

In [None]:
# Rank cutoff: only the first k entries of each stored ranking are scored.
k = 2000

list_rank_path = "/content/drive/MyDrive/Yonin-IR/data/list_rank/"
query_file_path = "/content/drive/MyDrive/Yonin-IR/data/query_oxbuild/"
list_ap = []
for name in query_file:
    with open(list_rank_path + name + ".json") as fp:
        list_rank = json.load(fp)
    # Ground-truth files hold one image name per line. Blank lines are dropped
    # (an empty string would substring-match every ranked item) and the last
    # name is kept even when the file has no trailing newline.
    with open(query_file_path + name + "_good.txt", 'r') as fp:
        good_set = [line.strip() for line in fp if line.strip()]
    with open(query_file_path + name + "_ok.txt", 'r') as fp:
        ok_set = [line.strip() for line in fp if line.strip()]
    with open(query_file_path + name + "_junk.txt", 'r') as fp:
        junk_set = [line.strip() for line in fp if line.strip()]
    right_set = good_set + ok_set

    pos = len(right_set)
    if pos == 0:
        raise ValueError("No good/ok ground-truth images for query: " + name)
    ap = 0
    relevant = 0
    old_recall = 0
    old_pre = 1
    j = 0  # rank position counted over non-junk items only
    # Slicing (instead of indexing range(k)) tolerates rankings shorter than k.
    for ranked_item in list_rank[:k]:
        if any(s in ranked_item for s in right_set):
            relevant += 1
        elif any(s in ranked_item for s in junk_set):
            # Junk images are skipped entirely: they neither count as hits
            # nor advance the rank position.
            continue
        pre = relevant / (j + 1)
        recall = relevant / pos
        # Trapezoidal area under the precision-recall curve for this step.
        ap += (recall - old_recall) * ((pre + old_pre) / 2)
        j += 1
        old_recall = recall
        old_pre = pre
    list_ap.append(ap)
# Named mean_ap rather than `map` so the builtin `map` is not shadowed.
mean_ap = np.mean(list_ap)
print(mean_ap)

0.3030517174869393


In [None]:
# Recall reached within the cutoff for the last query processed by the MAP
# loop above (the variable is overwritten on every query).
old_recall

1.0

### MAP@K

In [None]:
# Rank cutoff: only the first k entries of each stored ranking are scored.
k = 50
list_rank_path = "/content/drive/MyDrive/Yonin-IR/data/list_rank/"
query_file_path = "/content/drive/MyDrive/Yonin-IR/data/query_oxbuild/"
ap = []
for name in query_file:
    with open(list_rank_path + name + ".json") as fp:
        list_rank = json.load(fp)
    # Ground-truth files hold one image name per line. Blank lines are dropped
    # (an empty string would substring-match every ranked item) and the last
    # name is kept even when the file has no trailing newline.
    with open(query_file_path + name + "_good.txt", 'r') as fp:
        good_set = [line.strip() for line in fp if line.strip()]
    with open(query_file_path + name + "_ok.txt", 'r') as fp:
        ok_set = [line.strip() for line in fp if line.strip()]
    with open(query_file_path + name + "_junk.txt", 'r') as fp:
        junk_set = [line.strip() for line in fp if line.strip()]
    right_set = good_set + ok_set

    precision = []
    relevant = 0
    j = 0  # rank position counted over non-junk items only
    # Slicing (instead of indexing range(k)) tolerates rankings shorter than k.
    for ranked_item in list_rank[:k]:
        if any(s in ranked_item for s in right_set):
            relevant += 1
        elif any(s in ranked_item for s in junk_set):
            # Junk images are ignored: no hit and no rank position consumed.
            continue
        pre = relevant / (j + 1)
        # Running precision is recorded at every non-junk rank, relevant or not.
        precision.append(pre)
        j += 1
    # If every scored item was junk there is nothing to average; record 0.0
    # instead of the nan that np.mean([]) would produce.
    ap.append(np.mean(precision) if precision else 0.0)
# Named mean_ap rather than `map` so the builtin `map` is not shadowed.
mean_ap = np.mean(ap)
print('top ' + str(k) + ': ', mean_ap)

top 50:  0.3596698578311356


In [None]:
# Per-query scores collected by the MAP@K loop above, in query_file order.
ap

## Visualize Result

In [None]:
# Show one query image followed by its 20 top-ranked results.
# NOTE(review): the folders read here are the *_paris ones, while query_file
# was built from the oxbuild ground truth — confirm these paths are intended.
i = 21
list_rank_path = "/content/drive/MyDrive/Yonin-IR/data/list_rank_paris/"
query_name = query_file[i]
print(query_name)
draw_image("/content/drive/MyDrive/Yonin-IR/data/query_image_paris/" + query_name + '.jpg')
with open(list_rank_path + query_name + ".json") as rank_file:
    list_rank = json.load(rank_file)
top_ranked = list_rank[:20]
draw_list_image(top_ranked)

Output hidden; open in https://colab.research.google.com to view.