In [None]:
!pip install opencv-python

In [None]:
import cv2
import scipy.cluster.vq as vq
import numpy as np
import os
import time
import copy
from tqdm.notebook import tqdm
import pickle
from matplotlib import pyplot as plt

In [None]:
from google.colab import drive

# Mount Google Drive so the dataset folder becomes reachable from the notebook.
drive.mount('/content/drive')
new_path = '/content/drive/MyDrive/UC_merced_data'

# Folder that is actually listed and read below. It is relative to the current working
# directory — NOTE(review): presumably /content on Colab; confirm, or switch to `new_path`.
data_dir = "drive/My Drive/UC_merced_data"

query_names = []
query_imgs = []
train_names = []
train_imgs = []

# os.listdir() returns entries in arbitrary order; sorting makes the "every 100th file
# becomes a query" split reproducible between runs.
for imno, indir in enumerate(sorted(os.listdir(data_dir))):
    # Flag -1 loads the file unchanged (no colour conversion).
    img = cv2.imread(os.path.join(data_dir, indir), -1)

    # cv2.imread returns None for unreadable / non-image files; also keep only
    # images with the expected 256x256x3 shape, as before.
    if img is None or img.shape != (256, 256, 3):
        continue

    imname = indir.strip()
    if imno % 100 == 0:
        query_names.append(imname)
        query_imgs.append(img)
    else:
        train_names.append(imname)
        train_imgs.append(img)

# **Индексация с помощью случайных K-d деревьев**


In [None]:
import torch
import torchvision


class AlexNet(object):
    """ImageNet-pretrained AlexNet used as a fixed descriptor extractor for OpenCV images."""

    # Per-channel RGB mean / std that torchvision's ImageNet-pretrained models expect.
    _IMAGENET_MEAN = (0.485, 0.456, 0.406)
    _IMAGENET_STD = (0.229, 0.224, 0.225)

    def __init__(self):
        """Load the pretrained network, move it to GPU when available, and switch to eval mode."""
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self._device = torch.device(device)

        # `pretrained=True` is deprecated in newer torchvision releases in favour of
        # `weights=`; fall back to the old keyword when the weights enum is missing.
        weights_enum = getattr(torchvision.models, "AlexNet_Weights", None)
        if weights_enum is not None:
            self.alexnet = torchvision.models.alexnet(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.alexnet = torchvision.models.alexnet(pretrained=True)
        self.alexnet.to(self._device)
        self.alexnet.eval()

    def embedding(self, cv_image):
        """Run one image through the network and return its output as a NumPy array.

        Args:
            cv_image: array-like image in OpenCV layout, i.e. (height, width, 3)
                with channels in BGR order and values in 0..255.

        Returns:
            numpy.ndarray: the squeezed output of the model's full forward pass
            (the final-layer scores, not an intermediate feature map).
        """
        image = torch.as_tensor(cv_image, dtype=torch.float32) / 255
        # HWC -> CHW (the previous permute(2, 1, 0) swapped height and width, i.e.
        # transposed the picture), then BGR -> RGB by reversing the channel axis.
        image = image.permute(2, 0, 1).flip(0).unsqueeze(0).to(self._device)

        # Normalise with the statistics the pretrained weights were trained with.
        mean = torch.tensor(self._IMAGENET_MEAN, device=self._device).view(1, 3, 1, 1)
        std = torch.tensor(self._IMAGENET_STD, device=self._device).view(1, 3, 1, 1)
        image = (image - mean) / std

        with torch.no_grad():
            return self.alexnet(image).squeeze().cpu().numpy()


In [None]:
# Compute one AlexNet output vector per image for the query and train splits.
# NOTE(review): each embedding is a 1-D vector; OpenCV matchers normally take 2-D
# (n_descriptors, dim) arrays — confirm whether a reshape(1, -1) is intended downstream.
A = AlexNet()

# A progress bar replaces the per-image shape print, which flooded the cell output.
query_desc = [A.embedding(img) for img in tqdm(query_imgs, desc="query embeddings")]
train_desc = [A.embedding(img) for img in tqdm(train_imgs, desc="train embeddings")]

print(len(query_desc))
print(len(train_desc))

In [None]:
def build_index_kdtrees(img_names, descriptors, n_trees):
    """Create a FLANN matcher with a KD-tree index over `descriptors` and time its construction.

    Args:
        img_names: not used by this function.
        descriptors: passed unchanged to the matcher's ``add``.
        n_trees: value of the ``trees`` index parameter.

    Returns:
        tuple: ``(matcher, seconds)`` — the trained ``cv2.FlannBasedMatcher`` and the
        wall-clock time spent in ``add`` + ``train``.
    """
    kdtree_algorithm_id = 1  # FLANN_INDEX_KDTREE
    matcher = cv2.FlannBasedMatcher(
        {"algorithm": kdtree_algorithm_id, "trees": n_trees},
        {},
    )

    # Only adding the descriptors and training the index is timed.
    t_begin = time.time()
    matcher.add(descriptors)
    matcher.train()
    elapsed = time.time() - t_begin

    return matcher, elapsed


In [None]:
def query_image(descs, flann_index, train_names, ratio=0.75):
    """Rank the indexed images by how many query descriptors vote for them.

    Each query descriptor is matched to its two nearest neighbours; the nearest one
    casts a vote for its image only if it passes the ratio test
    (``best.distance < second.distance * ratio``).

    Args:
        descs: query descriptors, passed unchanged to ``flann_index.knnMatch``.
        flann_index: trained matcher exposing ``knnMatch(descs, k=2)``.
        train_names: image names, indexed by the ``imgIdx`` of a match.
        ratio: ratio-test threshold. Defaults to 0.75, the value that used to be
            hard-coded (the argument was previously ignored).

    Returns:
        tuple: ``(best_imgs, query_time)`` — every name from `train_names` ordered by
        descending vote count, and the seconds spent inside ``knnMatch``.
    """
    score = {img_name: 0 for img_name in train_names}

    # Only the nearest-neighbour search itself is timed.
    start = time.time()
    matches = flann_index.knnMatch(descs, k=2)
    stop = time.time()

    for match in matches:
        # The matcher may return fewer than k neighbours for a descriptor; the ratio
        # test needs both, so such entries are skipped instead of raising IndexError.
        if len(match) < 2:
            continue
        best, second = match[0], match[1]
        if best.distance < second.distance * ratio:
            score[train_names[best.imgIdx]] += 1

    results = np.array(list(score.values()))
    imgs_names = list(score.keys())
    index_sort = np.argsort(results)[::-1]
    best_imgs = [imgs_names[i] for i in index_sort]

    query_time = stop - start
    return best_imgs, query_time


In [None]:
# Ground truth used by compute_recall: sub-folder name -> names of all files inside it.
res_dict = {}
images_root = "drive/My Drive/Images"
for indir in os.listdir(images_root):
    subdir = os.path.join(images_root, indir)
    # Skip stray files next to the class folders; os.listdir would fail on them.
    if not os.path.isdir(subdir):
        continue
    # Previously this wrote to an undefined `map_dict`, so `res_dict` stayed empty.
    res_dict[indir] = list(os.listdir(subdir))
    


def compute_recall(query_names, query_descs, index, train_names, ratio=0.75, top_k=None):
    """Query the index with every query image and measure per-query recall.

    The relevant set of a query is ``res_dict[query_name[:-6]]``, i.e. the name with
    its last six characters removed is used as the class key
    (assumes names like ``<class>NN.tif`` — TODO confirm against the dataset).

    Args:
        query_names: names of the query images.
        query_descs: descriptors of the query images, aligned with `query_names`.
        index: trained matcher forwarded to ``query_image``.
        train_names: names of the indexed images, forwarded to ``query_image``.
        ratio: ratio-test threshold forwarded to ``query_image``.
        top_k: if given, only the first `top_k` retrieved names count towards recall.
            The default ``None`` keeps the previous behaviour of using the full ranking
            (which contains every train image, so recall is then nearly constant).

    Returns:
        tuple: ``(total_results, recall_array, query_times)`` — dict of query name ->
        full ranked list, NumPy array of recalls, NumPy array of query times.

    Raises:
        KeyError: if a query's class key is missing from ``res_dict``.
    """
    total_results = {}
    query_times = []

    for query_name, descs in zip(query_names, query_descs):
        # `ratio` is passed explicitly: query_image's signature takes it as 4th argument.
        results, query_time = query_image(descs, index, train_names, ratio)
        total_results[query_name] = results
        query_times.append(query_time)

    recall_array = []
    for key, ranked in total_results.items():
        relevant_names = res_dict[key[:-6]]
        if not relevant_names:
            # No reference images for this class: recall is undefined, report 0.
            recall_array.append(0.0)
            continue
        relevant = set(relevant_names)  # O(1) membership instead of scanning a list
        considered = ranked if top_k is None else ranked[:top_k]
        hits = sum(1 for name in considered if name in relevant)
        recall_array.append(hits / len(relevant_names))

    return total_results, np.array(recall_array), np.array(query_times)


In [None]:
# Repeat the build + query experiment and average the timings over the runs.
mean_train_time_kd = 0.0
mean_query_time_kd = 0.0
n_iters = 5

for i in range(n_iters):
    index, train_time_kd = build_index_kdtrees(train_names, train_desc, n_trees = 3)
    results, recall, query_time_kd = compute_recall(query_names, query_desc, index, train_names)
    # Accumulate into the variables initialised above (the old names were undefined).
    mean_train_time_kd += train_time_kd
    mean_query_time_kd += np.mean(query_time_kd)
    print('Iteration {}\n'.format(i))
    print('Recall: {} \n'.format(np.mean(recall)))
    print('Training time: {} secs.'.format(train_time_kd))
    print('Query response time: {} +- {} secs.'.format(np.mean(query_time_kd), np.std(query_time_kd)))
    print('\n\n')

# Turn the accumulated sums into means, as the variable names promise.
mean_train_time_kd /= n_iters
mean_query_time_kd /= n_iters
print('Mean training time: {} secs.'.format(mean_train_time_kd))
print('Mean query response time: {} secs.'.format(mean_query_time_kd))

# Индексация с помощью LSH-хеширования

In [None]:
orb = cv2.ORB_create(nfeatures = 1500, fastThreshold = 50)


def _orb_features(detector, images):
    """Run `detector.detectAndCompute` on every image.

    Returns two lists aligned with `images`: keypoints and descriptor arrays. When the
    detector finds nothing it returns ``None`` for the descriptors; that is replaced by
    an empty (0, descriptorSize) uint8 array so list positions stay aligned with the
    image names and later ``.shape`` access does not fail.
    NOTE(review): confirm the matcher accepts empty descriptor arrays in ``add``.
    """
    no_descriptors = np.empty((0, detector.descriptorSize()), dtype=np.uint8)
    keypoints, descriptors = [], []
    for image in images:
        kp, des = detector.detectAndCompute(image, mask=None)
        keypoints.append(kp)
        descriptors.append(no_descriptors if des is None else des)
    return keypoints, descriptors


query_kps_orb, query_desc_orb = _orb_features(orb, query_imgs)
train_kps_orb, train_desc_orb = _orb_features(orb, train_imgs)

# Quick sanity check on the first query image (only the first rows are shown).
print(len(query_kps_orb[0]))
print(query_desc_orb[0].shape)
print(query_desc_orb[0][:5])


In [None]:
def build_index_lsh(img_names, descriptors, tables, hash_size):
    """Create a FLANN matcher with an LSH index over `descriptors` and time its construction.

    Args:
        img_names: not used by this function.
        descriptors: passed unchanged to the matcher's ``add``.
        tables: value of the ``table_number`` index parameter.
        hash_size: value of the ``key_size`` index parameter.

    Returns:
        tuple: ``(matcher, seconds)`` — the trained ``cv2.FlannBasedMatcher`` and the
        wall-clock time spent in ``add`` + ``train``.
    """
    lsh_config = {
        "algorithm": 6,  # FLANN_INDEX_LSH
        "table_number": tables,
        "key_size": hash_size,
        "multi_probe_level": 0,
    }
    lsh_matcher = cv2.FlannBasedMatcher(lsh_config, dict())

    # Time only the population and training of the index.
    began = time.time()
    lsh_matcher.add(descriptors)
    lsh_matcher.train()
    finished = time.time()

    return lsh_matcher, finished - began


In [None]:
# Repeat the LSH build + query experiment and average the timings over the runs.
mean_train_time_lsh = 0.0
mean_query_time_lsh = 0.0
n_iters = 3

# build_index_lsh takes `tables` and `hash_size` (it has no `n_trees` argument).
# 3 tables keeps the value the old call passed; key size 12 is the value shown in
# OpenCV's feature-matching tutorial — NOTE(review): tune both for this dataset.
LSH_TABLES = 3
LSH_KEY_SIZE = 12

for i in range(n_iters):
    index, train_time_lsh = build_index_lsh(train_names, train_desc_orb, tables = LSH_TABLES, hash_size = LSH_KEY_SIZE)
    results, recall, query_time_lsh = compute_recall(query_names, query_desc_orb, index, train_names)
    # Accumulate into the LSH variables (the old code reused the undefined KD-tree names).
    mean_train_time_lsh += train_time_lsh
    mean_query_time_lsh += np.mean(query_time_lsh)
    print('Iteration {}\n'.format(i))
    print('Recall: {} \n'.format(np.mean(recall)))
    print('Training time: {} secs.'.format(train_time_lsh))
    print('Query response time: {} +- {} secs.'.format(np.mean(query_time_lsh), np.std(query_time_lsh)))
    print('\n\n')

# Turn the accumulated sums into means, as the variable names promise.
mean_train_time_lsh /= n_iters
mean_query_time_lsh /= n_iters
print('Mean training time: {} secs.'.format(mean_train_time_lsh))
print('Mean query response time: {} secs.'.format(mean_query_time_lsh))

In [None]:
class BoVW(object):
    """Bag-of-visual-words inverted index built on top of a trained descriptor matcher."""

    def __init__(self, vocab_file):
        """Wrap a vocabulary matcher and create one empty posting list per visual word.

        Args:
            vocab_file: object exposing ``getTrainDescriptors()`` and ``match()``; the
                row count of its first train-descriptor array is taken as the
                vocabulary size.
        """
        self.vocab = vocab_file
        self.nwords = self.vocab.getTrainDescriptors()[0].shape[0]

        # Posting lists are keyed by the stringified word index.
        self.inverted = {str(i): [] for i in range(self.nwords)}

    def build_index(self, img_names, img_descs):
        """Add every image to the posting list of each visual word it contains.

        Args:
            img_names: image names.
            img_descs: per-image descriptors, aligned with `img_names`; entries that
                are ``None`` or empty are skipped.

        Returns:
            float: seconds spent building the index.
        """
        start = time.time()
        for name, descs in zip(img_names, img_descs):
            # Images without descriptors cannot be matched against the vocabulary.
            if descs is None or len(descs) == 0:
                continue
            matches = self.vocab.match(descs)
            # Each image is listed at most once per word, in ascending word order.
            for idx in sorted({match.trainIdx for match in matches}):
                self.inverted[str(idx)].append(name)
        stop = time.time()
        training_time = stop - start

        return training_time

    def query_image(self, descriptors):
        """Rank indexed images by the number of visual-word votes from the query.

        Args:
            descriptors: query descriptors; ``None`` or empty yields an empty ranking.

        Returns:
            tuple: ``(best_imgs, query_time)`` — names of all images that received at
            least one vote, ordered by descending vote count, and the elapsed seconds.
        """
        start = time.time()
        counter = {}
        if descriptors is not None and len(descriptors) > 0:
            for match in self.vocab.match(descriptors):
                for ret_img in self.inverted[str(match.trainIdx)]:
                    # dict.get avoids rebuilding the key list for every vote.
                    counter[ret_img] = counter.get(ret_img, 0) + 1

        values = np.array(list(counter.values()))
        imgs_names = list(counter.keys())
        index_sort = np.argsort(values)[::-1]
        best_imgs = [imgs_names[i] for i in index_sort]
        stop = time.time()
        query_time = stop - start

        return best_imgs, query_time
     