In [1]:
from Datasets.Oxford.load_data import data_loader
from utils.cluster import KMEANS
from utils.get_features import SIFT
from utils.find_similar_vectors import KNN
import matplotlib.pyplot as plt
import numpy as np

In [2]:
images, image_labels = data_loader()

100%|██████████| 17/17 [01:30<00:00,  5.33s/it]


In [3]:
from sklearn.model_selection import train_test_split

train_images, test_images, train_labels, test_labels = train_test_split(
    images, image_labels, shuffle=True, test_size=0.1)

In [4]:
from importlib import reload

from utils import get_features
reload(get_features)
from utils.get_features import SIFT

In [5]:
orb_detector = SIFT('sift')
features = orb_detector.get_features(train_images)

features_array = np.concatenate(features, axis=0)

100%|██████████| 4556/4556 [11:55<00:00,  6.37it/s]


In [6]:
num_features, dim = features_array.shape
num_clusters = 500

kmeans = KMEANS(dim, num_clusters=num_clusters, niter=500)
kmeans.train(features_array)

Sampling a subset of 128000 / 16106018 for training
Clustering 128000 points in 128D to 500 clusters, redo 1 times, 500 iterations
  Preprocessing in 2.48 s
  Iteration 499 (69.79 s, search 64.34 s): objective=8.80837e+09 imbalance=1.086 nsplit=0       


In [7]:
from tqdm import tqdm


hists = []

for f in tqdm(features):
    labels = kmeans.search(f)[:, 0]
    counts = np.unique(labels, return_counts=True)
    arr = np.zeros(num_clusters)
    arr[counts[0]] = counts[1]
    arr /= np.linalg.norm(arr)
    hists.append(arr[None, :]) # dim: 1 x num_clusters

hists = np.concatenate(hists, axis=0)    

  0%|          | 0/4556 [00:00<?, ?it/s]

100%|██████████| 4556/4556 [00:34<00:00, 133.69it/s]


In [8]:
knn = KNN(num_clusters)
knn.fit(hists)
knn.findKNearest(hists[:3])

array([[   0, 4094, 2026, 1597,  179, 3444, 1202, 4479, 2038, 4348],
       [   1, 4290, 2735, 2780,    9,  255, 1127, 2482, 3250, 2725],
       [   2, 1466, 1122, 1429, 3256,  764, 2188, 3187, 2073, 1088]])

# Testing

In [9]:
def K_precisions(true_labels, ret_labels):
    eq = ret_labels == true_labels
    cumulative = np.cumsum(eq)
    precisions = cumulative / (np.arange(len(ret_labels)) + 1)
    return precisions

In [10]:
K = 5
precision = np.zeros(K)

APs = []
for image, true_label in tqdm(zip(test_images, test_labels)):
    # get the orb features
    f = orb_detector.__get_features__(image)
    # it is possible that no descriptors are obtained
    if f is None:
        print(f'{true_label}: no features extracted')
        continue

    f = np.array(f)
    
    # histogram
    labels = kmeans.search(f)[:, 0]
    counts = np.unique(labels, return_counts=True)
    arr = np.zeros(num_clusters)
    arr[counts[0]] = counts[1]
    arr /= np.linalg.norm(arr)
    
    # finding the nearest neighbours
    indices = knn.findKNearest(arr[None, :], K)[0]
    # finding the labels associated with retrieved
    # neighbours
    ret_labels = np.array([train_labels[i] for i in indices])
    APs.append(K_precisions(true_label, ret_labels))


APs = np.array(APs)
mAP = np.mean(APs)    

507it [02:19,  3.64it/s]


In [11]:
non_general_indices = np.where(np.array(test_labels) != 'general')
APs[non_general_indices].mean(axis=0)

array([0.36883629, 0.34615385, 0.32938856, 0.31508876, 0.29901381])

In [12]:
APs.mean(axis=0)

array([0.36883629, 0.34615385, 0.32938856, 0.31508876, 0.29901381])

In [13]:
mAP

0.3316962524654832