In [1]:
from Datasets.Oxford.load_data import data_loader
from utils.cluster import KMEANS
from utils.get_features import SIFT
from utils.find_similar_vectors import KNN
import matplotlib.pyplot as plt
import numpy as np

In [2]:
images, image_labels = data_loader()
images = images
image_labels = image_labels

100%|██████████| 17/17 [01:18<00:00,  4.61s/it]


In [3]:
from sklearn.model_selection import train_test_split

train_images, test_images, train_labels, test_labels = train_test_split(
    images, image_labels, shuffle=True, test_size=0.1)

In [4]:
from importlib import reload

from utils import get_features
reload(get_features)
from utils.get_features import SIFT

In [5]:
sift_detector = SIFT('sift')
train_keypoints, features = sift_detector.get_features(train_images, True)

features_array = np.concatenate(features, axis=0)

100%|██████████| 4556/4556 [09:48<00:00,  7.74it/s]


In [6]:
num_features, dim = features_array.shape
num_clusters = 500

kmeans = KMEANS(dim, num_clusters=num_clusters, niter=200)
kmeans.train(features_array)

Sampling a subset of 128000 / 16060228 for training
Clustering 128000 points in 128D to 500 clusters, redo 1 times, 200 iterations
  Preprocessing in 1.72 s
  Iteration 199 (9.47 s, search 8.48 s): objective=8.79824e+09 imbalance=1.094 nsplit=0       


In [7]:
from tqdm import tqdm


hists = []

for f in tqdm(features):
    labels = kmeans.search(f)[:, 0]
    counts = np.unique(labels, return_counts=True)
    arr = np.zeros(num_clusters)
    arr[counts[0]] = counts[1]
    arr /= np.linalg.norm(arr)
    hists.append(arr[None, :]) # dim: 1 x num_clusters

hists = np.concatenate(hists, axis=0)    

100%|██████████| 4556/4556 [00:31<00:00, 145.23it/s]


In [8]:
knn = KNN(num_clusters)
knn.fit(hists)
knn.findKNearest(hists[:3])

array([[   0, 1075, 2797,  878, 2599, 2047, 3353, 4491, 1435, 2090],
       [   1, 3603, 1169, 1073, 2819, 1835, 1438, 1582,  497,  340],
       [   2, 1972, 1252, 1093, 3366, 2464, 1680,  755, 2510,  835]])

# Testing

In [9]:
test_keypoints, test_features = sift_detector.get_features(test_images, True)


100%|██████████| 507/507 [01:05<00:00,  7.79it/s]


In [10]:
def K_precisions(true_labels, ret_labels):
    eq = ret_labels == true_labels
    cumulative = np.cumsum(eq)
    precisions = cumulative / (np.arange(len(ret_labels)) + 1)
    return precisions

In [11]:
K = 5

APs = []
for itr in range(len(test_images)):
    image = test_images[itr]
    true_label = test_labels[itr]
    f = test_features[itr]
    kp = test_keypoints[itr]
    # it is possible that no descriptors are obtained
    if f is None:
        print(f'{true_label}: no features extracted')
        continue

    f = np.array(f)
    
    # histogram
    labels = kmeans.search(f)[:, 0]
    counts = np.unique(labels, return_counts=True)
    arr = np.zeros(num_clusters)
    arr[counts[0]] = counts[1]
    arr /= np.linalg.norm(arr)
    
    # finding the nearest neighbours
    indices = knn.findKNearest(arr[None, :], K)[0]
    # finding the labels associated with retrieved
    # neighbours
    ret_labels = np.array([train_labels[i] for i in indices])
    APs.append(K_precisions(true_label, ret_labels))


APs = np.array(APs)
mAP = np.mean(APs)    

In [13]:
APs.mean(axis=0)

array([0.3234714 , 0.32051282, 0.2991453 , 0.28648915, 0.27810651])

In [14]:
mAP

0.3015450361604208

# RANSAC

In [15]:
from importlib import reload
from utils import ransac
reload(ransac)

<module 'utils.ransac' from '/raid/ai20btech11006/others/CBIR/utils/ransac.py'>

In [16]:
from utils.ransac import ransac_sift_multiprocessing, ransac_sift
from multiprocessing import Process, Array


K = 5
precision = np.zeros(K)

APs = []
for itr in tqdm(range(len(test_images))):
    image = test_images[itr]
    true_label = test_labels[itr]
    f = test_features[itr]
    kp = test_keypoints[itr]
    # it is possible that no descriptors are obtained
    if f is None:
        print(f'{true_label}: no features extracted')
        continue

    f = np.array(f)
    
    # histogram
    labels = kmeans.search(f)[:, 0]
    counts = np.unique(labels, return_counts=True)
    arr = np.zeros(num_clusters)
    arr[counts[0]] = counts[1]
    arr /= np.linalg.norm(arr)
    
    # finding the nearest neighbours
    indices = knn.findKNearest(arr[None, :], 5*K)[0]
    kps = [train_keypoints[i] for i in indices]
    ds = [features[i] for i in indices]
    
    scores = ransac_sift_multiprocessing(f, ds, kp, kps)
    
    high_score_indices = np.argsort(scores)[::-1][:K]
    indices = indices[high_score_indices]
    
    # finding the labels associated with retrieved
    # neighbours
    ret_labels = np.array([train_labels[i] for i in indices[:K]])
    APs.append(K_precisions(true_label, ret_labels))


APs = np.array(APs)
mAP = np.mean(APs)    

  0%|          | 1/507 [00:07<59:40,  7.08s/it]

 23%|██▎       | 118/507 [16:13<55:37,  8.58s/it]  Process Process-2951:
Traceback (most recent call last):
Process Process-2952:
Process Process-2953:
  File "/raid/ai20btech11006/miniconda3/envs/ivp/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/raid/ai20btech11006/miniconda3/envs/ivp/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/raid/ai20btech11006/miniconda3/envs/ivp/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/raid/ai20btech11006/others/CBIR/utils/ransac.py", line 33, in rasnac_for_multiprocessing
    res = method(*args)
Traceback (most recent call last):
Traceback (most recent call last):
  File "/raid/ai20btech11006/others/CBIR/utils/ransac.py", line 15, in ransac_sift
    matches = flann.knnMatch(d1, d2, k=2)
  File "/raid/ai20btech11006/miniconda3/envs/ivp/lib/python3.10/multiprocessing/process.py", line 314, in _boot

In [17]:
APs.mean(axis=0), mAP

(array([0.42209073, 0.37968442, 0.34319527, 0.32149901, 0.30374753]),
 0.35404339250493094)