In [1]:
from Datasets.Paris.load_data import data_loader
from utils.cluster import KMEANS
from utils.get_features import SIFT
from utils.find_similar_vectors import KNN
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load every image together with its label. The former `images = images` /
# `image_labels = image_labels` lines were no-op self-assignments and are gone.
images, image_labels = data_loader()

100%|██████████| 12/12 [01:38<00:00,  8.19s/it]


In [3]:
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffled 90/10 partition -- and therefore every metric
# computed from it -- is reproducible across kernel restarts.
SPLIT_SEED = 42

train_images, test_images, train_labels, test_labels = train_test_split(
    images, image_labels, shuffle=True, test_size=0.1, random_state=SPLIT_SEED)

In [4]:
# Development helper: re-execute utils.get_features so edits made to that
# module on disk are picked up without restarting the kernel.
from importlib import reload

from utils import get_features
reload(get_features)
# Re-bind SIFT after the reload; the name imported earlier would otherwise
# still refer to the class object from the previously loaded module.
from utils.get_features import SIFT

In [5]:
# Extract keypoints and descriptors for every training image.
sift_detector = SIFT('sift')
# NOTE(review): the meaning of the second positional argument (True) is
# defined in utils.get_features -- confirm what it toggles.
train_keypoints, features = sift_detector.get_features(train_images, True)

# Pool the per-image descriptor arrays row-wise (axis 0 is the default) into
# one matrix that the clustering step can consume.
features_array = np.concatenate(features)

100%|██████████| 5752/5752 [12:49<00:00,  7.48it/s]


In [6]:
# Size of the visual vocabulary (number of k-means centroids).
num_clusters = 500
# Unpacking also checks that the pooled descriptor matrix is 2-D.
num_features, dim = features_array.shape

# Learn the vocabulary from the pooled training descriptors. The recorded run
# log shows the trainer sub-sampling the descriptors before clustering.
kmeans = KMEANS(dim, num_clusters=num_clusters, niter=200)
kmeans.train(features_array)

Sampling a subset of 128000 / 19715703 for training
Clustering 128000 points in 128D to 500 clusters, redo 1 times, 200 iterations
  Preprocessing in 2.18 s
  Iteration 199 (9.78 s, search 8.73 s): objective=8.72241e+09 imbalance=1.116 nsplit=0       


In [7]:
from tqdm import tqdm


# Encode every training image as a histogram of visual-word occurrences,
# scaled to unit Euclidean length.
hists = []

for descriptors in tqdm(features):
    # First column of the search result -- presumably the id of the nearest
    # centroid for each descriptor; confirm against utils.cluster.KMEANS.
    words = kmeans.search(descriptors)[:, 0]
    word_ids, word_counts = np.unique(words, return_counts=True)
    histogram = np.zeros(num_clusters)
    histogram[word_ids] = word_counts
    hists.append(histogram / np.linalg.norm(histogram))

# Stack the 1-D histograms into one (num_images, num_clusters) matrix.
hists = np.vstack(hists)

  3%|▎         | 158/5752 [00:01<00:45, 123.45it/s]

100%|██████████| 5752/5752 [00:50<00:00, 113.70it/s]


In [8]:
# Index the training histograms for nearest-neighbour retrieval.
knn = KNN(num_clusters)
knn.fit(hists)
# Sanity check: query with the first three training histograms. In the
# recorded output each row starts with the query's own index (0, 1, 2).
knn.findKNearest(hists[:3])

array([[   0, 4809, 2752, 4127, 5382, 5509,  158,  504, 2385,   97],
       [   1,  240, 2852, 3750, 1724, 4527,   16, 1901, 1623, 4697],
       [   2, 1364, 2670, 3749,  503,  327, 2182, 1060,   50, 4798]])

# Testing

In [9]:
# Extract keypoints and descriptors for the held-out images with the same
# detector used for training. An entry of test_features can be None when no
# descriptors are found; the evaluation loops check for that.
test_keypoints, test_features = sift_detector.get_features(test_images, True)


100%|██████████| 640/640 [01:24<00:00,  7.60it/s]


In [10]:
def K_precisions(true_labels, ret_labels):
    """Precision@k for k = 1..len(ret_labels) of one ranked retrieval list.

    Args:
        true_labels: Ground-truth label of the query (a scalar), or an
            array-like that broadcasts against ``ret_labels``.
        ret_labels: Labels of the retrieved items, best match first. Any
            sequence (list, tuple, ndarray) is accepted.

    Returns:
        1-D float array whose entry ``k - 1`` is the fraction of the first
        ``k`` retrieved labels that equal the true label.
    """
    # Coerce to an array before comparing: a plain list compared with a scalar
    # yields a single bool instead of an element-wise mask, which would
    # silently turn every precision into 0.
    ret_labels = np.asarray(ret_labels)
    eq = ret_labels == true_labels
    cumulative = np.cumsum(eq)
    precisions = cumulative / (np.arange(len(ret_labels)) + 1)
    return precisions

In [11]:
K = 5

# Baseline retrieval: encode each test image as a unit-length visual-word
# histogram, fetch its K nearest training histograms and score the labels of
# those neighbours against the query's own label.
APs = []
for true_label, descriptors in zip(test_labels, test_features):
    # it is possible that no descriptors are obtained
    if descriptors is None:
        print(f'{true_label}: no features extracted')
        continue

    descriptors = np.array(descriptors)

    # histogram
    words = kmeans.search(descriptors)[:, 0]
    word_ids, word_counts = np.unique(words, return_counts=True)
    histogram = np.zeros(num_clusters)
    histogram[word_ids] = word_counts
    histogram /= np.linalg.norm(histogram)

    # finding the nearest neighbours
    indices = knn.findKNearest(histogram[None, :], K)[0]
    # finding the labels associated with retrieved
    # neighbours
    ret_labels = np.array([train_labels[i] for i in indices])
    APs.append(K_precisions(true_label, ret_labels))


# One row per scored query, one column per cut-off k = 1..K. Queries skipped
# above contribute no row.
APs = np.array(APs)
# Single summary figure: precision@k averaged over all scored queries and all k.
mAP = np.mean(APs)

In [12]:
# The evaluation loop skips every test image whose descriptors are None, so
# APs can have fewer rows than test_labels. Rebuild the labels of the queries
# that were actually scored (same skip rule) to keep the mask row-aligned.
scored_labels = np.array(
    [label for label, feats in zip(test_labels, test_features) if feats is not None])
non_general_indices = np.where(scored_labels != 'general')
# Precision@k (k = 1..K) averaged over the queries not labelled 'general'.
APs[non_general_indices].mean(axis=0)

array([0.54674797, 0.50914634, 0.49051491, 0.46849593, 0.45487805])

In [13]:
# Column-wise mean of APs: precision@k for each cut-off k, averaged over all
# scored test queries.
APs.mean(axis=0)

array([0.51875   , 0.48671875, 0.47291667, 0.45625   , 0.4471875 ])

In [14]:
# Scalar summary computed by the evaluation loop as np.mean(APs), i.e. the
# mean over every entry of the per-query precision matrix.
mAP

0.47636458333333337

# RANSAC

In [15]:
# Development helper: re-execute utils.ransac so on-disk edits to the module
# take effect without restarting the kernel.
from importlib import reload
from utils import ransac
reload(ransac)

<module 'utils.ransac' from '/raid/ai20btech11006/others/CBIR/utils/ransac.py'>

In [16]:
from utils.ransac import ransac_sift_multiprocessing, ransac_sift
from multiprocessing import Process, Array


K = 5
# The histogram search shortlists RERANK_FACTOR * K candidates; RANSAC then
# re-ranks that shortlist and only the best K are scored.
RERANK_FACTOR = 5

APs = []
for itr in tqdm(range(len(test_images))):
    true_label = test_labels[itr]
    f = test_features[itr]
    kp = test_keypoints[itr]
    # it is possible that no descriptors are obtained
    if f is None:
        print(f'{true_label}: no features extracted')
        continue

    f = np.array(f)

    # histogram
    labels = kmeans.search(f)[:, 0]
    counts = np.unique(labels, return_counts=True)
    arr = np.zeros(num_clusters)
    arr[counts[0]] = counts[1]
    arr /= np.linalg.norm(arr)

    # finding the nearest neighbours (candidate shortlist)
    indices = knn.findKNearest(arr[None, :], RERANK_FACTOR * K)[0]
    kps = [train_keypoints[i] for i in indices]
    ds = [features[i] for i in indices]

    # NOTE(review): presumably one score per candidate, in shortlist order --
    # confirm in utils.ransac. Scores are ranked in descending order below.
    scores = ransac_sift_multiprocessing(f, ds, kp, kps)

    # keep the K candidates with the highest scores
    high_score_indices = np.argsort(scores)[::-1][:K]
    indices = indices[high_score_indices]

    # finding the labels associated with retrieved
    # neighbours
    ret_labels = np.array([train_labels[i] for i in indices])
    APs.append(K_precisions(true_label, ret_labels))


# One row per scored query, one column per cut-off k = 1..K. Queries skipped
# above contribute no row.
APs = np.array(APs)
# Single summary figure: precision@k averaged over all scored queries and all k.
mAP = np.mean(APs)

  0%|          | 0/640 [00:00<?, ?it/s]

100%|██████████| 640/640 [1:50:08<00:00, 10.33s/it]


In [17]:
# Results after RANSAC re-ranking: per-cut-off mean precision@k over the
# scored queries, followed by the mean over every entry of APs.
APs.mean(axis=0), mAP

(array([0.575     , 0.5375    , 0.51822917, 0.50039062, 0.486875  ]),
 0.5235989583333334)

In [18]:
# The re-ranking loop skips every test image whose descriptors are None, so
# APs can have fewer rows than test_labels. Rebuild the labels of the queries
# that were actually scored (same skip rule) to keep the mask row-aligned.
scored_labels = np.array(
    [label for label, feats in zip(test_labels, test_features) if feats is not None])
non_general_indices = np.where(scored_labels != 'general')
# Precision@k (k = 1..K) averaged over the queries not labelled 'general'.
APs[non_general_indices].mean(axis=0)

array([0.58943089, 0.55182927, 0.54200542, 0.51930894, 0.50447154])