In [None]:
%matplotlib inline

import matplotlib.pyplot as plt

import cv2
import numpy as np

from tqdm import trange
import random

# Image Display

In [None]:
def show_image_and_keypoints( image , kps ) :
    """Plot `image` with the keypoints `kps` drawn on top of it.

    The keypoints are rendered with ``DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS``
    on a freshly allocated array, so the caller's `image` is left untouched.

    Parameters
    ----------
    image : image array accepted by ``cv2.drawKeypoints``.
        Assumed to be in OpenCV's BGR channel order (the caller in this
        notebook passes the result of ``cv2.imread``) -- TODO confirm for
        any other caller.
    kps : keypoints to draw, as produced by a cv2 feature detector.
    """
    # Passing None as the output image makes OpenCV allocate a new array
    # instead of painting over the input.
    annotated = cv2.drawKeypoints( image, kps, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS )

    # matplotlib interprets 3-channel arrays as RGB while OpenCV works in BGR.
    annotated = cv2.cvtColor( annotated , cv2.COLOR_BGR2RGB )

    plt.figure(figsize = (10,10))
    plt.imshow(annotated, aspect='auto')
    plt.axis('off')
    plt.title('Keypoints and descriptors.')
    plt.show()

In [None]:
def show_top_images ( dataset_path, indices , id_test , ids , labels ) :
    """Display the query image followed by every retrieved neighbour.

    Parameters
    ----------
    dataset_path : root folder; images are read from
        ``<dataset_path>/jpg/<label>/image_<id, zero-padded to 4>.jpg``.
    indices : 2-D array of neighbour positions (only row 0 is used); each
        entry indexes both `ids` and `labels`.
    id_test : position of the query image inside `ids` / `labels`.
    ids : image ids of the represented split.
    labels : label of every image, aligned with `ids`.

    Raises
    ------
    FileNotFoundError
        If one of the image files cannot be read.
    """
    query_id = ids[id_test]
    # The query's folder is derived from its id (80 consecutive ids per folder).
    query_folder = (query_id - 1) // 80
    name = dataset_path + '/jpg/' + str(query_folder) + '/image_' + str(query_id).zfill(4) + '.jpg'

    show_image_label(0, _load_rgb_image( name ), labels[id_test], query_id )

    # Ranks restart at 0 for the neighbours, as in the title of the query.
    for rank, i in enumerate(indices[0]) :
        label_i = labels[i]
        name = dataset_path + '/jpg/' + str(label_i) + '/image_' + str(ids[i]).zfill(4) + '.jpg'

        show_image_label(rank, _load_rgb_image( name ), label_i, ids[i] )


def _load_rgb_image ( name ) :
    """Read the image file `name` with OpenCV and return it converted to RGB."""
    image = cv2.imread( name )

    if image is None :
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise FileNotFoundError(f'Reading image Error. Path: {name}')

    return cv2.cvtColor( image , cv2.COLOR_BGR2RGB )
        
    
def show_image_label ( top, image, label , image_id ) :
    """Show a single image in its own 5x5 inch figure.

    The figure title reports the rank `top`, the `image_id` and the `label`;
    the axes are hidden.
    """
    caption = f'{top} - Image id {image_id} with label {label}.'

    figure = plt.figure(figsize = (5,5))
    axes = figure.add_subplot(1, 1, 1)
    axes.imshow(image, aspect='auto')
    axes.set_axis_off()
    axes.set_title(caption)

    plt.show()

# Generate descriptors

In [None]:
def detect_and_describe_keypoints ( image, algorithm='orb' ) :
    """Detect keypoints in `image` and compute their descriptors.

    Parameters
    ----------
    image : colour image in BGR order (it is converted to grayscale with
        ``cv2.COLOR_BGR2GRAY`` before detection).
    algorithm : ``'orb'`` (default) or ``'sift'``.

    Returns
    -------
    tuple or None
        ``(kps, descs)`` as returned by the extractor's ``compute``; ``None``
        (after printing an error) when `algorithm` is not recognised.
        NOTE(review): OpenCV is expected to hand back ``descs = None`` when no
        keypoint is found -- callers should check; confirm against cv2 docs.
    """
    # Pick the extractor first so an unknown algorithm is rejected before
    # any image processing is done.
    if algorithm == 'sift' :
        # Recent OpenCV builds expose SIFT in the main module; older contrib
        # builds only provide it under cv2.xfeatures2d.
        if hasattr( cv2, 'SIFT_create' ) :
            extractor = cv2.SIFT_create()
        else :
            extractor = cv2.xfeatures2d.SIFT_create()

    elif algorithm == 'orb' :
        extractor = cv2.ORB_create()

    else :
        print('Error: algorithm not defined')
        return None

    image_gray = cv2.cvtColor( image , cv2.COLOR_BGR2GRAY )

    # Detecting Keypoints
    kps = extractor.detect( image_gray, None )

    # Describing Keypoints
    kps, descs = extractor.compute( image_gray, kps )

    return kps, descs

In [None]:
def create_bovw_descriptors (image, dictionary, algorithm='orb') :
    """Build the bag-of-visual-words histogram of `image`.

    Every local descriptor of the image is assigned to a cluster with
    ``dictionary.predict`` and the assignments are counted per cluster.

    Parameters
    ----------
    image : image passed on to ``detect_and_describe_keypoints``.
    dictionary : fitted clustering model exposing ``predict`` and
        ``n_clusters``.
    algorithm : feature extractor name forwarded to
        ``detect_and_describe_keypoints``.

    Returns
    -------
    numpy.ndarray
        Integer vector of length ``dictionary.n_clusters``; all zeros when
        the image yields no descriptors.
    """
    descs = detect_and_describe_keypoints( image, algorithm=algorithm )[1]

    if descs is None or len(descs) == 0 :
        # No keypoints means no visual words: the histogram is empty rather
        # than an error (np.array(None) would otherwise break predict()).
        return np.zeros( dictionary.n_clusters , dtype=np.intp )

    predicted = dictionary.predict(np.array(descs, dtype=np.double))

    # One bin per cluster id 0 .. n_clusters-1.
    desc_bovw = np.histogram(predicted, bins=range(0, dictionary.n_clusters+1))[0]

    return desc_bovw

# Dictionary

In [None]:
from sklearn.cluster import MiniBatchKMeans
# from sklearn.cluster import KMeans

def create_dictionary_kmeans ( vocabulary , num_cluster , batch_size=1000 , random_state=None ) :
    """Learn a visual dictionary by clustering `vocabulary` with mini-batch k-means.

    Parameters
    ----------
    vocabulary : 2-D collection of descriptors, one per row.
    num_cluster : number of clusters (visual words) to learn.
    batch_size : mini-batch size handed to ``MiniBatchKMeans`` (default 1000).
    random_state : optional seed handed to ``MiniBatchKMeans`` so a run can be
        reproduced; ``None`` (default) keeps the clustering unseeded.

    Returns
    -------
    The fitted ``MiniBatchKMeans`` instance.

    Raises
    ------
    ValueError
        If `vocabulary` is ``None`` or has no rows.
    """
    if vocabulary is None or len(vocabulary) == 0 :
        raise ValueError('vocabulary is empty: there are no descriptors to cluster')

    print( ' -> [I] Dictionary Info:\n', 
        '\nTrain len: ', len(vocabulary),
        '\nDimension: ', len(vocabulary[0]),
        '\nClusters: ', num_cluster 
        )

    dictionary = MiniBatchKMeans( n_clusters=num_cluster, batch_size=batch_size, random_state=random_state )

    print ( 'Learning dictionary by Kmeans...')
    dictionary = dictionary.fit( vocabulary )
    print ( 'Done.')

    return dictionary

# Data

In [None]:
import scipy.io
import tqdm

def create_vocabulary ( dataset_path , algorithm='orb', show_image=False , debug=False ) :
    """Collect the local descriptors of every image of the 'trn1' split.

    Image ids are read from ``<dataset_path>/datasplits.mat`` and each image
    from ``<dataset_path>/jpg/<(id-1)//80>/image_<id, zero-padded to 4>.jpg``.

    Parameters
    ----------
    dataset_path : root folder of the dataset.
    algorithm : ``'orb'`` (32-wide descriptors) or ``'sift'`` (128-wide).
    show_image : when true, plot every image with its keypoints.
    debug : when true, print per-image details.

    Returns
    -------
    numpy.ndarray or None
        Float array with one descriptor per row; ``None`` (after printing an
        error) when `algorithm` is unknown or an image cannot be read.
    """
    descriptor_sizes = { 'orb': 32 , 'sift': 128 }

    # Reject an unknown algorithm before touching any file.
    if algorithm not in descriptor_sizes :
        print('Error:Algorithm not defined.')
        return None

    mat = scipy.io.loadmat( dataset_path+'/datasplits.mat' )

    ids = mat['trn1'][0] #  'val1' or 'tst1' 

    # The empty float block fixes the width and dtype of the result even if
    # no image yields a descriptor.
    chunks = [ np.empty( shape=(0, descriptor_sizes[algorithm]) , dtype=float ) ]

    for image_id in tqdm.tqdm(ids, desc='Processing train set') :

        label = (image_id - 1) // 80
        name = dataset_path + '/jpg/' + str(label) + '/image_' + str(image_id).zfill(4) + '.jpg'

        image = cv2.imread( name )

        if image is None:
            print(f'Reading image Error. Path: {name}')
            return None

        kps, descs = detect_and_describe_keypoints ( image, algorithm=algorithm )

        # An image without keypoints contributes nothing to the vocabulary.
        has_descs = descs is not None and len(descs) > 0

        if has_descs :
            chunks.append(descs)

        if show_image :
            show_image_and_keypoints(image, kps)

        if debug :
            print( name )
            print( 'Number of keypoints: ', len(kps) )
            print( 'Number of images: ', len(ids) )
            if has_descs :
                print( 'Descriptor size: ', len(descs[0]) )
                print( type(descs[0]) )

    # A single concatenation avoids re-copying the growing array per image.
    train_descs = np.concatenate( chunks , axis=0 )

    print( ' -> [I] Image Loader Info:\n', 
      '\nTrain len: ', len(train_descs),
      '\nNumber of images: ', len(ids),
      '\nDescriptor size: ', train_descs.shape[1]
      )

    return train_descs

In [None]:
def represent_dataset( dataset_path, dictionary , algorithm='orb' , split='tst1' ) :
    """Describe every image of a data split by its bag-of-visual-words histogram.

    Parameters
    ----------
    dataset_path : root folder; ids come from ``<dataset_path>/datasplits.mat``
        and images from ``<dataset_path>/jpg/<label>/image_<id>.jpg``.
    dictionary : fitted clustering model used by ``create_bovw_descriptors``.
    algorithm : feature extractor name forwarded to ``create_bovw_descriptors``.
    split : key of the split inside ``datasplits.mat``; ``'tst1'`` by default,
        ``'trn1'`` or ``'val1'`` being the alternatives named in this notebook.

    Returns
    -------
    (space, labels) : two lists aligned with the ids of the split -- the
        histograms and the labels (``(id - 1) // 80``).

    Raises
    ------
    FileNotFoundError
        If an image file cannot be read.
    """
    mat = scipy.io.loadmat( dataset_path+'/datasplits.mat' )

    ids = mat[split][0]

    space = []
    labels = []

    for image_id in tqdm.tqdm(ids, desc=f'Processing {split} set') :

        label = (image_id - 1) // 80
        name = dataset_path + '/jpg/' + str(label) + '/image_' + str(image_id).zfill(4) + '.jpg'

        image = cv2.imread( name )

        if image is None :
            # cv2.imread reports an unreadable file by returning None.
            raise FileNotFoundError(f'Reading image Error. Path: {name}')

        desc_bovw = create_bovw_descriptors(image, dictionary, algorithm=algorithm)

        space.append(desc_bovw)
        labels.append(label)

    print( ' -> [I] Space Describing Info:\n', 
        '\nNumber of images: ', len(space), 
        '\nNumber of labels: ', len(labels),
        '\nDimension: ', len(space[0]) if space else 0
        )

    return space , labels 

# Experimental evaluation

In [None]:
from sklearn.neighbors import NearestNeighbors

def run_test ( space , labels , dictionary , dataset_path, algorithm='orb', top=10 , split='tst1' ) :
    """Print the average retrieval accuracy over every image of a split.

    Each image of the split is used as a query against a nearest-neighbour
    index fitted on `space`; its accuracy is the percentage of the `top`
    retrieved images (the query itself excluded) that share its label.

    Parameters
    ----------
    space : bag-of-visual-words vectors the index is fitted on.
        Assumed to be aligned with the ids of `split`, i.e. ``space[k]``
        describes the k-th id -- TODO confirm when mixing splits.
    labels : label of every row of `space`.
    dictionary : fitted clustering model used to describe the queries.
    dataset_path : root folder of the dataset.
    algorithm : feature extractor name.
    top : number of retrieved images that are scored.
    split : key of the query split inside ``datasplits.mat`` (default ``'tst1'``).

    Raises
    ------
    FileNotFoundError
        If a query image cannot be read.
    """
    # One extra neighbour is requested because the query is normally
    # retrieved as its own nearest neighbour.
    knn = NearestNeighbors(n_neighbors=top+1).fit(space)

    mat = scipy.io.loadmat( dataset_path+'/datasplits.mat' )

    ids = mat[split][0]

    accuracy_t = 0

    for query_pos, id_test in enumerate(tqdm.tqdm(ids, desc='running the test phase')) :

        label = (id_test - 1) // 80
        name = dataset_path + '/jpg/' + str(label) + '/image_' + str(id_test).zfill(4) + '.jpg'

        image = cv2.imread( name )

        if image is None :
            # cv2.imread reports an unreadable file by returning None.
            raise FileNotFoundError(f'Reading image Error. Path: {name}')

        desc_bovw = create_bovw_descriptors(image, dictionary, algorithm=algorithm)

        indices = knn.kneighbors(desc_bovw.reshape(1, -1))[1]

        # Drop the query explicitly instead of subtracting one hit blindly:
        # with tied distances it may be missing from the neighbours, which
        # would otherwise push the accuracy below zero.
        retrieved = [ i for i in indices[0] if i != query_pos ][:top]

        hits = sum( 1 for i in retrieved if labels[i] == label )
        accuracy_t = accuracy_t + ( hits / top ) * 100

    print(f'Average accuracy in the test set: {accuracy_t/len(ids):5.2f}%')
    

In [None]:
def retrieve_single_image ( space , labels , dictionary , dataset_path, algorithm='orb', top=10 , split='tst1' ) :
    """Query the index with one randomly chosen image and report the result.

    The query and its neighbours are displayed with ``show_top_images``; the
    accuracy (percentage of the `top` retrieved images, query excluded, that
    share the query's label), distances, indices and labels are printed.

    Parameters
    ----------
    space : bag-of-visual-words vectors the index is fitted on.
        Assumed to be aligned with the ids of `split`, i.e. ``space[k]``
        describes the k-th id -- TODO confirm when mixing splits.
    labels : label of every row of `space`.
    dictionary : fitted clustering model used to describe the query.
    dataset_path : root folder of the dataset.
    algorithm : feature extractor name.
    top : number of retrieved images that are scored.
    split : key of the query split inside ``datasplits.mat`` (default ``'tst1'``).

    Raises
    ------
    FileNotFoundError
        If the query image cannot be read.
    """
    # One extra neighbour is requested because the query is normally
    # retrieved as its own nearest neighbour.
    knn = NearestNeighbors(n_neighbors=top+1).fit(space)

    mat = scipy.io.loadmat( dataset_path+'/datasplits.mat' )

    ids = mat[split][0]

    # Position of the query inside `ids`, not the image id itself.
    id_test = random.randrange( len(ids) )

    label = (ids[id_test] - 1) // 80
    name = dataset_path + '/jpg/' + str(label) + '/image_' + str(ids[id_test]).zfill(4) + '.jpg'

    image = cv2.imread( name )

    if image is None :
        # cv2.imread reports an unreadable file by returning None.
        raise FileNotFoundError(f'Reading image Error. Path: {name}')

    desc_bovw = create_bovw_descriptors(image, dictionary, algorithm=algorithm)

    distances, indices = knn.kneighbors(desc_bovw.reshape(1, -1))

    show_top_images(dataset_path, indices, id_test, ids, labels)

    labels_top = [ labels[i] for i in indices[0] ]

    # Drop the query explicitly instead of subtracting one hit blindly: with
    # tied distances it may be missing from the neighbours, which would
    # otherwise push the accuracy below zero.
    retrieved = [ i for i in indices[0] if i != id_test ][:top]

    hits = sum( 1 for i in retrieved if labels[i] == label )
    accuracy = ( hits / top ) * 100

    print(f'Accuracy for image id {ids[id_test]}: {accuracy:5.2f}%')

    print(name)    
    print(f'Image: {ids[id_test]} with label {labels[id_test]}')    
    print(f'Closest image: {ids[indices[0][0]]} with distance {distances[0][0]} and label {labels[indices[0][0]]}')
    print('Distances: ',distances)
    print('Indices: ',indices)
    print('Labels: ',labels_top)

# Execution

In [None]:
# Root folder of the dataset: the functions of this notebook read
# '<dataset_path>/datasplits.mat' and '<dataset_path>/jpg/<label>/image_XXXX.jpg'.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
dataset_path = '/home/michelms/virtualEnvPython/datasets/flowers_classes/'
# Feature extractor used by every step below: 'sift' or 'orb'.
algorithm = 'sift'
# All local descriptors extracted from the 'trn1' split, one per row.
vocabulary = create_vocabulary( dataset_path, algorithm=algorithm ) 

In [None]:
# Number of k-means clusters, i.e. the length of each bag-of-visual-words vector.
num_clusters = 100
# Cluster the training descriptors into the visual dictionary.
dictionary = create_dictionary_kmeans( vocabulary , num_clusters )
# Histogram and label of every image of the split handled by represent_dataset.
space, labels = represent_dataset ( dataset_path , dictionary, algorithm=algorithm )

# Query the index with each image of the evaluated split and print the average accuracy.
run_test( space, labels, dictionary, dataset_path, algorithm=algorithm )

In [None]:
# Show the retrieval result for one randomly picked query image (unseeded, so it differs between runs).
retrieve_single_image( space, labels, dictionary, dataset_path , algorithm=algorithm)