# Importing Libraries

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm_notebook
from scipy.spatial.distance import hamming, cosine

%matplotlib inline

# Dataset utils and preprocessing

In [2]:
def image_loader(path, size):
    """Load an image from disk, convert it to RGB and resize it.

    Args:
        path: String path to the image file.
        size: Tuple giving the output image size, passed to ``cv.resize``
            as its ``dsize`` argument (``(width, height)``).

    Returns:
        The resized image array with channels in RGB order.

    Raises:
        ValueError: If ``cv.imread`` could not read the file. ``imread``
            signals failure by returning ``None`` rather than raising.
    """
    image = cv.imread(path)
    if image is None:
        raise ValueError("Could not read image: {}".format(path))

    # imread returns channels in BGR order; a plain BGR -> RGB swap is what
    # is needed here, not a Bayer-pattern demosaicing code.
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    # The interpolation flag must be passed by keyword: the third positional
    # parameter of cv.resize is `dst`, not `interpolation`.
    image = cv.resize(image, size, interpolation=cv.INTER_CUBIC)

    return image

In [3]:
def dataset_preprocessing(dataset_path, labels_file_path, size, image_paths_pickle):
    """Load every labelable image of a dataset folder together with its label.

    The label of an image is the index (in the labels file) of the first
    class name that occurs as a substring of the image file name. Files
    that match no class name, or that cannot be loaded, are skipped and
    reported, so images, labels and stored paths always stay aligned.

    Args:
        dataset_path: String path to the dataset directory.
        labels_file_path: String path to the labels file (one class name
            per line).
        size: Tuple size of the output images, forwarded to ``image_loader``.
        image_paths_pickle: String name (without the ``.pickle`` extension)
            of the pickle file where the image paths are stored.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays built from the loaded
        images and their class indices, in the same order as the pickled
        image paths.
    """
    with open(labels_file_path, 'r') as f:
        # splitlines() keeps the last class even when the file has no trailing
        # newline; blank names are dropped because an empty string is a
        # substring of every file name.
        classes = [line.strip() for line in f.read().splitlines() if line.strip()]

    images = []
    labels = []
    image_paths = []
    skipped = []

    # os.listdir order is arbitrary; sorting makes the output reproducible.
    for image_name in sorted(os.listdir(dataset_path)):
        # Resolve the label first so an image is only stored once it is known
        # to have exactly one label attached to it.
        label = next(
            (idx for idx, class_name in enumerate(classes) if class_name in image_name),
            None,
        )
        if label is None:
            skipped.append(image_name)
            continue

        image_path = os.path.join(dataset_path, image_name)
        try:
            image = image_loader(image_path, size)
        except Exception:
            # Best effort: unreadable / non-image files are skipped, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            skipped.append(image_name)
            continue

        images.append(image)
        labels.append(label)
        image_paths.append(image_path)

    if skipped:
        print("Skipped {} file(s) that could not be loaded or labeled: {}".format(
            len(skipped), skipped))

    with open(image_paths_pickle + ".pickle", 'wb') as f:
        pickle.dump(image_paths, f)

    assert len(images) == len(labels)
    return np.array(images), np.array(labels)

# Utils functions

## 1. Cosine similarity
![Cosine similarity illustration](static/cosine-similarity-draw.png)
![Cosine similarity formula](static/cosine-similarity.png)

In [4]:
def cosine_distance(training_set_vectors, query_vector, top_n=30):
    """Rank training vectors by cosine distance to a query vector.

    Args:
        training_set_vectors: Sequence of vectors to compare against.
        query_vector: Container whose first element (``query_vector[0]``)
            is the vector being searched for.
        top_n: Number of closest matches to return.

    Returns:
        Indices of the ``top_n`` training vectors with the smallest cosine
        distance to the query, closest first.
    """
    # One distance per training vector, measured against the query.
    scores = [
        cosine(candidate, query_vector[0])
        for candidate in training_set_vectors
    ]
    # Ascending argsort puts the most similar vectors first.
    ranking = np.argsort(scores)
    return ranking[:top_n]

## 2. Hamming distance
![Hamming distance formula](static/hamming.png)
![Hamming distance explained](static/hamming-explained.png)

In [5]:
def hamming_distance(training_set_vectors, query_vector, top_n=50):
    """Rank training vectors by Hamming distance to a query vector.

    Args:
        training_set_vectors: Sequence of vectors to compare against.
        query_vector: Container whose first element (``query_vector[0]``)
            is the vector being searched for.
        top_n: Number of closest matches to return.

    Returns:
        Indices of the ``top_n`` training vectors with the smallest Hamming
        distance to the query, closest first.
    """
    # One distance per training vector, measured against the query.
    scores = [
        hamming(candidate, query_vector[0])
        for candidate in training_set_vectors
    ]
    # Ascending argsort puts the most similar vectors first.
    ranking = np.argsort(scores)
    return ranking[:top_n]

## 3. Sparse accuracy

In [6]:
def sparse_accuracy(true_labels, predicted_labels):
    """Fraction of samples whose highest-scoring class equals the true label.

    Args:
        true_labels: np array with the real label of each sample.
        predicted_labels: np matrix of softmax probabilities, one row per
            sample.

    Returns:
        Number of correctly classified samples divided by the number of
        samples.
    """
    total = len(true_labels)
    assert total == len(predicted_labels)

    # Count the rows whose argmax agrees with the ground-truth label.
    hits = sum(
        1
        for expected, scores in zip(true_labels, predicted_labels)
        if np.argmax(scores) == expected
    )

    return hits / total