# Reference:
https://pyimagesearch.com/2014/01/27/hobbits-and-histograms-a-how-to-guide-to-building-your-first-image-search-engine-in-python/

In [1]:
import cv2
import numpy as np
import pickle
import os
import imutils

from os import listdir
from scipy import stats


In [2]:
class RBGHistogram:
    """Describe an image by a normalized 3D color histogram.

    ``bins`` is the per-channel bin count passed to ``cv2.calcHist``,
    e.g. ``[8, 8, 8]`` yields an 8 * 8 * 8 = 512 element feature vector.
    """

    def __init__(self, bins: list):
        self.bins = bins

    def describe(self, image):
        """Return the flattened, normalized 3D color histogram of ``image``.

        The histogram is computed jointly over the image's three channels
        (BGR order when the image was loaded with ``cv2.imread``). Bin counts
        are divided by their total so that each entry is a fraction of the
        pixels rather than a pixel count; the same content at a larger or
        smaller scale therefore gives (roughly) the same histogram.

        Raises:
            ValueError: if ``image`` is None, which is what ``cv2.imread``
                returns when a file is missing or cannot be decoded.
        """
        if image is None:
            raise ValueError(
                "image is None; cv2.imread returns None when a file cannot be read"
            )

        hist = cv2.calcHist([image], [0, 1, 2], None, self.bins, [0, 256, 0, 256, 0, 256])
        # Alternative (single-channel grayscale histogram):
        # hist = cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, self.bins, [0,256])

        # Normalize to fractions. Skip the division when nothing was counted
        # so an empty histogram stays all zeros instead of turning into NaN.
        total = hist.sum()
        if total > 0:
            hist /= total
        # Alternative normalization:
        # hist = cv2.normalize(hist, hist)

        return hist.flatten()

    def image_resize(self, image, width = None, height = None, inter = cv2.INTER_AREA):
        """Resize ``image`` to a target width or height, keeping aspect ratio.

        Args:
            image: array whose first two ``shape`` entries are (height, width).
            width: target width in pixels, or None.
            height: target height in pixels, or None.
            inter: interpolation flag forwarded to ``cv2.resize``.

        Returns:
            The original ``image`` object when both ``width`` and ``height``
            are None, otherwise the resized image. When both are given,
            ``width`` wins and ``height`` is ignored.
        """
        (h, w) = image.shape[:2]

        # nothing requested: hand back the input untouched
        if width is None and height is None:
            return image

        if width is None:
            # scale by the height ratio and derive the matching width
            r = height / float(h)
            # clamp to 1 so extreme aspect ratios never produce a 0-pixel side
            dim = (max(1, int(w * r)), height)
        else:
            # scale by the width ratio and derive the matching height
            r = width / float(w)
            dim = (width, max(1, int(h * r)))

        # cv2.resize expects the target size as (width, height)
        return cv2.resize(image, dim, interpolation = inter)


In [3]:
# Build the search index: one histogram feature vector per dataset image,
# then persist the whole index to disk with pickle.
dataset_dir = '../resources/search-dataset/'
index = {}
desc = RBGHistogram([8,8,8])
# desc = RBGHistogram([256])

# file names within the directory are unique, so they serve as index keys
for path in listdir(dataset_dir):
    image = cv2.imread(os.path.join(dataset_dir, path))
    if image is None:
        # cv2.imread returns None for files it cannot decode (e.g. hidden or
        # non-image files); skip them instead of crashing the whole run
        print('[WARN] skipping unreadable file: {}'.format(path))
        continue
    index[path] = desc.describe(image)

# save features; the context manager closes the file even if pickling fails
with open('../model/image_search_model', 'wb') as f:
    pickle.dump(index, f)

print('[INFO] done... indexed {} images'.format(len(index)))

[INFO] done... indexed 20 images


In [4]:
class Searcher:
    """Rank the entries of a feature index by distance to a query vector."""

    def __init__(self, index: dict):
        # maps an identifier (e.g. a file name) to its feature vector
        self.index = index

    def search(self, query_features, metric="euclidean"):
        """Return ``(distance, key)`` tuples for every index entry, closest first.

        Args:
            query_features: feature vector to compare against each entry.
            metric: ``"euclidean"`` (default) or ``"chi2"``.

        Raises:
            ValueError: if ``metric`` is not one of the supported names.
        """
        metrics = {
            "euclidean": self.euclidean_distance,
            "chi2": self.chi2_distance,
        }
        try:
            distance_fn = metrics[metric]
        except KeyError:
            raise ValueError(
                "unknown metric {!r}; expected one of {}".format(metric, sorted(metrics))
            ) from None

        # tuples sort by distance first, then by key to break ties
        return sorted(
            (distance_fn(features, query_features), key)
            for key, features in self.index.items()
        )

    def chi2_distance(self, hist_a, hist_b, eps=1e-10):
        """Return the chi-squared distance between two histograms.

        ``eps`` keeps the denominator non-zero for bins that are empty in
        both histograms. The computation is vectorized, so the two inputs
        must have compatible shapes.
        """
        a = self._as_float_array(hist_a)
        b = self._as_float_array(hist_b)
        return 0.5 * np.sum((a - b) ** 2 / (a + b + eps))

    def euclidean_distance(self, point_a, point_b):
        """Return the Euclidean (L2) distance between two points."""
        # reference: https://www.geeksforgeeks.org/calculate-the-euclidean-distance-using-numpy/
        a = self._as_float_array(point_a)
        b = self._as_float_array(point_b)
        return np.sqrt(np.sum(np.square(a - b)))

    @staticmethod
    def _as_float_array(values):
        """Return ``values`` as an ndarray, promoting non-float dtypes to float64.

        Floating-point inputs keep their dtype. Integer inputs are promoted
        because differences and squares of unsigned integers wrap around.
        """
        arr = np.asarray(values)
        if not np.issubdtype(arr.dtype, np.floating):
            arr = arr.astype(np.float64)
        return arr

In [7]:
# Load the saved index and rank every indexed image against one query image.
# NOTE: unpickling can execute arbitrary code; only load index files that
# were produced by this notebook, never files from an untrusted source.
with open('../model/image_search_model', 'rb') as f:
    index = pickle.load(f)
searcher = Searcher(index)

query_path = '../resources/search-dataset/inori minase1.jpg'
query_img = cv2.imread(query_path)
if query_img is None:
    # cv2.imread signals a missing/undecodable file by returning None
    raise FileNotFoundError('could not read query image: {}'.format(query_path))
# desc = RBGHistogram([8,8,8])
query_features = desc.describe(query_img)

results = searcher.search(query_features)


# loop over the top ten results (or fewer when the index is smaller)
for j, (score, img) in enumerate(results[:10]):
    # load the result image; it is only needed by the optional display below
    path = os.path.join('../resources/search-dataset', img)
    result = cv2.imread(path)
    print("\t{}. {} : {:.3f}".format(j + 1, img, score))

    # cv2.imshow('result {}'.format(j), result)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

	1. inori minase1.jpg : 0.000
	2. matusoka yositsugu3.jpg : 0.214
	3. sakura ayane3.jpg : 0.241
	4. kayano ai2.jpg : 0.255
	5. inori minase5.jpg : 0.331
	6. matusoka yositsugu2.jpg : 0.338
	7. kayano ai1.jpg : 0.356
	8. inori minase2.jpg : 0.366
	9. matusoka yositsugu1.jpg : 0.384
	10. matusoka yositsugu4.jpg : 0.421
