In [1]:
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import pickle # to store data
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm_notebook
from scipy.spatial.distance import hamming, cosine
from scipy import misc
import imagehash
from PIL import Image
import tensorflow_hub as hub
from six import BytesIO
import tempfile
import scipy
import random
from matplotlib.pyplot import imread
%matplotlib inline

In [11]:
def image_loader(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) 
    # cv2.imread loads the image in the BGR format. We need to convert it to RGB 
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Now, resize and fill the image if it is smaller with interpolation
#     image = cv2.resize(image, image_size, cv2.INTER_CUBIC)
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return image

### comparing images

In [12]:
image1_path = "datasets\\set_128\\train\\27\\0b3f356651664968.jpg"
image2_path = "datasets\\set_128\\train\\27\\0fa907fb215b2787.jpg"
image3_path = "datasets\\set_128\\train\\27\\00f928e383e1d121.jpg"
image4_path = "datasets\\set_128\\train\\27\\1dd62cf615dbce34.jpg"

image1 = image_loader(image1_path)
image2 = image_loader(image2_path)
image3 = image_loader(image3_path)
image4 = image_loader(image4_path)

In [13]:
image1.shape

(128, 128, 3)

In [17]:
m = tf.keras.metrics.CosineSimilarity(axis=1)
_ = m.update_state(image1, image2)
m.result().numpy()

0.5956832

In [18]:
cosine_similarity(image1, image3)

ValueError: Found array with dim 3. check_pairwise_arrays expected <= 2.

In [7]:
image1_flattend = image1.flatten()
image2_flattend = image2.flatten()
image3_flattend = image3.flatten()
image4_flattend = image4.flatten()

In [8]:
image1_flattend

array([115, 117, 119, ...,  53,  59,  53], dtype=uint8)

In [16]:
1-cosine(image1_flattend,image4_flattend)

1.1444752748434102

In [10]:
print(image1_flattend.dtype)

uint8


In [17]:
image1_flattend

array([115, 117, 119, ...,  53,  59,  53], dtype=uint8)

In [18]:
image2_flattend

array([222, 222, 221, ..., 125, 119, 121], dtype=uint8)

In [22]:
hash1 = imagehash.average_hash(Image.open(image1_path))
print(hash1)
hash2 = imagehash.average_hash(Image.open(image4_path))
print(hash2)
print(hash1-hash1)
print(hash1-hash2)

ffffffbe0000080c
80808080b999fbf9
0
47


In [20]:
image5_path = "datasets\\group1_set_128\\set_128\\train\\27\\fb8293784e4b96a4.jpg"
hash3 = imagehash.average_hash(Image.open(image5_path))
print(hash3)
print(hash1-hash3)

0000214d7dfffff7
56


In [26]:
# Feature extractor
def extract_features(image_path, vector_size=32):
    image = imread(image_path)
    try:
        # Using KAZE, cause SIFT, ORB and other was moved to additional module
        # which is adding addtional pain during install
        print(image.shape)
        print(image)
        alg = cv2.KAZE_create()
        # Dinding image keypoints
        kps = alg.detect(image)
        print(kps)
        # Getting first 32 of them. 
        # Number of keypoints is varies depend on image size and color pallet
        # Sorting them based on keypoint response value(bigger is better)
        kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
        # computing descriptors vector
        kps, dsc = alg.compute(image, kps)
        print(kps)
        print(dsc)
        # Flatten all of them in one big vector - our feature vector
        dsc = dsc.flatten()
        # Making descriptor of same size
        # Descriptor vector size is 64
        needed_size = (vector_size * 64)
        if dsc.size < needed_size:
            # if we have less the 32 descriptors then just adding zeros at the
            # end of our feature vector
            dsc = np.concatenate([dsc, np.zeros(needed_size - dsc.size)])
    except cv2.error as e:
        print('Error: ', e)
        return None

    return dsc


def batch_extractor(images_path, pickled_db_path="features.pck"):
    files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]

    result = {}
    for f in files:
        print('Extracting features from image %s' % f)
        name = f.split('/')[-1].lower()
        result[name] = extract_features(f)
    
    # saving all our feature vectors in pickled file
    with open(pickled_db_path, 'w') as fp:
        pickle.dump(result, fp)

In [27]:
class Matcher(object):

    def __init__(self, pickled_db_path="features.pck"):
        with open(pickled_db_path) as fp:
            self.data = pickle.load(fp)
        self.names = []
        self.matrix = []
        for k, v in self.data.iteritems():
            self.names.append(k)
            self.matrix.append(v)
        self.matrix = np.array(self.matrix)
        self.names = np.array(self.names)

    def cos_cdist(self, vector):
        # getting cosine distance between search image and images database
        v = vector.reshape(1, -1)
        return scipy.spatial.distance.cdist(self.matrix, v, 'cosine').reshape(-1)

    def match(self, image_path, topn=5):
        features = extract_features(image_path)
        img_distances = self.cos_cdist(features)
        # getting top 5 records
        nearest_ids = np.argsort(img_distances)[:topn].tolist()
        nearest_img_paths = self.names[nearest_ids].tolist()

        return nearest_img_paths, img_distances[nearest_ids].tolist()

In [28]:
def show_img(path):
    img = imread(path)
    plt.imshow(img)
    plt.show()
    
def run():
    images_path = 'datasets\\group1_set_128\\set_128\\train\\27\\'
    files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]
    # getting 3 random images 
    sample = random.sample(files, 3)
    
    batch_extractor(images_path)

    ma = Matcher('features.pck')
    
    for s in sample:
        print('Query image ==========================================')
        show_img(s)
        names, match = ma.match(s, topn=3)
        print('Result images ========================================')
        for i in range(3):
            # we got cosine distance, less cosine distance between vectors
            # more they similar, thus we subtruct it from 1 to get match value
            print('Match %s' % (1-match[i]))
            show_img(os.path.join(images_path, names[i]))

run()

Extracting features from image datasets\group1_set_128\set_128\train\27\00cba0067c078490.jpg
(128, 128, 3)
[[[130 100  76]
  [132 102  78]
  [133 103  79]
  ...
  [145 115  91]
  [146 116  92]
  [145 115  91]]

 [[133 103  79]
  [134 104  80]
  [134 104  80]
  ...
  [146 116  92]
  [146 116  92]
  [145 115  91]]

 [[136 106  82]
  [135 105  81]
  [135 105  81]
  ...
  [148 116  93]
  [147 115  92]
  [146 114  91]]

 ...

 [[130 100  76]
  [131 101  77]
  [134 104  80]
  ...
  [162 124 101]
  [161 123 100]
  [159 121  98]]

 [[129  99  75]
  [131 101  77]
  [133 103  79]
  ...
  [161 123 102]
  [162 124 103]
  [160 122 101]]

 [[129  99  75]
  [130 100  76]
  [131 101  77]
  ...
  [160 122 101]
  [161 123 102]
  [159 121 100]]]
[]
[]
None


AttributeError: 'NoneType' object has no attribute 'flatten'