In [1]:
# Mount Google Drive
from google.colab import drive # import drive from google colab

ROOT = "/content/drive"     # default location for the drive
print(ROOT)                 # print content of ROOT (Optional)

drive.mount(ROOT, force_remount=True)           # we mount the google drive at /content/drive

/content/drive
Mounted at /content/drive


In [2]:
%cd '/content/drive/My Drive/Workspaces/image_similarity'

/content/drive/My Drive/Workspaces/image_similarity


In [3]:
import hashlib
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import time
import numpy as np

In [4]:
# Hashing
# Take image as an input. And the output would be a compressed fingerprint(key) representing that image
# hashed output - unique output pertaining to the input image
from hashlib import md5

def get_hashed_file(filepath):
  with open(filepath, 'rb') as f:
    return md5(f.read()).hexdigest()


In [5]:
hash1 = get_hashed_file('data/images/lion1.jpg')
hash2 = get_hashed_file('data/images/lion2.jpg')
hash3 = get_hashed_file('data/images/lion3.jpg')

In [6]:
hash1, hash2, hash3

#hash values are same for duplicate images

('320c75116d06c62fa8dd8846cd94960f',
 '5d10e8afb40216c12119af063e0bc490',
 '5d10e8afb40216c12119af063e0bc490')

## Similar images

In [23]:
import cv2
from google.colab.patches import cv2_imshow
lion1 = cv2.imread('data/images/lion1.jpg',0)
lion2 = cv2.imread('data/images/lion2.jpg',0)
sign1 = cv2.imread('data/images/sign1.jpg',0)
sign2 = cv2.imread('data/images/sign2.jpg',0)
sign3 = cv2.imread('data/images/sign3.jpeg',0)
# cv2_imshow(lion2)

In [19]:
def resize(image, height, width):
  resized_row = cv2.resize(image, (height, width)).flatten()
  resized_col = cv2.resize(image, (height, width)).flatten('F')
  return resized_row, resized_col

def intensity_diff(resized_row, resized_col):
  row_diff = np.diff(resized_row)
  col_diff = np.diff(resized_col)
  row_diff = row_diff > 0
  col_diff = col_diff > 0

  return np.vstack((row_diff, col_diff)).flatten()

def difference_score(image, height =30, width = 30):
  resized_row, resized_col = resize(image, height, width)
  difference = intensity_diff(resized_row, resized_col)

  return difference

def hamming_distance(image1, image2):
  return distance.hamming(image1, image2)

In [24]:
from scipy.spatial import distance
hd = hamming_distance(difference_score(sign1), difference_score(sign3))
hd

0.26807563959955505

In [15]:
hash1 = md5(difference_score(lion1)).hexdigest()
hash2 = md5(difference_score(lion2)).hexdigest()

hash1, hash2

('ee4541746b9cdca64b4f5cc65ef45904', 'a53ce160ccbf43c5c7ac2f6be205ad33')