In [136]:
from PIL import ImageChops
import math, operator
from PIL import Image
from functools import reduce
import imagehash

def rmsdiff(im1, im2):
    "Calculate the root-mean-square difference between two images"

    h = ImageChops.difference(im1, im2).histogram()

    # calculate rms
    return math.sqrt(reduce(operator.add,
        map(lambda h, i: h*(i**2), h, range(256))
    ) / (float(im1.size[0]) * im1.size[1]))

def rmsdiff2(img1, img2):
    return imagehash.average_hash(img1) - imagehash.average_hash(img2)

def equal(im1, im2):
    return ImageChops.difference(im1, im2).getbbox() is None

In [131]:
import sys

# from scipy.misc import imread
from scipy.linalg import norm
from scipy import sum, average
import imageio


def compare_images(img1, img2):
    # normalize to compensate for exposure difference, this may be unnecessary
    # consider disabling it
    img1 = normalize(img1)
    img2 = normalize(img2)
    # calculate the difference and its norms
    diff = img1 - img2  # elementwise for scipy arrays
    m_norm = sum(abs(diff))  # Manhattan norm
    z_norm = norm(diff.ravel(), 0)  # Zero norm
    return (m_norm, z_norm)

def to_grayscale(arr):
    "If arr is a color image (3D array), convert it to grayscale (2D array)."
    if len(arr.shape) == 3:
        return average(arr, -1)  # average over the last axis (color channels)
    else:
        return arr
    
def normalize(arr):
    rng = arr.max()-arr.min()
    amin = arr.min()
    return (arr-amin)*255/rng

def compare_main(file1, file2):
    img1 = to_grayscale(imageio.imread(file1).astype(float))
    img2 = to_grayscale(imageio.imread(file2).astype(float))
    # compare
    return compare_images(img1, img2)

In [126]:
from os import listdir
from os.path import isfile, join
import timeit

path1 = '/home/dzanardo/github/apartamento/2449863607/'
path2 = '/home/dzanardo/github/apartamento/2454100925/'

files1 = [f for f in listdir(path1) if isfile(join(path1, f))]
files2 = [f for f in listdir(path2) if isfile(join(path2, f))]

In [132]:
file1 = "/home/dzanardo/github/apartamento/2449863607/33.jpg"
file2 = "/home/dzanardo/github/apartamento/2454100925/20.jpg"

im2 = Image.open(file1)
im1 = Image.open(file2)

print(rmsdiff(im2, im1))
print(rmsdiff2(im2, im1))

# read images as 2D arrays (convert to grayscale for simplicity)
n_m, n_0 = compare_main(file2, file1)
print("Manhattan norm:", n_m, "/ per pixel:", n_m/img1.size)
print("Zero norm:", n_0, "/ per pixel:", n_0*1.0/img1.size)

67.85159101139617
12
Manhattan norm: 4173883.0 / per pixel: 48.30883101851852
Zero norm: 85977.0 / per pixel: 0.9951041666666667


In [127]:
start = timeit.default_timer()
similares = 0

for file1 in files1:
    im2 = Image.open(path1 + file1)
    
    diff_p = 0
    #print('--------------------------------- ' + str(file1))
    for file2 in files2:
        im1 = Image.open(path2 + file2)
        diff = rmsdiff(im2, im1)
        
        if diff < 40:
            similares += 1
            #print(diff, file1, file2)
            break
            
print(similares)
stop = timeit.default_timer()
print(stop - start)


27
2.519704706995981


In [137]:
start = timeit.default_timer()
similares = 0

for file1 in files1:
    im2 = Image.open(path1 + file1)
    
    diff_p = 0
    #print('--------------------------------- ' + str(file1))
    for file2 in files2:
        im1 = Image.open(path2 + file2)
        diff = rmsdiff2(im2, im1)
        
        if diff < 5:
            similares += 1
            #print(diff, file1, file2)
            break
            

print(similares)
stop = timeit.default_timer()
print(stop - start)

23
3.514025796990609


In [135]:
start = timeit.default_timer()
similares = 0

for file1 in files1:
    im2 = path1 + file1
    
    diff_p = 0
    #print('--------------------------------- ' + str(file1))
    for file2 in files2:
        im1 = path2 + file2
        n_m, n_0 = compare_main(im1, im2)
        
        if n_m/img1.size < 30:
            similares += 1
            #print(n_m/img1.size, file1, file2)
            break
            

print(similares)
stop = timeit.default_timer()
print(stop - start)

28
8.612002982001286
