In [136]:
#Image Histogram Entropy

#Install Macports here
#https://guide.macports.org/chunked/installing.macports.html
#Install pyentropy here
#https://github.com/robince/pyentropy/blob/master/docs/source/examples.rst

In [187]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy
import scipy.misc
import os
import random
from PIL import ImageChops
from PIL import Image
from scipy import stats
import math

In [138]:
#Constants
imageX = 64
imageY = 64

In [139]:
def forceLandscape(image):
    if image.shape[0] > image.shape[1]:
        return np.rot90(image)
    return image

#functions for array
def resizeImage(image, x, y):
    return scipy.misc.imresize(image,(x,y), interp='nearest')

def grayscale(image):
    return np.dot(image[...,:3], [0.21, 0.72, 0.07])

def reshape(image):
    return image.flatten()

#functions for images
def resizeNearest(image, x, y):
    # use nearest neighbour
     return (image.resize((x, y), Image.NEAREST))
    
def resizeLinear(image, x, y):
    # linear interpolation in a 2x2 environment
    return(image.resize((x, y), Image.BILINEAR))

def resizeLinear(image, x, y):
    # cubic spline interpolation in a 4x4 environment
    return(image.resize((x, y), Image.BICUBIC))

def resizeLinear(image, x, y):
    # best down-sizing filter
    return (image.resize((x, y), Image.ANTIALIAS))


In [140]:
def formatImage(image):
    image = forceLandscape(image)
    image = resizeImage(image, imageX, imageY)
    image = grayscale(image)
    return image

In [158]:
#Load Movie Images
datasetPath = '../../../TERC/WinEarthPhotosByKeyword/'
imageClass = 'Movies3of36PhotosEach'
imageDirectory = os.listdir(datasetPath + imageClass)
if imageDirectory[0] == '.DS_Store':
    imageDirectory.pop(0)
data = imageDirectory

In [176]:
#Load Random Images
datasetPath2 = '../../../BU10000SetA/'
imageDirectory2 = [random.choice(os.listdir(datasetPath2)) for x in range(0,108)]
if imageDirectory2[0] == '.DS_Store':
    imageDirectory2.pop(0)
randomData = imageDirectory2

In [160]:
for i in range(0, len(data)):
    imagePath = datasetPath + imageClass + "/" +  data[i]
    #img = mpimg.imread(imagePath)
    #img = formatImage(img)
    #img = reshape(img)        #flatten the img
    img = Image.open(imagePath)
    data[i] = resizeNearest(img, imageX, imageY)
    
print("loading complete")

loading complete


In [177]:
#random data loading
for i in range(0, len(randomData)):
    imagePath = datasetPath2 +  randomData[i]
    #img = mpimg.imread(imagePath)
    #img = formatImage(img)
    #img = reshape(img)        #flatten the img
    img = Image.open(imagePath)
    randomData[i] = resizeNearest(img, imageX, imageY)

print("loading complete")

loading complete


In [178]:
def DoComparison(image1, image2):
    diffs = ImageChops.difference(image1, image2)
    return ImageEntropy(diffs)

def ImageEntropy(image):
    histogram   = image.histogram()
    histlength  = sum(histogram)
    probability = [float(h) / histlength for h in histogram]
    return -sum([p * math.log(p, 2) for p in probability if p != 0])

In [179]:
def getResult(dataset):
    results = []
    for i in range(0,len(dataset)-1):
        results.append(DoComparison(dataset[i],dataset[i+1]))
    return (results)

In [183]:
#Calculate mean entropy within image set
set1 = data[:36]
set2 = data[36:72]
set3 = data[72:]

results = [getResult(set1),getResult(set2),getResult(set3)]
mean = [np.mean(results[0]),np.mean(results[1]),np.mean(results[2])]
std = [np.std(results[0]),np.std(results[1]),np.std(results[2])]

In [184]:
#Calculate mean entropy within random image set
rset1 = randomData[:36]
rset2 = randomData[36:72]
rset3 = randomData[72:]

randomResults = [getResult(rset1),getResult(rset2),getResult(rset3)]
randomMean = [np.mean(randomResults[0]),np.mean(randomResults[1]),np.mean(randomResults[2])]
randomStd = [np.std(randomResults[0]),np.std(randomResults[1]),np.std(randomResults[2])]

In [202]:
# print(results)
print("Mean of Image Histogram Entropy for each Movie set:")
print(mean)
print("")

print("Standard Deviation of Image Histogram Entropy for each Movie set:")
print(std)
print("")

# print(randomResults)
print("Mean of Image Histogram Entropy for some random set:")
print(randomMean)
print("")

print("Standard Deviation of Image Histogram Entropy for some random set:")
print(randomStd)

Mean of Image Histogram Entropy for each Movie set:
[6.03815683856444, 5.8963458297855427, 7.0027725509926499]

Standard Deviation of Image Histogram Entropy for each Movie set:
[0.072218547551981982, 0.081177359126374013, 0.26621763876837945]

Mean of Image Histogram Entropy for some random set:
[8.3345101739981189, 8.4000857948595034, 8.5795720807816362]

Standard Deviation of Image Histogram Entropy for some random set:
[0.52367929699478311, 0.61853269880855044, 0.3723146448851769]


In [199]:
#Compare similarity
ttest = stats.ttest_ind(results[0] + results[1] + results[2], randomResults[0] + randomResults[1] + randomResults[2])
print("T-test Results for 2 samples:")
print(ttest)

T-test Results for 2 samples:
Ttest_indResult(statistic=-29.361304588263625, pvalue=6.4247764833532715e-76)


In [None]:
#Null Hypothesis = There is difference between samples
#alpha = 0.05
#p-value = 6.4247764833532715e-76
#Since p-value <<< alpha, reject the null hypothesis. 
#Therefore, the difference between the population means is statistically significant