In [82]:
#Naive entropy estimator

"""
USE PYTHON 2.7
"""

#Install Macports here
#https://guide.macports.org/chunked/installing.macports.html
#Install pyentropy here
#https://github.com/robince/pyentropy/blob/master/docs/source/examples.rst

'\nUSE PYTHON 2.7\n'

In [100]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy
import scipy.misc
import os
import random
from scipy import stats
from pyentropy import DiscreteSystem

In [48]:
#Constants
imageX = 64
imageY = 64

In [72]:
def forceLandscape(image):
    if image.shape[0] > image.shape[1]:
        return np.rot90(image)
    return image

def resizeImage(image, x,y):
    return scipy.misc.imresize(image,(x,y), interp='nearest')

def grayscale(image):
    return np.dot(image[...,:3], [0.21, 0.72, 0.07])

def reshape(image):
    return image.flatten()

In [73]:
def formatImage(image):
    image = forceLandscape(image)
    image = resizeImage(image, imageX, imageY)
    image = grayscale(image)
    return image

In [77]:
#Load Images
datasetPath = '../../../TERC/WinEarthPhotosByKeyword/'
imageClass = 'Movies3of36PhotosEach'
imageDirectory = os.listdir(datasetPath + imageClass)
if imageDirectory[0] == '.DS_Store':
    imageDirectory.pop(0)
data = imageDirectory


In [78]:
#Load Random Images
datasetPath2 = '../../../BU10000SetA/'
imageDirectory2 = [random.choice(os.listdir(datasetPath2)) for x in range(0,108)]
if imageDirectory2[0] == '.DS_Store':
    imageDirectory2.pop(0)
randomData = imageDirectory2

In [52]:
# data = imageDirectory
# labels = ['Movie'] * len(imageDirectory)
# results = ['NotMovie'] * len(imageDirectory)

In [79]:
for i in range(0, len(data)):
    imagePath = datasetPath + imageClass + "/" +  data[i]
    img = mpimg.imread(imagePath)
    img = formatImage(img)
    img = reshape(img)        #flatten the img
    data[i] = img
    
print("loading complete")

loading complete


In [81]:
#random data loading
for i in range(0, len(randomData)):
    imagePath2 = datasetPath2 +  randomData[i]
    img2 = mpimg.imread(imagePath2)
    img2 = formatImage(img2)
    img2 = reshape(img2)        #flatten the img
    randomData[i] = img2
    
print("loading complete")

loading complete


In [90]:
def discreteSystem(data1, data2):
    s = DiscreteSystem(data1.astype(int),(1,255), data2.astype(int),(1,255))
    s.calculate_entropies(method='plugin', calc=['HX', 'HXY'])
    return s.I()

In [91]:
def getResult(dataset):
    results = []
    for i in range(0,len(dataset)-1):
        results.append(discreteSystem(dataset[i],dataset[i+1]))
    return (results)

In [95]:
%%capture
#Calculate mean entropy within data set
set1 = data[:36]
set2 = data[36:72]
set3 = data[72:]

results = [getResult(set1),getResult(set2),getResult(set3)]
mean = [np.mean(results[0]),np.mean(results[1]),np.mean(results[2])]
std = [np.std(results[0]),np.std(results[1]),np.std(results[2])]

In [96]:
%%capture
#Compare similarity measures with entropy of random images
rset1 = randomData[:36]
rset2 = randomData[36:72]
rset3 = randomData[72:]

randomResults = [getResult(rset1),getResult(rset2),getResult(rset3)]
randomMean = [np.mean(randomResults[0]),np.mean(randomResults[1]),np.mean(randomResults[2])]
randomStd = [np.std(randomResults[0]),np.std(randomResults[1]),np.std(randomResults[2])]

In [103]:
# print(results)
print("Mean Entropy for each Movie set:")
print(mean)
print("")

print("Standard Deviation of Entropy for each Movie set:")
print(std)
print("")

# print(randomResults)
print("Mean Entropy for some random set:")
print(randomMean)
print("")

print("Standard Deviation of Entropy for some random set:")
print(randomStd)

Mean Entropy for each Movie set:
[2.7672834381732807, 3.7547104661363244, 3.6430990870347659]

Standard Deviation of Entropy for each Movie set:
[0.05207266781172263, 0.10874217411375474, 0.070997574576869782]

Mean Entropy for some random set:
[1.6741972371507636, 1.6576196807272399, 1.6877417764444296]

Standard Deviation of Entropy for some random set:
[0.58708479490632404, 0.65823111019904945, 0.68502046056624466]


In [101]:
#Compare similarity
ttest = stats.ttest_ind(results[0] + results[1] + results[2], randomResults[0] + randomResults[1] + randomResults[2])
print("T-test Results for 2 samples:")
print(ttest)

T-test Results for 2 samples:
Ttest_indResult(statistic=22.261717234841257, pvalue=5.9613698327357999e-57)


In [102]:
#Null Hypothesis = There is no difference between samples
#alpha = 0.05
#p-value = 5.9613698327357999e-57
#Since p-value <<< alpha, reject the null hypothesis. 
#Therefore, the difference between the population means is statistically significant