In [1]:
# Ref : http://blog.zeevgilovitz.com/detecting-dominant-colours-in-python/

from PIL import Image
import random
import numpy
import pandas as pd

In [2]:
df = pd.read_csv('../data/data_updated.csv')
df = df.drop('Unnamed: 0', 1)

In [3]:
df.head()

Unnamed: 0,target_market,score,cover_image,sugar_result,fiber_result,sodium_result,sugar_per_serving,fiber_per_serving,sodium_per_serving,calories_per_serving,...,has_cha,down_gaze,b_mean,g_mean,r_mean,b_sd,g_sd,r_sd,most_fre_color,most_fre_color_count
0,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,1,0,35.58,71.37,208.29,54.28,86.32,62.01,#e70000,256
1,Family,52,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,310,7,1,90,110,...,1,0,53.31,107.21,195.62,46.53,48.07,54.99,#d8501e,256
2,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,1,0,65.4,123.12,191.4,52.63,60.91,40.15,#bd582a,256
3,Family,64,http://cerealfacts.org/media/cereal_images/Ann...,7%,3%,379,2,1,110,120,...,1,0,95.78,78.63,115.69,47.58,71.51,58.18,#581680,1272
4,Family,58,http://cerealfacts.org/media/cereal_images/Bar...,23%,10%,267,7,3,80,120,...,1,0,108.41,153.04,186.66,48.37,51.09,53.44,#ffffff,681


In [4]:
def gen_file_name(row):
    file_name = row.cover_image.split('/')[len(row.cover_image.split('/'))-1]
    file_name = 'img_' + str(row['index']).zfill (3) + '_' + (file_name).lower()
    return(file_name)

In [5]:
df['file_name'] = df.apply(gen_file_name, axis=1)

In [6]:
df.head()

Unnamed: 0,target_market,score,cover_image,sugar_result,fiber_result,sodium_result,sugar_per_serving,fiber_per_serving,sodium_per_serving,calories_per_serving,...,down_gaze,b_mean,g_mean,r_mean,b_sd,g_sd,r_sd,most_fre_color,most_fre_color_count,file_name
0,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,0,35.58,71.37,208.29,54.28,86.32,62.01,#e70000,256,img_001_anniesfruitybunnies.jpg
1,Family,52,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,310,7,1,90,110,...,0,53.31,107.21,195.62,46.53,48.07,54.99,#d8501e,256,img_002_annieshoneybunnies.jpg
2,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,0,65.4,123.12,191.4,52.63,60.91,40.15,#bd582a,256,img_003_anniescinnamonrollbunnyos.jpg
3,Family,64,http://cerealfacts.org/media/cereal_images/Ann...,7%,3%,379,2,1,110,120,...,0,95.78,78.63,115.69,47.58,71.51,58.18,#581680,1272,img_004_anniesorganicbunnyos.jpg
4,Family,58,http://cerealfacts.org/media/cereal_images/Bar...,23%,10%,267,7,3,80,120,...,0,108.41,153.04,186.66,48.37,51.09,53.44,#ffffff,681,img_005_barbarasbakerypuffinspuffscrunchycocoa...


In [7]:
# K-mean Clustering

# Class : Cluster
class Cluster(object):

    def __init__(self):
        self.pixels = []
        self.lastpixels = []
        self.centroid = None

    def addPoint(self, pixel):
        self.pixels.append(pixel)
        
    def pixelCount(self):
        return len(self.lastpixels)

    def setNewCentroid(self):

        R = [colour[0] for colour in self.pixels]
        G = [colour[1] for colour in self.pixels]
        B = [colour[2] for colour in self.pixels]

        R = sum(R) / len(R)
        G = sum(G) / len(G)
        B = sum(B) / len(B)
        
        self.centroid = (R, G, B)
        self.lastpixels = self.pixels
        self.pixels = []

        return self.centroid

In [8]:
# Class : Kmeans
class Kmeans(object):

    def __init__(self, k=1, max_iterations=5, min_distance=2.0, size=300):
        self.k = k
        self.max_iterations = max_iterations
        self.min_distance = min_distance
        self.size = (size, size + 1)

    def run(self, image):
        self.image = image
        self.image.thumbnail(self.size)
        self.pixels = numpy.array(image.getdata(), dtype=numpy.uint8)

        self.clusters = [None for i in range(self.k)]
        self.oldClusters = None

        randomPixels = random.sample(self.pixels, self.k)

        for idx in range(self.k):
            self.clusters[idx] = Cluster()
            self.clusters[idx].centroid = randomPixels[idx]

        iterations = 0

        while self.shouldExit(iterations) is False:

            self.oldClusters = [cluster.centroid for cluster in self.clusters]

            # print iterations

            for pixel in self.pixels:
                self.assignClusters(pixel)

            for cluster in self.clusters:
                cluster.setNewCentroid()

            iterations += 1
        
        # pixelCounts = [cluster.pixelCount() for cluster in self.clusters]
        clusters_centroid = [cluster.centroid for cluster in self.clusters]
        
        return clusters_centroid

    def assignClusters(self, pixel):
        shortest = float('Inf')
        for cluster in self.clusters:
            distance = self.calcDistance(cluster.centroid, pixel)
            if distance < shortest:
                shortest = distance
                nearest = cluster

        nearest.addPoint(pixel)

    def calcDistance(self, a, b):

        result = numpy.sqrt(sum((a - b) ** 2))
        return result

    def shouldExit(self, iterations):

        if self.oldClusters is None:
            return False

        for idx in range(self.k):
            dist = self.calcDistance(
                numpy.array(self.clusters[idx].centroid),
                numpy.array(self.oldClusters[idx])
            )
            if dist < self.min_distance:
                return True

        if iterations <= self.max_iterations:
            return False

        return True

    # The remaining methods are used for debugging
    
    def showImage(self):
        self.image.show()

    def showCentroidColours(self):

        for cluster in self.clusters:
            image = Image.new("RGB", (200, 200), cluster.centroid)
            image.show()

    def showClustering(self):

        localPixels = [None] * len(self.image.getdata())

        for idx, pixel in enumerate(self.pixels):
                shortest = float('Inf')
                for cluster in self.clusters:
                    distance = self.calcDistance(
                        cluster.centroid,
                        pixel
                    )
                    if distance < shortest:
                        shortest = distance
                        nearest = cluster

                localPixels[idx] = nearest.centroid

        w, h = self.image.size
        localPixels = numpy.asarray(localPixels)\
            .astype('uint8')\
            .reshape((h, w, 3))

        colourMap = Image.fromarray(localPixels)
        colourMap.show()
        
    def printCentroidColoursHexCode(self):
        hexcodes = []
        for cluster in self.clusters:
            hexcode = '#'+''.join(map(chr, cluster.centroid)).encode('hex')
            hexcodes.append(hexcode)
            # print cluster.centroid, hexcode
          
        print hexcodes
        
        return hexcodes
    
    def printCentroidColoursRGB(self):
        RGBs = []
        for cluster in self.clusters:
            RGBs.append(cluster.centroid)
        
        print RGBs
        
        return RGBs

In [9]:
img_name = df['file_name']

In [139]:
hexcodes_dominant_color = []

for i in range(260):
    img = Image.open('../img/' + img_name[i])
    
    k = Kmeans()
    result = k.run(img)
    hexcode = k.printCentroidColoursHexCode()
    
    hexcodes_dominant_color.append(hexcode)

['#e49883']
['#c46b34']
['#c38751']
['#744e5f']
['#bb9a6e']
['#c2a281']
['#b9936b']
['#b08f73']
['#b28f6d']
['#b9987a']
['#a28873']
['#ac9378']
['#b18e6f']
['#b59676']
['#b2957c']
['#7f796a']
['#aa846c']
['#b09278']
['#cbb4a1']
['#d3c1ab']
['#cebba8']
['#bd9871']
['#d9bfa5']
['#d3bea6']
['#cdb59f']
['#decfb9']
['#dfcec4']
['#d2c8ae']
['#c6bba2']
['#c2bba9']
['#cbc2b0']
['#d7d2ca']
['#c4bca3']
['#d3d0c7']
['#d5c8b1']
['#b2a28f']
['#c3c3ab']
['#cbbea8']
['#cfb0a3']
['#e0d6c3']
['#d0cab8']
['#a0b7cd']
['#719e68']
['#c8c08e']
['#c28247']
['#929fb5']
['#d56250']
['#c9c0bf']
['#677d9a']
['#dbc594']
['#cfc6c9']
['#c1ac9d']
['#ad9167']
['#806757']
['#896154']
['#d0bab7']
['#b7b291']
['#c59987']
['#d9c2bc']
['#a5aab7']
['#e0d0c2']
['#c0a18f']
['#b65c64']
['#866443']
['#957477']
['#b5bbc6']
['#b7a99e']
['#97602f']
['#c6c3cb']
['#cab1af']
['#b9b6ab']
['#9c866e']
['#c1a384']
['#bfbcb9']
['#ccb689']
['#c5ac86']
['#bea199']
['#d1c3bf']
['#9b4d7f']
['#c7b680']
['#b3a979']
['#96587a']
['#c3aba9']
['#d

ValueError: operands could not be broadcast together with shapes (3,) (4,) 

In [137]:
print hexcodes_dominant_color

[['#e49883'], ['#c46b34'], ['#c38751'], ['#744e5f'], ['#bb9a6e']]
