In [1]:
# Ref : http://blog.zeevgilovitz.com/detecting-dominant-colours-in-python/

from PIL import Image
import random
import numpy as np
import pandas as pd
import itertools
import webcolors

In [2]:
df = pd.read_csv('../data/data_updated.csv')
df = df.drop('Unnamed: 0', 1)

In [3]:
df.head()

Unnamed: 0,target_market,score,cover_image,sugar_result,fiber_result,sodium_result,sugar_per_serving,fiber_per_serving,sodium_per_serving,calories_per_serving,...,b_mean,g_mean,r_mean,b_sd,g_sd,r_sd,most_fre_color,most_fre_color_count,file_name,most_domin_color
0,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,35.58,71.37,208.29,54.28,86.32,62.01,#e70000,256,img_001_anniesfruitybunnies.jpg,white
1,Family,52,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,310,7,1,90,110,...,53.31,107.21,195.62,46.53,48.07,54.99,#d8501e,256,img_002_annieshoneybunnies.jpg,brown
2,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,65.4,123.12,191.4,52.63,60.91,40.15,#bd582a,256,img_003_anniescinnamonrollbunnyos.jpg,brown
3,Family,64,http://cerealfacts.org/media/cereal_images/Ann...,7%,3%,379,2,1,110,120,...,95.78,78.63,115.69,47.58,71.51,58.18,#581680,1272,img_004_anniesorganicbunnyos.jpg,brown
4,Family,58,http://cerealfacts.org/media/cereal_images/Bar...,23%,10%,267,7,3,80,120,...,108.41,153.04,186.66,48.37,51.09,53.44,#ffffff,681,img_005_barbarasbakerypuffinspuffscrunchycocoa...,grey


In [4]:
def gen_file_name(row):
    file_name = row.cover_image.split('/')[len(row.cover_image.split('/'))-1]
    file_name = 'img_' + str(row['index']).zfill (3) + '_' + (file_name).lower()
    return(file_name)

In [5]:
df['file_name'] = df.apply(gen_file_name, axis=1)

In [6]:
df.head()

Unnamed: 0,target_market,score,cover_image,sugar_result,fiber_result,sodium_result,sugar_per_serving,fiber_per_serving,sodium_per_serving,calories_per_serving,...,b_mean,g_mean,r_mean,b_sd,g_sd,r_sd,most_fre_color,most_fre_color_count,file_name,most_domin_color
0,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,35.58,71.37,208.29,54.28,86.32,62.01,#e70000,256,img_001_anniesfruitybunnies.jpg,white
1,Family,52,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,310,7,1,90,110,...,53.31,107.21,195.62,46.53,48.07,54.99,#d8501e,256,img_002_annieshoneybunnies.jpg,brown
2,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,65.4,123.12,191.4,52.63,60.91,40.15,#bd582a,256,img_003_anniescinnamonrollbunnyos.jpg,brown
3,Family,64,http://cerealfacts.org/media/cereal_images/Ann...,7%,3%,379,2,1,110,120,...,95.78,78.63,115.69,47.58,71.51,58.18,#581680,1272,img_004_anniesorganicbunnyos.jpg,brown
4,Family,58,http://cerealfacts.org/media/cereal_images/Bar...,23%,10%,267,7,3,80,120,...,108.41,153.04,186.66,48.37,51.09,53.44,#ffffff,681,img_005_barbarasbakerypuffinspuffscrunchycocoa...,grey


In [7]:
# K-mean Clustering

# Class : Cluster
class Cluster(object):

    def __init__(self):
        self.pixels = []
        self.lastpixels = []
        self.centroid = None

    def addPoint(self, pixel):
        self.pixels.append(pixel)
        
    def pixelCount(self):
        return len(self.lastpixels)

    def setNewCentroid(self):

        R = [colour[0] for colour in self.pixels]
        G = [colour[1] for colour in self.pixels]
        B = [colour[2] for colour in self.pixels]

        R = sum(R) / (len(R) + 1)
        G = sum(G) / (len(G) + 1)
        B = sum(B) / (len(B) + 1)
        
        self.centroid = (R, G, B)
        self.lastpixels = self.pixels
        self.pixels = []

        return self.centroid

In [8]:
# Class : Kmeans
class Kmeans(object):

    def __init__(self, k=3, max_iterations=5, min_distance=2.0, size=300):
        self.k = k
        self.max_iterations = max_iterations
        self.min_distance = min_distance
        self.size = (size, size + 1)

    def run(self, image):
        self.image = image
        self.image.thumbnail(self.size)
        self.pixels = np.array(image.getdata(), dtype=np.uint8)

        self.clusters = [None for i in range(self.k)]
        self.oldClusters = None

        randomPixels = random.sample(self.pixels, self.k)

        for idx in range(self.k):
            self.clusters[idx] = Cluster()
            self.clusters[idx].centroid = randomPixels[idx]

        iterations = 0

        while self.shouldExit(iterations) is False:

            self.oldClusters = [cluster.centroid for cluster in self.clusters]

            # print iterations

            for pixel in self.pixels:
                self.assignClusters(pixel)

            for cluster in self.clusters:
                cluster.setNewCentroid()

            iterations += 1
        
        pixelCounts = [cluster.pixelCount() for cluster in self.clusters]
        
        clusters_centroid = [cluster.centroid for cluster in self.clusters]
        
        return pixelCounts

    def assignClusters(self, pixel):
        shortest = float('Inf')
        for cluster in self.clusters:
            distance = self.calcDistance(cluster.centroid, pixel)
            if distance < shortest:
                shortest = distance
                nearest = cluster

        nearest.addPoint(pixel)

    def calcDistance(self, a, b):

        result = np.sqrt(sum((a - b) ** 2))
        return result

    def shouldExit(self, iterations):

        if self.oldClusters is None:
            return False

        for idx in range(self.k):
            dist = self.calcDistance(
                np.array(self.clusters[idx].centroid),
                np.array(self.oldClusters[idx])
            )
            if dist < self.min_distance:
                return True

        if iterations <= self.max_iterations:
            return False

        return True

    # The remaining methods are used for debugging
    
    def showImage(self):
        self.image.show()

    def showCentroidColours(self):

        for cluster in self.clusters:
            image = Image.new("RGB", (200, 200), cluster.centroid)
            image.show()

    def showClustering(self):

        localPixels = [None] * len(self.image.getdata())

        for idx, pixel in enumerate(self.pixels):
                shortest = float('Inf')
                for cluster in self.clusters:
                    distance = self.calcDistance(
                        cluster.centroid,
                        pixel
                    )
                    if distance < shortest:
                        shortest = distance
                        nearest = cluster

                localPixels[idx] = nearest.centroid

        w, h = self.image.size
        localPixels = np.asarray(localPixels)\
            .astype('uint8')\
            .reshape((h, w, 3))

        colourMap = Image.fromarray(localPixels)
        colourMap.show()
        
    def printCentroidColoursHexCode(self):
        hexcodes = []
        for cluster in self.clusters:
            hexcode = '#'+''.join(map(chr, cluster.centroid)).encode('hex')
            hexcodes.append(hexcode)
            # print cluster.centroid, hexcode
          
        print hexcodes
        
        return hexcodes
    
    def printCentroidColoursRGB(self):
        RGBs = []
        for cluster in self.clusters:
            RGBs.append(cluster.centroid)
        
        print RGB
        
        return RGB

In [9]:
img_name = df['file_name']

In [10]:
hexcodes_dominant_color = []
pixels_num_color = []

for i in range(260):
    img = Image.open('../img/' + img_name[i])
    
    k = Kmeans()
    pixels_num = k.run(img)
    
    hexcode = k.printCentroidColoursHexCode()
    print pixels_num
    
    hexcodes_dominant_color.append(hexcode)
    pixels_num_color.append(pixels_num)

['#fdfcfc', '#c7a43e', '#d3150f']
[40663, 16677, 32660]
['#d0bc7a', '#555636', '#da5b23']
[10427, 9301, 42880]
['#f0e3ae', '#bd642e', '#6a753c']
[16480, 40899, 5530]
['#756538', '#c4af5a', '#4e186f']
[12801, 16400, 34912]
['#48374a', '#c18d49', '#d6bf97']
[8205, 23331, 29266]
['#623849', '#cea76e', '#e7d7c0']
[10861, 30900, 18439]
['#623c3a', '#ccb28f', '#c78841']
[9466, 31532, 16493]
['#5e3436', '#b79269', '#d4bc9f']
[11190, 25553, 21049]
['#512f34', '#d2b995', '#bc8c59']
[10779, 26146, 20265]
['#dbcab3', '#583842', '#c0996d']
[14855, 7836, 34499]
['#463540', '#e0d0bb', '#ba9872']
[17099, 12825, 32082]
['#ceb593', '#3e3749', '#a8855f']
[31206, 9281, 16402]
['#593430', '#b68659', '#cfb595']
[10681, 18555, 28556]
['#d7c5ad', '#bc986f', '#6e4835']
[16024, 31044, 10423]
['#d5bea2', '#4d4050', '#b7916c']
[21447, 8986, 27359]
['#6d5f4d', '#737877', '#a6a693']
[34812, 12487, 20426]
['#d0b49b', '#391a18', '#b37652']
[45441, 17451, 26204]
['#b89673', '#5f3a38', '#d6c0a7']
[27028, 11519, 18944]

In [11]:
print len(pixels_num_color)

260


In [12]:
print len(hexcodes_dominant_color)

260


In [13]:
pixels_num_color[0]

[40663, 16677, 32660]

In [14]:
hexcodes_dominant_color[0][np.argmax(pixels_num_color[0])]

'#fdfcfc'

In [15]:
most_domin_colors = []
for i in range(len(pixels_num_color)):
    most_domin_colors.append(hexcodes_dominant_color[i][np.argmax(pixels_num_color[i])])

In [16]:
# most_domin_colors return the most dominant color in each image
print most_domin_colors

['#fdfcfc', '#da5b23', '#bd642e', '#4e186f', '#d6bf97', '#cea76e', '#ccb28f', '#b79269', '#d2b995', '#c0996d', '#ba9872', '#ceb593', '#cfb595', '#bc986f', '#b7916c', '#6d5f4d', '#d0b49b', '#b89673', '#c2a07a', '#c4a57e', '#c6a67f', '#c6a467', '#cfae82', '#c7a77a', '#c7a37a', '#fcfbfb', '#f7f8f7', '#fafaf7', '#f8f7f1', '#e2e5e5', '#f0f0ec', '#f5f5f4', '#eaebe5', '#f3f5f6', '#fcfbf9', '#dadad8', '#e1e7e8', '#f9f8f4', '#f4f3f0', '#f2f6f8', '#f6f5f3', '#f3f9fb', '#83914c', '#f4e6a4', '#eee374', '#f8f6e4', '#d5121c', '#f4f4f3', '#47366b', '#ead69c', '#f2f2f2', '#f3efe7', '#c29541', '#4d302a', '#7d311e', '#f7f6f6', '#eeeeda', '#f9f9f6', '#f6f3f2', '#e9ecec', '#f6f6f5', '#dfd2d7', '#ca2249', '#62361a', '#3f141c', '#f6f7f7', '#f5f6f4', '#561c0e', '#f6f7f6', '#f8f5f3', '#f0f0ef', '#ddae47', '#f4e3c1', '#f5f0e6', '#f4ebcd', '#f2ebcc', '#f5e0c2', '#f8f7f6', '#4c1f55', '#e9ba2f', '#dbab26', '#8f2d6f', '#fafaf9', '#f9f9f7', '#fbf9f5', '#fbf9f7', '#e1ddd1', '#d5cdd2', '#e3d6b3', '#ecc8a2', '#451e5a'

In [17]:
print most_domin_colors[48]

#47366b


In [18]:
def hex_to_rgb(value):
    value = value.lstrip('#')
    lv = len(value)
    return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))

In [19]:
hex_to_rgb('#958d8d')

(149, 141, 141)

In [20]:
def closest_colour(requested_colour):
    pri_color = ['Red', 'Orange', 'Yellow', 'Green', 'Blue', 'Violet', 'Brown', 'Black', 'Grey', 'White']
    pri_color = [x.lower() for x in pri_color]
    min_colours = {}
    #for key, name in webcolors.css3_hex_to_names.items():
    for key, name in [x for x in itertools.ifilter(lambda x: x[1]  in pri_color, webcolors.css3_hex_to_names.items())]:
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        rd = (r_c - requested_colour[0]) ** 2
        gd = (g_c - requested_colour[1]) ** 2
        bd = (b_c - requested_colour[2]) ** 2
        min_colours[(rd + gd + bd)] = name
    return min_colours[min(min_colours.keys())]

In [21]:
closest_colour(hex_to_rgb(most_domin_colors[0]))

u'white'

In [22]:
domin_colors = []
for i in range(260):
    domin_colors.append(closest_colour(hex_to_rgb(most_domin_colors[i])))

In [23]:
print domin_colors

[u'white', u'brown', u'brown', u'brown', u'violet', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'grey', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'grey', u'white', u'yellow', u'white', u'red', u'white', u'grey', u'white', u'white', u'white', u'orange', u'brown', u'brown', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'brown', u'brown', u'black', u'white', u'white', u'brown', u'white', u'white', u'white', u'orange', u'white', u'white', u'white', u'white', u'white', u'white', u'brown', u'orange', u'orange', u'brown', u'white', u'white', u'white', u'white', u'white', u'white', u'white', u'violet', u'brown', u'grey', u'white', u'violet', u'brown', u'black', u'white', u'white', u'white', u'grey', u'white', u'white

In [24]:
def genDominColor(row):
    return domin_colors[row['index'] - 1]

In [25]:
df['most_domin_color'] = df.apply(genDominColor, axis = 1)

In [26]:
df.head()

Unnamed: 0,target_market,score,cover_image,sugar_result,fiber_result,sodium_result,sugar_per_serving,fiber_per_serving,sodium_per_serving,calories_per_serving,...,b_mean,g_mean,r_mean,b_sd,g_sd,r_sd,most_fre_color,most_fre_color_count,file_name,most_domin_color
0,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,35.58,71.37,208.29,54.28,86.32,62.01,#e70000,256,img_001_anniesfruitybunnies.jpg,white
1,Family,52,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,310,7,1,90,110,...,53.31,107.21,195.62,46.53,48.07,54.99,#d8501e,256,img_002_annieshoneybunnies.jpg,brown
2,Family,50,http://cerealfacts.org/media/cereal_images/Ann...,24%,3%,293,7,1,85,120,...,65.4,123.12,191.4,52.63,60.91,40.15,#bd582a,256,img_003_anniescinnamonrollbunnyos.jpg,brown
3,Family,64,http://cerealfacts.org/media/cereal_images/Ann...,7%,3%,379,2,1,110,120,...,95.78,78.63,115.69,47.58,71.51,58.18,#581680,1272,img_004_anniesorganicbunnyos.jpg,brown
4,Family,58,http://cerealfacts.org/media/cereal_images/Bar...,23%,10%,267,7,3,80,120,...,108.41,153.04,186.66,48.37,51.09,53.44,#ffffff,681,img_005_barbarasbakerypuffinspuffscrunchycocoa...,violet


In [27]:
df.tail()

Unnamed: 0,target_market,score,cover_image,sugar_result,fiber_result,sodium_result,sugar_per_serving,fiber_per_serving,sodium_per_serving,calories_per_serving,...,b_mean,g_mean,r_mean,b_sd,g_sd,r_sd,most_fre_color,most_fre_color_count,file_name,most_domin_color
255,Adult,70,http://cerealfacts.org/media/cereal_images/Qua...,16%,9%,339,9,5,190,210,...,164.55,136.89,90.92,62.61,58.52,98.7,#0093d8,249,img_256_quakeroatmealsquaresbrownsugar.jpg,grey
256,Adult,70,http://cerealfacts.org/media/cereal_images/Qua...,16%,9%,339,9,5,190,210,...,97.41,126.17,178.93,58.6,61.11,85.87,#26397b,1792,img_257_quakeroatmealsquarescinnamon.jpg,orange
257,Adult,70,http://cerealfacts.org/media/cereal_images/Qua...,16%,9%,339,9,5,190,210,...,91.22,121.25,167.26,65.59,61.71,74.12,#1b4482,256,img_258_quakeroatmealsquaresgoldenmaple.jpg,brown
258,Family,58,http://cerealfacts.org/media/cereal_images/Qua...,19%,19%,281,6,6,90,110,...,158.91,175.17,203.98,90.9,73.27,47.04,#e4e3e1,148,img_259_quakerlifecrunchtimestrawberry.jpg,white
259,Family,58,http://cerealfacts.org/media/cereal_images/Qua...,22%,19%,281,7,6,90,110,...,88.67,104.01,126.5,51.3,39.4,38.09,#817d7c,256,img_260_quakerlifecrunchtimeapplecinnamon.jpg,brown


In [28]:
df.to_csv('../data/data_updated.csv')