In [2]:
import pandas as pd
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt

In [4]:
# flower images sourced from project by Maria-Elena Nilsback and Andrew Zisserman
# https://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html

In [5]:
# import color naming data
# table sourced from Joost van der Weijer'r website
# http://lear.inrialpes.fr/people/vandeweijer/color_names.html

data = pd.read_csv('ColorNaming/w2c.txt', sep=" ", header=None)
naming = data.drop(14, axis=1)
naming.columns = ['r', 'g', 'b', 'black', 'blue', 'brown', 'gray', 'green',
                  'orange', 'pink', 'purple', 'red', 'white', 'yellow']

naming

Unnamed: 0,r,g,b,black,blue,brown,gray,green,orange,pink,purple,red,white,yellow
0,3.5,3.5,3.5,0.293958,0.020885,0.037604,0.071886,0.141474,0.064986,0.058097,0.125870,0.059671,0.006976,0.118593
1,11.5,3.5,3.5,0.330787,0.028917,0.050061,0.072332,0.123315,0.062952,0.056099,0.114169,0.058952,0.005640,0.096776
2,19.5,3.5,3.5,0.375413,0.028445,0.055683,0.072660,0.096563,0.067473,0.054926,0.091975,0.071449,0.005915,0.079498
3,27.5,3.5,3.5,0.298999,0.022142,0.111073,0.074210,0.062086,0.093956,0.060683,0.098694,0.111921,0.005485,0.060749
4,35.5,3.5,3.5,0.092343,0.017636,0.229077,0.064490,0.048865,0.128829,0.070291,0.135458,0.177669,0.002874,0.032470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32763,219.5,251.5,251.5,0.055417,0.281410,0.070617,0.034611,0.102572,0.063057,0.034803,0.037092,0.058153,0.208652,0.053616
32764,227.5,251.5,251.5,0.058489,0.210541,0.060814,0.031589,0.082329,0.061830,0.048448,0.033059,0.059745,0.305543,0.047613
32765,235.5,251.5,251.5,0.061535,0.146413,0.050184,0.027835,0.061174,0.060009,0.061232,0.026390,0.059320,0.404151,0.041757
32766,243.5,251.5,251.5,0.064405,0.089896,0.038576,0.023415,0.039662,0.057233,0.072949,0.017149,0.056683,0.503765,0.036267


In [6]:
naming.r.unique()

# starts from 3.5 and increasing by 8 up to 251.5

array([  3.5,  11.5,  19.5,  27.5,  35.5,  43.5,  51.5,  59.5,  67.5,
        75.5,  83.5,  91.5,  99.5, 107.5, 115.5, 123.5, 131.5, 139.5,
       147.5, 155.5, 163.5, 171.5, 179.5, 187.5, 195.5, 203.5, 211.5,
       219.5, 227.5, 235.5, 243.5, 251.5])

In [6]:
# locate correct rbg bin for the ref table

BINS = [3.5,  11.5,  19.5,  27.5,  35.5,  43.5,  51.5,  59.5,  67.5,
    75.5,  83.5,  91.5,  99.5, 107.5, 115.5, 123.5, 131.5, 139.5,
   147.5, 155.5, 163.5, 171.5, 179.5, 187.5, 195.5, 203.5, 211.5,
   219.5, 227.5, 235.5, 243.5, 251.5]

def find_bin(val, bins=BINS):
    pt = int(len(bins)/2)
    mx = int(len(bins))
    mn = 0
    for i in range (10):
        if abs(val - bins[pt]) <= 4:
            break
        else:
            pt = int(pt)
            if bins[pt] > val:
                mx = pt
                pt = int((pt + mn)/2)
            elif bins[pt] < val:
                mn = pt
                pt = int(((pt + mx)/2))

    return bins[pt]

In [7]:
# use table to select a name  

COLORS = ['black', 'blue', 'brown', 'gray', 'green','orange','pink', 'purple', 'red', 'white', 'yellow']

def pixel_color(pic, coord1, coord2, ref=naming):
    r, g, b = pic[coord1][coord2]
    
    r = find_bin(r)
    g = find_bin(g)
    b = find_bin(b)

    mask1 = naming[naming['r'] == r]
    mask2 = mask1[mask1['g'] == g]
    mask3 = mask2[mask2['b'] == b]
    
    best = 0
    most_likely = ''
    for color in COLORS:
        if mask3[color].iloc[0] > best:
            best = mask3[color].iloc[0]
            most_likely = color
            
    return most_likely

In [8]:
#  iterate through every pixel of an image and call the naming function
# use this to keep a tally of "points" for each color category
# return color percentages

def whole_image_colors(pic):
    colors = {'black': 0, 'blue':0, 'brown':0, 'gray':0, 'green':0,'orange':0, 
              'pink':0, 'purple':0, 'red':0, 'white':0, 'yellow':0}
    
    total = 0
    for i in range(len(pic)):
        for j in range(len(pic[i])):
            pixel = pixel_color(pic, i, j)
            colors[pixel] = colors[pixel] + 1
            total += 1
    
    color_percent = {}
    for key in colors.keys():
        color_percent[key] = int(colors[key])/total
        
    return color_percent

In [9]:
# take a name from a row of a pd table and read the corresponding image
# feed image into whole_image_colors func

def row_read(row, df, col_title='filename'):
    name = 'flowers/'+ str(df[col_title][row]) + '.jpg'
    img = cv.imread(name)
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    
    return whole_image_colors(img)

In [4]:
#  list of images used in the survey

survey_img = ['image_07919', 'image_02889', 'image_02606', 'image_02582', 'image_02524', 'image_02468', 
             'image_02442', 'image_02190', 'image_01807', 'image_001726', 'image_01705', 'image_01377', 
             'image_01354', 'image_01335', 'image_00968', 'image_00762', 'image_00539', 'image_00524', 
             'image_00333', 'image_00001']

In [12]:
# save to file to allow images to be analyzed a bit at a time

results = pd.DataFrame({'filename': survey_img, 'output1': [0 for i in range(len(survey_img))]})

# loading file:
# results = pd.read_csv('699_results.csv')

In [30]:
# to save progress at the end of a session, save the results csv using later code block
# to continue prev work, load the csv using code in the preceding block

results['output1'][0] = row_read(0, df=results)
results['output1'][1] = row_read(1, df=results)
results['output1'][2] = row_read(2, df=results)
results['output1'][3] = row_read(3, df=results)
results['output1'][4] = row_read(4, df=results)
results['output1'][5] = row_read(5, df=results)
results['output1'][6] = row_read(6, df=results)
results['output1'][7] = row_read(7, df=results)
results['output1'][8] = row_read(8, df=results)
results['output1'][9] = row_read(9, df=results)
results['output1'][10] = row_read(10, df=results)
results['output1'][11] = row_read(11, df=results)
results['output1'][12] = row_read(12, df=results)
results['output1'][13] = row_read(13, df=results)
results['output1'][14] = row_read(14, df=results)
results['output1'][15] = row_read(15, df=results)
results['output1'][16] = row_read(16, df=results)
results['output1'][17] = row_read(17, df=results)
results['output1'][18] = row_read(18, df=results)
results['output1'][19] = row_read(19, df=results)

In [13]:
results

Unnamed: 0,filename,output1,output2
0,image_07919,"{'black': 0.16012293853073464, 'blue': 0.01209...","{'fg': {'black': 0.0, 'blue': 0.02442550546122..."
1,image_02889,"{'black': 0.058264264264264266, 'blue': 0.0, '...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."
2,image_02606,"{'black': 0.015041800643086817, 'blue': 0.0, '...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."
3,image_02582,"{'black': 0.006131934032983509, 'blue': 0.0012...","{'fg': {'black': 0.0, 'blue': 0.00160446996104..."
4,image_02524,"{'black': 0.08323728813559322, 'blue': 0.13017...","{'fg': {'black': 0.0, 'blue': 0.25688320526271..."
5,image_02468,"{'black': 0.003394302848575712, 'blue': 0.1297...","{'fg': {'black': 0.0, 'blue': 0.65981584246691..."
6,image_02442,"{'black': 0.02641781548250265, 'blue': 0.00073...","{'fg': {'black': 0.0, 'blue': 0.00159756561430..."
7,image_02190,"{'black': 0.0017426470588235293, 'blue': 0.0, ...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."
8,image_01807,"{'black': 0.007767379679144385, 'blue': 0.0001...","{'fg': {'black': 0.0, 'blue': 0.00053419709328..."
9,image_01726,"{'black': 0.04331466666666667, 'blue': 0.0, 'b...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."


In [28]:
# save progress
# results.to_csv('699_results.csv', index=False)

In [14]:
def segmented_image_colors(pic):
    
#     using code from opencv tutorial to create an array distinguishing foreground and background
    gray = cv.cvtColor(pic,cv.COLOR_BGR2GRAY)
    ret, thresh = cv.threshold(gray,0,255,cv.THRESH_BINARY_INV+cv.THRESH_OTSU)
    
    colors = {'fg': {'black': 0, 'blue':0, 'brown':0, 'gray':0, 'green':0,'orange':0, 
              'pink':0, 'purple':0, 'red':0, 'white':0, 'yellow':0}, 
              'bg': {'black': 0, 'blue':0, 'brown':0, 'gray':0, 'green':0,'orange':0, 
              'pink':0, 'purple':0, 'red':0, 'white':0, 'yellow':0}}

#     keep track of the total no. of pixels in foreground and background
#     use later to calculate percentages
    fg_total = 0
    bg_total = 0

#     loop through the rows and columns of the img
#     consult array to determine if the pixel is classified as foreground or background
    for i in range(len(pic)):
        for j in range(len(pic[i])):
            if thresh[i][j] == 255:
                pixel = pixel_color(pic, i, j)
                colors['bg'][pixel] = colors['bg'][pixel] + 1
                bg_total += 1
            else:
                pixel = pixel_color(pic, i, j)
                colors['fg'][pixel] = colors['fg'][pixel] + 1
                fg_total += 1
    
#     calculate percentage of pixels with a specific color label in foreground
    fg_color_percent = {}
    for key in colors['fg'].keys():
        fg_color_percent[key] = int(colors['fg'][key])/fg_total
    
#     calculate percentage of pixels with a specific color label in background
    bg_color_percent = {}
    for key in colors['bg'].keys():
        bg_color_percent[key] = int(colors['bg'][key])/bg_total
    
#     combine fg and bg percentage dicts into one result dict
    color_percent = {}
    color_percent['fg'] = fg_color_percent
    color_percent['bg'] = bg_color_percent
        
    return color_percent

In [15]:
# take a name from a row of a pd table and read the corresponding image
# feed image into segmented_image_colors func

def row_read_2(row, df, col_title='filename'):
    name = 'flowers/'+ str(df[col_title][row]) + '.jpg'
    pic = cv.imread(name)
    pic = cv.cvtColor(pic, cv.COLOR_BGR2RGB)
    
    return segmented_image_colors(pic)

In [17]:
# add fg and bg percents

results['output2'][0] = row_read_2(0, df=results)
results['output2'][1] = row_read_2(1, df=results)
results['output2'][2] = row_read_2(2, df=results)
results['output2'][3] = row_read_2(3, df=results)
results['output2'][4] = row_read_2(4, df=results)
results['output2'][5] = row_read_2(5, df=results)
results['output2'][6] = row_read_2(6, df=results)
results['output2'][7] = row_read_2(7, df=results)
results['output2'][8] = row_read_2(8, df=results)
results['output2'][9] = row_read_2(9, df=results)
results['output2'][10] = row_read_2(10, df=results)
results['output2'][11] = row_read_2(11, df=results)
results['output2'][12] = row_read_2(12, df=results)
results['output2'][13] = row_read_2(13, df=results)
results['output2'][14] = row_read_2(14, df=results)
results['output2'][15] = row_read_2(15, df=results)
results['output2'][16] = row_read_2(16, df=results)
results['output2'][17] = row_read_2(17, df=results)
results['output2'][18] = row_read_2(18, df=results)
results['output2'][19] = row_read_2(19, df=results)

In [18]:
results

Unnamed: 0,filename,output1,output2
0,image_07919,"{'black': 0.16012293853073464, 'blue': 0.01209...","{'fg': {'black': 0.0, 'blue': 0.02442550546122..."
1,image_02889,"{'black': 0.058264264264264266, 'blue': 0.0, '...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."
2,image_02606,"{'black': 0.015041800643086817, 'blue': 0.0, '...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."
3,image_02582,"{'black': 0.006131934032983509, 'blue': 0.0012...","{'fg': {'black': 0.0, 'blue': 0.00160446996104..."
4,image_02524,"{'black': 0.08323728813559322, 'blue': 0.13017...","{'fg': {'black': 0.0, 'blue': 0.25688320526271..."
5,image_02468,"{'black': 0.003394302848575712, 'blue': 0.1297...","{'fg': {'black': 0.0, 'blue': 0.65981584246691..."
6,image_02442,"{'black': 0.02641781548250265, 'blue': 0.00073...","{'fg': {'black': 0.0, 'blue': 0.00159756561430..."
7,image_02190,"{'black': 0.0017426470588235293, 'blue': 0.0, ...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."
8,image_01807,"{'black': 0.007767379679144385, 'blue': 0.0001...","{'fg': {'black': 0.0, 'blue': 0.00053419709328..."
9,image_01726,"{'black': 0.04331466666666667, 'blue': 0.0, 'b...","{'fg': {'black': 0.0, 'blue': 0.0, 'brown': 0...."


In [19]:
results.to_csv('699_results.csv', index=False)