In [None]:
# dependencies

import os
import urllib
import urllib.request
from collections import Counter

import cv2
import matplotlib.pyplot as plt
import numpy as np
import webcolors
from skimage.color import rgb2lab, deltaE_cie76
from sklearn.cluster import KMeans

In [None]:
def get_image(image_path):
    """Download an image and return its pixels as a flat array of RGB rows.

    The image is fetched from ``image_path`` (a URL), decoded with OpenCV,
    converted from BGR to RGB, resized to 400x400 and reshaped so that each
    row is one pixel with three channel values.

    Parameters
    ----------
    image_path : str
        URL passed to ``urllib.request.urlopen``.

    Returns
    -------
    numpy.ndarray
        Array with three columns (R, G, B) and one row per pixel of the
        resized 400x400 image.

    Raises
    ------
    ValueError
        If the downloaded bytes cannot be decoded as an image.
    Any exception raised by ``urllib.request.urlopen`` is propagated unchanged.
    """
    # Use the response as a context manager so the connection is always closed,
    # even when reading fails part-way through.
    with urllib.request.urlopen(image_path) as resp:
        raw_bytes = np.frombuffer(resp.read(), dtype=np.uint8)

    image = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode signals failure by returning None; fail here with a clear
        # message instead of letting cvtColor raise an unrelated error.
        raise ValueError("Could not decode image from: " + str(image_path))

    # OpenCV decodes to BGR channel order; the rest of the pipeline expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_modify = cv2.resize(image, (400, 400), interpolation=cv2.INTER_AREA)

    # Flatten the two spatial dimensions into a single list of pixels.
    return image_modify.reshape(-1, 3)


def RGB2HEX(color):
    """Format the first three channels of ``color`` as a ``#rrggbb`` string.

    Each channel is passed through ``int()`` (fractional parts are truncated)
    and rendered as at least two lowercase hexadecimal digits.
    """
    channels = (color[0], color[1], color[2])
    return "#" + "".join(format(int(channel), "02x") for channel in channels)

def closest_color(requested_color):
    """Return the name of the listed color nearest to ``requested_color``.

    Nearness is the squared Euclidean distance between the first three
    components of ``requested_color`` and each entry of
    ``webcolors.css3_hex_to_names``. When several names are equally distant,
    the one that appears last in that mapping is returned.
    """

    def _squared_distance(hex_code):
        # Sum of squared per-channel differences against the requested color.
        red, green, blue = webcolors.hex_to_rgb(hex_code)
        return (
            (red - requested_color[0]) ** 2
            + (green - requested_color[1]) ** 2
            + (blue - requested_color[2]) ** 2
        )

    # Keyed by distance, so a later name at the same distance replaces an
    # earlier one.
    distance_to_name = {
        _squared_distance(hex_code): css_name
        for hex_code, css_name in webcolors.css3_hex_to_names.items()
    }
    return distance_to_name[min(distance_to_name)]

def get_color_name(requested_color):
    """Return a color name for ``requested_color``.

    The exact name from ``webcolors.rgb_to_name`` is used when one exists;
    otherwise the result of ``closest_color`` is returned.

    Parameters
    ----------
    requested_color : sequence
        RGB triple accepted by ``webcolors.rgb_to_name``.

    Returns
    -------
    str
        The exact or nearest color name.
    """
    try:
        return webcolors.rgb_to_name(requested_color)
    except ValueError:
        # rgb_to_name found no exact match, so fall back to the nearest color.
        return closest_color(requested_color)

def get_colors(image, number, random_state=None):
    """Cluster the pixels of ``image`` and describe each cluster's color.

    Parameters
    ----------
    image : array-like
        Pixel rows passed to ``KMeans.fit_predict``; each row is one pixel.
    number : int
        Number of clusters (dominant colors) to extract.
    random_state : int or None, optional
        Forwarded to ``KMeans`` so results can be made reproducible. The
        default ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    list of tuple
        One ``(hex_color, color_name, percentage)`` tuple per cluster that
        received at least one pixel, in order of each cluster's first
        appearance in the label array. ``percentage`` is the cluster's share
        of all pixels, rounded to a whole number (as a float).
    """
    clf = KMeans(n_clusters=number, random_state=random_state)
    labels = clf.fit_predict(image)
    counts = Counter(labels)

    # Use the real pixel count rather than assuming a 400x400 input.
    total_pixels = len(labels)

    package = []
    for label, count in counts.items():
        # Look every attribute up from the same cluster label so that the hex
        # value, name and percentage in one tuple describe the same cluster.
        hex_color = RGB2HEX(clf.cluster_centers_[label])
        color_name = get_color_name(webcolors.hex_to_rgb(hex_color))
        percentage = round((count / total_pixels) * 100, 0)
        package.append((hex_color, color_name, percentage))

    return package

In [3]:
import pandas as pd

# Load the painting metadata and report the number of non-null Object_id values.
paintings = pd.read_csv("paintings_final.csv")
print(paintings["Object_id"].count())

2652


In [None]:
# Maps each painting's Object_id to its list of [hex, name, percentage] entries.
painting_colors = {}

# Iterate through the paintings and extract the dominant colors of each one.
for index, path, object_id in zip(
    paintings.index, paintings["Met_link"], paintings["Object_id"]
):
    try:
        painting_stats = get_colors(get_image(path), 10)
    except Exception as exc:
        # Best effort: one broken link or undecodable image must not stop the
        # run, but report which painting failed and why. Catching Exception
        # (not a bare except) keeps Ctrl-C / kernel interrupts working.
        print("Error in processing painting: " + str(index) + " (" + repr(exc) + ")")
        continue

    # get_colors returns a list of tuples; store lists for easier processing later.
    painting_colors[object_id] = [list(elem) for elem in painting_stats]

    print("Processing painting: " + str(index))
        


Processing painting: 0
Processing painting: 1
Processing painting: 2
Processing painting: 3
Processing painting: 4
Processing painting: 5
Processing painting: 6
Processing painting: 7
Processing painting: 8
Processing painting: 9
Processing painting: 10
Processing painting: 11
Processing painting: 12
Processing painting: 13
Processing painting: 14
Processing painting: 15
Processing painting: 16
Processing painting: 17
Processing painting: 18
Processing painting: 19
Processing painting: 20
Processing painting: 21
Processing painting: 22
Processing painting: 23
Processing painting: 24
Processing painting: 25
Processing painting: 26
Processing painting: 27
Processing painting: 28
Processing painting: 29
Processing painting: 30
Processing painting: 31
Processing painting: 32
Processing painting: 33
Processing painting: 34
Processing painting: 35
Processing painting: 36
Processing painting: 37
Processing painting: 38
Error in processing painting
Error in processing painting
Processing paint

Processing painting: 330
Processing painting: 331
Processing painting: 332
Processing painting: 333
Processing painting: 334
Processing painting: 335
Processing painting: 336
Processing painting: 337
Processing painting: 338
Processing painting: 339
Processing painting: 340
Processing painting: 341
Processing painting: 342
Processing painting: 343
Processing painting: 344
Processing painting: 345
Processing painting: 346
Processing painting: 347
Processing painting: 348
Processing painting: 349
Processing painting: 350
Processing painting: 351
Processing painting: 352
Processing painting: 353
Processing painting: 354
Processing painting: 355
Processing painting: 356
Processing painting: 357
Processing painting: 358
Processing painting: 359
Processing painting: 360
Processing painting: 361
Processing painting: 362
Processing painting: 363
Processing painting: 364
Processing painting: 365
Processing painting: 366
Processing painting: 367
Processing painting: 368
Processing painting: 369


Processing painting: 657
Processing painting: 658
Processing painting: 659
Processing painting: 660
Processing painting: 661
Processing painting: 662
Processing painting: 663
Processing painting: 664
Processing painting: 665
Processing painting: 666
Processing painting: 667
Processing painting: 668
Processing painting: 669
Processing painting: 670
Processing painting: 671
Processing painting: 672
Processing painting: 673
Processing painting: 674
Processing painting: 675
Processing painting: 676
Processing painting: 677
Processing painting: 678
Processing painting: 679
Processing painting: 680
Processing painting: 681
Processing painting: 682
Processing painting: 683
Processing painting: 684
Processing painting: 685
Processing painting: 686
Processing painting: 687
Processing painting: 688
Processing painting: 689
Processing painting: 690
Processing painting: 691
Processing painting: 692
Processing painting: 693
Processing painting: 694
Processing painting: 695
Processing painting: 696


In [None]:
# Build one row per painting, then melt so that every color entry becomes its
# own row carrying its (repeating) painting index.
colors = (
    pd.DataFrame.from_dict(painting_colors, orient="index")
    .reset_index()
    .melt(id_vars=["index"])
)

# Sanity check: distinct paintings vs. total melted rows.
print(colors["index"].nunique())
print(colors["index"].count())
colors.head()

In [None]:
# Assumes each "value" cell is the [hex, name, percentage] list produced by the
# color-extraction loop (missing where a painting yielded fewer entries).
# Pull the elements out by position instead of round-tripping through str()
# and comma-splitting: this keeps Size numeric (no stray leading space) and
# leaves missing entries as missing rather than the literal text "nan".
color_entries = colors["value"]
colors["Hex"] = color_entries.str[0]
colors["Color name"] = color_entries.str[1]
colors["Size"] = color_entries.str[2]

colors = colors.drop(columns=["variable", "value"])
colors = colors.rename(columns={"index": "Object_id"})

In [None]:
# Preview the first rows of the reshaped color table.
colors.head(n=5)


In [142]:
# Persist the per-painting color table without the DataFrame index column.
colors.to_csv("paintings_colors.csv", index=False)