## Imports

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import sklearn
from sklearn.cluster import KMeans
from collections import Counter

from tqdm import tqdm
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

from urllib.request import urlretrieve

import cv2

import warnings
warnings.filterwarnings('ignore')

## Collect information and images

In [2]:
# this is the page where the paint swatch colors are from
url = 'https://www.williamsburgoils.com/products/colors'

# prepare the option for the chrome driver
options = webdriver.ChromeOptions()
options.add_argument('headless')

# start chrome browser
browser = webdriver.Chrome(options=options)

# open url
browser.get(url)

# allow to load
time.sleep(5)

In [3]:
# create an empty list to store the results of the following for loop
colors = []

# iterate through all of elements ("e") with the class name of "colorColumn"
for e in tqdm(browser.find_elements(By.CLASS_NAME, "colorColumn")):
    # create a new dictionary for each loop where the information from
    # each color can be stored
    color = {}

    # the text in each element returns the official name of the color
    # and the alternate name of the color as one item with a line break
    
    # splitting the text up creates a list that can be used to identify
    # the official name and the alternate name by index value, 
    # as done below
    text = e.text.split('\n')
    
    # add each value to the color dictionary created at the 
    # top of the loop
    color['name'] = text[0]
    color['alt_name'] = text[1]
    
    # each "e" contained multiple child elements ("line"), not all 
    # of which have the tag "img". if the line element contains 
    # the "img" tag, the code below grabs the "src" attribute and 
    # saves it to the color dictionary to be used a couple cells down
    for line in e.find_elements(By.TAG_NAME, 'img'):
        color['swatch_image_url'] = line.get_attribute('src')
    
    # append the color dict to the list created above
    colors.append(color)
    
    # sleep for a couple seconds to not hit the page too many times
    time.sleep(2)

# create a dataframe from the information collected
df = pd.DataFrame(colors)

100%|██████████| 176/176 [05:57<00:00,  2.03s/it]


In [25]:
# the code below creates a new column in the dataframe 
# with the path to where the image files collected for the 
# computer vision part of the code will be stored
df['path_to_image'] = df['name'].map(
    lambda x: f"./images/{x.lower().replace(' ','_')}_swatch.jpg")
df['path_to_returned_color'] = df['name'].map(
    lambda x: f"./returned_colors/{x.lower().replace(' ','_')}_returned.jpg")

In [5]:
# iterate through each row in the dataframe
for i in tqdm(range(len(df))):

    # call the "src" attribute collected while scraping
    # Williamsburg Oils' website for the url
    img_url = df.loc[i, 'swatch_image_url']
    
    # call the path created in the cell above that will 
    # store the images in the images directory
    save_path = df.loc[i, 'path_to_image']

    # use url retrive with the variables above to save the images
    urlretrieve(img_url, save_path)
    
    # sleep to not hit the site too many times
    time.sleep(2)

100%|██████████| 176/176 [09:05<00:00,  3.10s/it]


## Computer vision & color identification

Credit to [this article](https://towardsdatascience.com/image-color-identification-with-machine-learning-and-image-processing-using-python-f3dd0606bdca). 

In [3]:
def get_image(path):
    '''
    path: file path for where to find the image
    
    returns an array of RGB values for the image
    '''
    image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image


def RGB2HEX(color):
    return "#{:02x}{:02x}{:02x}".format(int(color[0]), int(color[1]),
                                        int(color[2]))

In [24]:
df = pd.read_csv('williamsburg_oil_colors.csv', converters={'most_common_rgb':eval})
df.head()

# from matplotlib.figure import Figure

# path = './images/brilliant_yellow_extra_pale_swatch.jpg'
# image = get_image(path)
# plt.imshow(image)


Unnamed: 0,name,alt_name,swatch_image_url,path_to_image,most_common_hex,most_common_rgb
0,Brilliant Yellow Extra Pale,#6000202,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_extra_pale_swatch.jpg,#f9f4d7,"[249, 244, 215]"
1,Brilliant Yellow Pale,#6000212,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_pale_swatch.jpg,#f6eea7,"[246, 238, 167]"
2,Nickel Yellow,#6000224,https://goldenhub.goldenpaints.com/storage/upl...,./images/nickel_yellow_swatch.jpg,#e4d95e,"[228, 217, 94]"
3,Bismuth Vanadate Yellow,#6001929,https://goldenhub.goldenpaints.com/storage/upl...,./images/bismuth_vanadate_yellow_swatch.jpg,#f6df08,"[246, 223, 8]"
4,Cadmium Lemon,#6000246,https://goldenhub.goldenpaints.com/storage/upl...,./images/cadmium_lemon_swatch.jpg,#ece040,"[236, 224, 64]"


In [7]:
def get_most_common_color(path, number_of_colors=10):
    # get the image path
    image = get_image(path)

    # get the number of colors we want to retrieve
    number_of_colors = number_of_colors

    # reshape the image
    modified_image = image.reshape(image.shape[0] * image.shape[1], 3)

    #instantiate the KMeans model
    clf = KMeans(n_clusters=number_of_colors)

    # get the labels for the image from the model
    labels = clf.fit_predict(modified_image)

    # get the counts of the labels produced from the prediction
    counts = Counter(labels)

    # get the RGB values from the center of the clusters
    center_colors = clf.cluster_centers_

    # get ordered colors by iterating through the keys
    ordered_colors = [center_colors[i] for i in counts.keys()]

    # get the hex and rgb colors as dicts
    hex_dict = {i: RGB2HEX(ordered_colors[i]) for i in counts.keys()}
    rgb_dict = {i: ordered_colors[i] for i in counts.keys()}

    # most common label
    most_common_label = counts.most_common()[0][0]

    # most common hex color
    most_common_hex = hex_dict[most_common_label]

    # most common RGB values as integers
    most_common_rgb = [int(i) for i in rgb_dict[most_common_label]]

    return most_common_hex, most_common_rgb

In [28]:
hex_rgb = df.path_to_image.map(lambda x: get_most_common_color(x, 1))
df['most_common_hex'] = hex_rgb.map(lambda x: x[0])
df['most_common_rgb'] = hex_rgb.map(lambda x: x[1])
df.head()

Unnamed: 0,name,alt_name,swatch_image_url,path_to_image,most_common_hex,most_common_rgb
0,Brilliant Yellow Extra Pale,#6000202,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_extra_pale_swatch.jpg,#f4f0d4,"[244, 240, 212]"
1,Brilliant Yellow Pale,#6000212,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_pale_swatch.jpg,#f4eca0,"[244, 236, 160]"
2,Nickel Yellow,#6000224,https://goldenhub.goldenpaints.com/storage/upl...,./images/nickel_yellow_swatch.jpg,#e7dd69,"[231, 221, 105]"
3,Bismuth Vanadate Yellow,#6001929,https://goldenhub.goldenpaints.com/storage/upl...,./images/bismuth_vanadate_yellow_swatch.jpg,#f4dd0c,"[244, 221, 12]"
4,Cadmium Lemon,#6000246,https://goldenhub.goldenpaints.com/storage/upl...,./images/cadmium_lemon_swatch.jpg,#ebde33,"[235, 222, 51]"


In [9]:
# save the resulting dataframe as a csv file
df.to_csv('./williamsburg_oil_colors.csv', index=False)

In [72]:
for i in tqdm(range(len(df))):
    rgb = df.loc[i, 'most_common_rgb']
    first = []
    full = []
    first_count = 0
    full_count = 0
    while first_count < 100:
        first_count += 1
        first.append(rgb)
    while full_count < 100:
        full_count += 1
        full.append(first)

    plt.rcParams['figure.figsize'] = [2.5, 2.5]
    plt.rcParams['figure.constrained_layout.h_pad'] = 0
    plt.rcParams['figure.constrained_layout.w_pad'] = 0
    plt.rcParams['figure.constrained_layout.hspace'] = 0
    plt.rcParams['figure.constrained_layout.wspace'] = 0
    plt.rcParams['figure.constrained_layout.use'] = True
    plt.rcParams['axes.edgecolor'] = 'w'
    plt.rcParams['axes.labelpad'] = 0
    plt.rcParams['axes.linewidth'] = 0
    plt.rcParams['axes.xmargin'] = 0
    plt.rcParams['axes.spines.right'] = 0

    plt.xticks(ticks=[])
    plt.yticks(ticks=[])
    plt.imshow(full)
    plt.savefig(df.loc[i, 'path_to_returned_color'])
    plt.close()

100%|██████████| 176/176 [00:09<00:00, 17.73it/s]


In [44]:
plt.rcParams

RcParams({'_internal.classic_mode': False,
          'agg.path.chunksize': 0,
          'animation.avconv_args': [],
          'animation.avconv_path': 'avconv',
          'animation.bitrate': -1,
          'animation.codec': 'h264',
          'animation.convert_args': [],
          'animation.convert_path': 'convert',
          'animation.embed_limit': 20.0,
          'animation.ffmpeg_args': [],
          'animation.ffmpeg_path': 'ffmpeg',
          'animation.frame_format': 'png',
          'animation.html': 'none',
          'animation.html_args': [],
          'animation.writer': 'ffmpeg',
          'axes.autolimit_mode': 'data',
          'axes.axisbelow': 'line',
          'axes.edgecolor': 'black',
          'axes.facecolor': 'white',
          'axes.formatter.limits': [-5, 6],
          'axes.formatter.min_exponent': 0,
          'axes.formatter.offset_threshold': 4,
          'axes.formatter.use_locale': False,
          'axes.formatter.use_mathtext': False,
          'axes.f

In [39]:
# df = pd.read_csv('./williamsburg_oil_colors.csv')
df.sort_values(by='name',inplace=True)
df = df.reset_index(drop=True)
df

Unnamed: 0,name,alt_name,swatch_image_url,path_to_image,most_common_hex,most_common_rgb,path_to_returned_color
0,Alizarin Crimson,#6000684,https://goldenhub.goldenpaints.com/storage/upl...,./images/alizarin_crimson_swatch.jpg,#92343f,"[146, 52, 63]",./returned_colors/alizarin_crimson_returned.jpg
1,Alizarin Orange,#6000534,https://goldenhub.goldenpaints.com/storage/upl...,./images/alizarin_orange_swatch.jpg,#9f471b,"[159, 71, 27]",./returned_colors/alizarin_orange_returned.jpg
2,Alizarin Yellow,#6000514,https://goldenhub.goldenpaints.com/storage/upl...,./images/alizarin_yellow_swatch.jpg,#c88c1b,"[200, 140, 27]",./returned_colors/alizarin_yellow_returned.jpg
3,Bismuth Vanadate Yellow,#6001929,https://goldenhub.goldenpaints.com/storage/upl...,./images/bismuth_vanadate_yellow_swatch.jpg,#f6df08,"[246, 223, 8]",./returned_colors/bismuth_vanadate_yellow_retu...
4,Bohemian Green Earth,#6001021,https://goldenhub.goldenpaints.com/storage/upl...,./images/bohemian_green_earth_swatch.jpg,#474b3b,"[71, 75, 59]",./returned_colors/bohemian_green_earth_returne...
...,...,...,...,...,...,...,...
171,Veronese Green,#6001103,https://goldenhub.goldenpaints.com/storage/upl...,./images/veronese_green_swatch.jpg,#1f7b62,"[31, 123, 98]",./returned_colors/veronese_green_returned.jpg
172,Viridian,#6001245,https://goldenhub.goldenpaints.com/storage/upl...,./images/viridian_swatch.jpg,#284d4e,"[40, 77, 78]",./returned_colors/viridian_returned.jpg
173,Yellow Ochre (Domestic),#6001401,https://goldenhub.goldenpaints.com/storage/upl...,./images/yellow_ochre_(domestic)_swatch.jpg,#b38533,"[179, 133, 51]",./returned_colors/yellow_ochre_(domestic)_retu...
174,Yellow Ochre Burnt,#6001541,https://goldenhub.goldenpaints.com/storage/upl...,./images/yellow_ochre_burnt_swatch.jpg,#755b3d,"[117, 91, 61]",./returned_colors/yellow_ochre_burnt_returned.jpg


In [41]:
print('|Color Name|Hex color|RGB Color|Paint swatch|Returned color|')
print('|-----|-----|-----|-----|-----|')
for i in range(len(df)):
    print(f"|{df.loc[i,'name']}|{df.loc[i,'most_common_hex']}|{df.loc[i,'most_common_rgb']}|![]({df.loc[i,'path_to_image']})|![]({df.loc[i,'path_to_returned_color']})|")

|Color Name|Hex color|RGB Color|Paint swatch|Returned color|
|-----|-----|-----|-----|-----|
|Alizarin Crimson|#92343f|[146, 52, 63]|![](./images/alizarin_crimson_swatch.jpg)|![](./returned_colors/alizarin_crimson_returned.jpg)|
|Alizarin Orange|#9f471b|[159, 71, 27]|![](./images/alizarin_orange_swatch.jpg)|![](./returned_colors/alizarin_orange_returned.jpg)|
|Alizarin Yellow|#c88c1b|[200, 140, 27]|![](./images/alizarin_yellow_swatch.jpg)|![](./returned_colors/alizarin_yellow_returned.jpg)|
|Bismuth Vanadate Yellow|#f6df08|[246, 223, 8]|![](./images/bismuth_vanadate_yellow_swatch.jpg)|![](./returned_colors/bismuth_vanadate_yellow_returned.jpg)|
|Bohemian Green Earth|#474b3b|[71, 75, 59]|![](./images/bohemian_green_earth_swatch.jpg)|![](./returned_colors/bohemian_green_earth_returned.jpg)|
|Brilliant Yellow Extra Pale|#f9f4d7|[249, 244, 215]|![](./images/brilliant_yellow_extra_pale_swatch.jpg)|![](./returned_colors/brilliant_yellow_extra_pale_returned.jpg)|
|Brilliant Yellow Pale|#f6eea