## Imports

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt 

import sklearn
from sklearn.cluster import KMeans
from collections import Counter

from tqdm import tqdm
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

from urllib.request import urlretrieve

import cv2

import warnings
warnings.filterwarnings('ignore')

## Collect paint swatch images from Williamsburg Oil Paints' website

In [2]:
url = 'https://www.williamsburgoils.com/products/colors'
# prepare the option for the chrome driver
options = webdriver.ChromeOptions()
options.add_argument('headless')

# start chrome browser
browser = webdriver.Chrome(options=options)

# open url
browser.get(url)

# allow to load
time.sleep(5)

In [3]:
colors = []

for e in tqdm(browser.find_elements(By.CLASS_NAME,"colorColumn")):
    color = {}
    text = e.text.split('\n')
    color['name'] = text[0]
    color['alt_name'] = text[1]
    for line in e.find_elements(By.TAG_NAME, 'img'):
        color['width'] = line.get_attribute('width')
        color['height'] = line.get_attribute('height')
        color['swatch_image_url'] = line.get_attribute('src')
    colors.append(color)
    time.sleep(2)

df = pd.DataFrame(colors)

100%|██████████| 176/176 [05:59<00:00,  2.04s/it]


In [4]:
df

Unnamed: 0,name,alt_name,width,height,swatch_image_url
0,Brilliant Yellow Extra Pale,#6000202,53,53,https://goldenhub.goldenpaints.com/storage/upl...
1,Brilliant Yellow Pale,#6000212,53,53,https://goldenhub.goldenpaints.com/storage/upl...
2,Nickel Yellow,#6000224,53,53,https://goldenhub.goldenpaints.com/storage/upl...
3,Bismuth Vanadate Yellow,#6001929,53,53,https://goldenhub.goldenpaints.com/storage/upl...
4,Cadmium Lemon,#6000246,53,53,https://goldenhub.goldenpaints.com/storage/upl...
...,...,...,...,...,...
171,Iridescent Pewter,#6001843,53,53,https://goldenhub.goldenpaints.com/storage/upl...
172,Interference Violet,#6001813,53,53,https://goldenhub.goldenpaints.com/storage/upl...
173,Interference Red,#6001873,53,53,https://goldenhub.goldenpaints.com/storage/upl...
174,Interference Blue,#6001803,53,53,https://goldenhub.goldenpaints.com/storage/upl...


In [5]:
df['path_to_image'] = df['name'].map(lambda x: f"./images/{x.lower().replace(' ','_')}_swatch.jpg")

In [6]:
for i in tqdm(range(len(df))):
    img_url = df.loc[i,'swatch_image_url']
    save_path = df.loc[i,'path_to_image']
    
    urlretrieve(img_url, save_path)
    time.sleep(3)

100%|██████████| 176/176 [09:01<00:00,  3.07s/it]


## Set up computer vision

In [8]:
def get_image(path):
    image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

def RGB2HEX(color):
    return "#{:02x}{:02x}{:02x}".format(int(color[0]), int(color[1]), int(color[2]))


In [9]:
def get_most_common_color(path, number_of_colors=10):
    # get the image path
    image = get_image(path)
    
    # get the number of colors we want to retrieve
    number_of_colors = number_of_colors
    
    # reshape the image
    modified_image = image.reshape(image.shape[0]*image.shape[1], 3)
    
    #instantiate the KMeans model
    clf = KMeans(n_clusters = number_of_colors)
    
    # get the labels for the image from the model
    labels = clf.fit_predict(modified_image)
    
    # get the counts of the labels produced from the prediction
    counts = Counter(labels)
    
    # get the RGB values from the center of the clusters
    center_colors = clf.cluster_centers_
    
    # get ordered colors by iterating through the keys
    ordered_colors = [center_colors[i] for i in counts.keys()]
    
    # get the hex and rgb colors as dicts
    hex_dict = {i:RGB2HEX(ordered_colors[i]) for i in counts.keys()}
    rgb_dict = {i:ordered_colors[i] for i in counts.keys()}
    
    # most common label
    most_common_label = counts.most_common()[0][0]
    
    # most common hex color 
    most_common_hex = hex_dict[most_common_label]
    
    # most common RGB values as integers
    most_common_rgb = [int(i) for i in rgb_dict[most_common_label]]
    
    return most_common_hex, most_common_rgb

In [10]:
hex_rgb = df.path_to_image.map(lambda x: get_most_common_color(x))
df['most_common_hex'] = hex_rgb.map(lambda x: x[0])
df['most_common_rgb'] = hex_rgb.map(lambda x: x[1])
df.head()

Unnamed: 0,name,alt_name,width,height,swatch_image_url,path_to_image,most_common_hex,most_common_rgb
0,Brilliant Yellow Extra Pale,#6000202,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_extra_pale_swatch.jpg,#f3efd5,"[243, 239, 213]"
1,Brilliant Yellow Pale,#6000212,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_pale_swatch.jpg,#f6eea6,"[246, 238, 166]"
2,Nickel Yellow,#6000224,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/nickel_yellow_swatch.jpg,#e9df64,"[233, 223, 100]"
3,Bismuth Vanadate Yellow,#6001929,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/bismuth_vanadate_yellow_swatch.jpg,#f4dd07,"[244, 221, 7]"
4,Cadmium Lemon,#6000246,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/cadmium_lemon_swatch.jpg,#eddf30,"[237, 223, 48]"


In [12]:
# df.to_csv('./williamsburg_oil_colors.csv', index = False)

In [3]:
df = pd.read_csv('./williamsburg_oil_colors.csv')
df

Unnamed: 0,name,alt_name,width,height,swatch_image_url,path_to_image,most_common_hex,most_common_rgb
0,Brilliant Yellow Extra Pale,#6000202,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_extra_pale_swatch.jpg,#f3efd5,"[243, 239, 213]"
1,Brilliant Yellow Pale,#6000212,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/brilliant_yellow_pale_swatch.jpg,#f6eea6,"[246, 238, 166]"
2,Nickel Yellow,#6000224,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/nickel_yellow_swatch.jpg,#e9df64,"[233, 223, 100]"
3,Bismuth Vanadate Yellow,#6001929,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/bismuth_vanadate_yellow_swatch.jpg,#f4dd07,"[244, 221, 7]"
4,Cadmium Lemon,#6000246,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/cadmium_lemon_swatch.jpg,#eddf30,"[237, 223, 48]"
...,...,...,...,...,...,...,...,...
171,Iridescent Pewter,#6001843,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/iridescent_pewter_swatch.jpg,#5d5e5f,"[93, 94, 95]"
172,Interference Violet,#6001813,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/interference_violet_swatch.jpg,#c0abad,"[192, 171, 173]"
173,Interference Red,#6001873,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/interference_red_swatch.jpg,#d9aa8d,"[217, 170, 141]"
174,Interference Blue,#6001803,53,53,https://goldenhub.goldenpaints.com/storage/upl...,./images/interference_blue_swatch.jpg,#879ab7,"[135, 154, 183]"


In [6]:
print('|Color Name|Hex color|RGB Color|Paint swatch|')
print('|-----|-----|-----|-----|')
for i in range(len(df)):
    print(f"|{df.loc[i,'name']}|{df.loc[i,'most_common_hex']}|{df.loc[i,'most_common_rgb']}|![]({df.loc[i,'path_to_image']})|")

|Color Name|Hex color|RGB Color|Paint swatch|
|-----|-----|-----|-----|
|Brilliant Yellow Extra Pale|#f3efd5|[243, 239, 213]|![](./images/brilliant_yellow_extra_pale_swatch.jpg)|
|Brilliant Yellow Pale|#f6eea6|[246, 238, 166]|![](./images/brilliant_yellow_pale_swatch.jpg)|
|Nickel Yellow|#e9df64|[233, 223, 100]|![](./images/nickel_yellow_swatch.jpg)|
|Bismuth Vanadate Yellow|#f4dd07|[244, 221, 7]|![](./images/bismuth_vanadate_yellow_swatch.jpg)|
|Cadmium Lemon|#eddf30|[237, 223, 48]|![](./images/cadmium_lemon_swatch.jpg)|
|Permanent Lemon|#eee445|[238, 228, 69]|![](./images/permanent_lemon_swatch.jpg)|
|Cadmium Yellow Light|#f7eb50|[247, 235, 80]|![](./images/cadmium_yellow_light_swatch.jpg)|
|Permanent Yellow Light|#f0e654|[240, 230, 84]|![](./images/permanent_yellow_light_swatch.jpg)|
|Cadmium Yellow Medium|#f4d935|[244, 217, 53]|![](./images/cadmium_yellow_medium_swatch.jpg)|
|Permanent Yellow Medium|#eccc1c|[236, 204, 28]|![](./images/permanent_yellow_medium_swatch.jpg)|
|Cadmium Y