In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.color import lab2rgb, rgb2lab
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_graphviz

In [5]:
# representative RGB colours for each label, for nice display
COLOUR_RGB = {
    'red': (255, 0, 0),
    'orange': (255, 112, 0),
    'yellow': (255, 255, 0),
    'green': (0, 231, 0),
    'blue': (0, 0, 255),
    'purple': (185, 0, 185),
    'brown': (117, 60, 0),
    'pink': (255, 184, 184),
    'black': (0, 0, 0),
    'grey': (150, 150, 150),
    'white': (255, 255, 255),
}
name_to_rgb = np.vectorize(COLOUR_RGB.get, otypes=[np.uint8, np.uint8, np.uint8])

In [6]:
def plot_predictions(model, lum=67, resolution=300):
    """
    Create a slice of LAB colour space with given luminance; predict with the model; plot the results.
    """
    wid = resolution
    hei = resolution
    n_ticks = 5

    # create a hei*wid grid of LAB colour values, with L=lum
    ag = np.linspace(-100, 100, wid)
    bg = np.linspace(-100, 100, hei)
    aa, bb = np.meshgrid(ag, bg)
    ll = lum * np.ones((hei, wid))
    lab_grid = np.stack([ll, aa, bb], axis=2)

    # convert to RGB for consistency with original input
    X_grid = lab2rgb(lab_grid)

    # predict and convert predictions to colours so we can see what's happening
    y_grid = model.predict(X_grid.reshape((-1, 3)))
    pixels = np.stack(name_to_rgb(y_grid), axis=1) / 255
    pixels = pixels.reshape((hei, wid, 3))

    # plot input and predictions
    plt.figure(figsize=(10, 5))
    plt.suptitle('Predictions at L=%g' % (lum,))
    plt.subplot(1, 2, 1)
    plt.title('Inputs')
    plt.xticks(np.linspace(0, wid, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.yticks(np.linspace(0, hei, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.xlabel('A')
    plt.ylabel('B')
    plt.imshow(X_grid.reshape((hei, wid, -1)))

    plt.subplot(1, 2, 2)
    plt.title('Predicted Labels')
    plt.xticks(np.linspace(0, wid, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.yticks(np.linspace(0, hei, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.xlabel('A')
    plt.imshow(pixels)


In [8]:
data = pd.read_csv('colour-data.csv')

(array([[0.65882353, 0.82745098, 0.95294118],
        [0.14509804, 0.1254902 , 0.15686275],
        [0.1372549 , 0.13333333, 0.14901961],
        ...,
        [0.54509804, 0.6627451 , 0.40392157],
        [0.67843137, 0.13333333, 0.83921569],
        [0.38431373, 0.44705882, 0.58039216]]),
 array(['blue', 'black', 'black', ..., 'green', 'purple', 'blue'],
       dtype=object))

Unnamed: 0,R,G,B
0,0.658824,0.827451,0.952941
1,0.145098,0.125490,0.156863
2,0.137255,0.133333,0.149020
3,0.309804,0.290196,0.623529
4,0.215686,0.388235,0.133333
...,...,...,...
4373,0.294118,0.549020,0.309804
4374,0.325490,0.050980,0.521569
4375,0.545098,0.662745,0.403922
4376,0.678431,0.133333,0.839216


In [11]:
# extract X values
X = data[['R', 'G', 'B']]/255     # array with shape (n, 3). Divide by 255 so components are all 0-1
# extract y values
y = data['Label']                 # array with shape (n,) of colour words
# split x & y values to training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(X, y)

In [12]:
# TODO: create six models

In [15]:
# (1) create a naïve bayes classifier and train it
bayes_rgb = GaussianNB()
bayes_rgb.fit(X_train, y_train)
# (2) create a naïve bayes classifier with different color values and train it
# use pipeline for model
bayes_convert = make_pipeline(
    FunctionTransformer(rgb2lab, validate=True),
    GaussianNB()
)
bayes_convert.fit(X_train, y_train)

Pipeline(steps=[('functiontransformer',
                 FunctionTransformer(func=<function rgb2lab at 0x7f78dc2685e0>,
                                     validate=True)),
                ('gaussiannb', GaussianNB())])

In [19]:
bayes_rgb.score(X_valid, y_valid), bayes_convert.score(X_valid, y_valid)

(0.5789954337899543, 0.6319634703196347)

In [54]:
# (3) create a k-nearest neighbours classifier and train it
knn_rgb = KNeighborsClassifier(n_neighbors=10)
knn_rgb.fit(X_train, y_train)
# (4) create a k-nearest neighbours classifier with different color values and train it
# use pipeline for model
knn_convert = make_pipeline(
    FunctionTransformer(rgb2lab, validate=True),
    KNeighborsClassifier(n_neighbors=10)
)
knn_convert.fit(X_train, y_train)

Pipeline(steps=[('functiontransformer',
                 FunctionTransformer(func=<function rgb2lab at 0x7f78dc2685e0>,
                                     validate=True)),
                ('kneighborsclassifier', KNeighborsClassifier(n_neighbors=10))])

In [55]:
knn_rgb.score(X_valid, y_valid), knn_convert.score(X_valid, y_valid)

(0.7561643835616438, 0.7579908675799086)

In [81]:
# (5) create a random forest classifier and train it
rf_rgb = RandomForestClassifier(n_estimators=100, max_depth=10)
rf_rgb.fit(X_train, y_train)
# (6) create a random forest classifier with different color values and train it
# use pipeline for model
rf_convert = make_pipeline(
    FunctionTransformer(rgb2lab, validate=True),
    RandomForestClassifier(n_estimators=100, max_depth=10)
)
rf_convert.fit(X_train, y_train)

Pipeline(steps=[('functiontransformer',
                 FunctionTransformer(func=<function rgb2lab at 0x7f78dc2685e0>,
                                     validate=True)),
                ('randomforestclassifier',
                 RandomForestClassifier(max_depth=10))])

In [82]:
rf_rgb.score(X_valid, y_valid), rf_convert.score(X_valid, y_valid)

(0.7561643835616438, 0.7607305936073059)