# Color Classifier
**Created by Keaton Brewster, Roy Salinas, and Christine Case**

5/4/2020

This code translates the image info created with the RPI and trains several classifiers


In [0]:
#importing various packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image, ImageOps
from sklearn import neighbors
from sklearn import naive_bayes
from sklearn.datasets import load_digits 
from sklearn import tree
from sklearn import svm
from sklearn import metrics  # for confusion matrix
from skimage.color import rgb2gray
import skimage
from skimage.io import imread
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

- First, we are dealing with the output from the RPI. The RPI gives us: 
  - filename.jpg [['R' 'G' 'B'] %... (for the top 5 colors)
  - this removes extraneous characters and strings, and then saves it as a numpy array
  - It also creates the X and Y data separately, pulling the color from the input file name

In [0]:



def reader(inputfile):
    with open(inputfile,"r") as fin:
        marray= []
        for line in fin:
            array = []
            weights = []
            line = line.strip() #get rid of white space such as "\n"
            line_info = line.split()
            for element in line_info:
                for character in element:
                    if( character == "]" or character == "'" or character == "[" or character == ","):
                        element =element.replace(character,"")
                try:
                    array.append(float(element))
                except ValueError:
                    continue 
            #grab the percentages for each color for each image 
            weights.append(array[3])
            weights.append(array[7])
            weights.append(array[11])
            weights.append(array[15])
            weights.append(array[19])

            #pop the percentages out of the array 
            junk = array.pop(3)
            junk = array.pop(6)
            junk = array.pop(9)
            junk = array.pop(12)
            junk = array.pop(15)
            
            #apply the percentages as the weights 
            temp1 = [i * weights[0] for i in array[:3]]
            temp2 = [i * weights[1] for i in array[3:6]]
            temp3 = [i * weights[2] for i in array[6:9]]
            temp4 = [i * weights[3] for i in array[9:12]]
            temp5 = [i * weights[4] for i in array[12:15]]
            
            array = temp1 +temp2+temp3+temp4+temp5
            
            #print(array)
            marray.append(array)
    
        #print(marray)
        marray = np.asarray(marray)
        #print(marray.shape)
    yarray = []
    #Create the Y array, taking the color from the input file name. Assumes it is in format: <color>_colorinfo.txt
    for i in range(marray.shape[0]):
        yarray.append(f"{inputfile[:-14]}")
    return marray,yarray

    fin.close()

Now, we are reading in the data, using the function above to convert the txt file into a usable format

In [88]:
# reading in converted data
X_yellow,Y_yellow = reader('yellow_colorinfo.txt')
X_blue,Y_blue = reader('blue_colorinfo.txt')
X_green, Y_green = reader('green_colorinfo.txt')
X_pink, Y_pink = reader('pink_colorinfo.txt')
X_red, Y_red = reader('red_colorinfo.txt')
X_white, Y_white = reader('white_colorinfo.txt')
X_purple, Y_purple = reader('purple_colorinfo.txt')
X_black, Y_black = reader('black_colorinfo.txt')
X_brown, Y_brown = reader('brown_colorinfo.txt')

X_yellow.shape

(76, 15)

There are 76 images and 15 features for yellow, displayed above. 


Now, converting lists to arrays and printing the shapes, which tells us how many images we have for each of the given colors



In [97]:
# converting to arrays
Y_yellow = np.asarray(Y_yellow)
Y_blue = np.asarray(Y_blue)
Y_green = np.asarray(Y_green)
Y_pink = np.asarray(Y_pink)
Y_red = np.asarray(Y_red)
Y_white = np.asarray(Y_white)
Y_purple = np.asarray(Y_purple)
Y_black = np.asarray(Y_black)
Y_brown = np.asarray(Y_brown)


# should be (# pics, <blank>)
print(Y_yellow.shape)
print(Y_blue.shape)
print(Y_green.shape)
print(Y_pink.shape)
print(Y_red.shape)
print(Y_white.shape)
print(Y_purple.shape)
print(Y_black.shape)
print(Y_brown.shape)


(76,)
(329,)
(126,)
(228,)
(167,)
(420,)
(73,)
(291,)
(94,)


In [150]:
# now, splitting off part of the data to be testing data
# for now, just taking the first 10 values from each color [:10]

# now, combining the colors. This can be ommited as desired.
Y_test = []
Y_test = np.asarray(Y_test)

Y_test = np.append(Y_test,Y_yellow[:10])
Y_test = np.append(Y_test,Y_blue[:10])
Y_test = np.append(Y_test,Y_green[:10])
Y_test = np.append(Y_test,Y_pink[:10])
Y_test = np.append(Y_test,Y_red[:10])
Y_test = np.append(Y_test,Y_white[:10])
Y_test = np.append(Y_test,Y_purple[:10])
Y_test = np.append(Y_test,Y_black[:10])
Y_test = np.append(Y_test,Y_brown[:10])

#should be (# pictures, <blank>)
print(Y_test.shape)

# starting off list
X_test = X_yellow[:10]


# adding in colors
# axis = 0 option makes sure it adds rows
# X_train = np.append(X_train,X_yellow, axis = 0)
X_test = np.append(X_test,X_blue[:10], axis = 0)
X_test = np.append(X_test,X_green[:10], axis = 0)
X_test = np.append(X_test,X_pink[:10], axis = 0)
X_test = np.append(X_test,X_red[:10], axis = 0)
X_test = np.append(X_test,X_white[:10], axis = 0)
X_test = np.append(X_test,X_purple[:10], axis = 0)
X_test = np.append(X_test,X_black[:10], axis = 0)
X_test = np.append(X_test,X_brown[:10], axis = 0)

# should be (# pictures, 15 features)
print(X_test.shape)



(90,)
(90, 15)


In [91]:
# now, combining the colors. This can be ommited as desired.


#for the training set, we keep everything but the first 10 values, hence the [10:]
Y_train = []
Y_train = np.asarray(Y_train)

Y_train = np.append(Y_train,Y_yellow[10:])
Y_train = np.append(Y_train,Y_blue[10:])
Y_train = np.append(Y_train,Y_green[10:])
Y_train = np.append(Y_train,Y_pink[10:])
Y_train = np.append(Y_train,Y_red[10:])
Y_train = np.append(Y_train,Y_white[10:])
Y_train = np.append(Y_train,Y_purple[10:])
Y_train = np.append(Y_train,Y_black[10:])
Y_train = np.append(Y_train,Y_brown[10:])

#should be (# pictures, <blank>)
print(Y_train.shape)

# starting off list
X_train = X_yellow[10:]


# adding in colors
# axis = 0 option makes sure it adds rows
# X_train = np.append(X_train,X_yellow, axis = 0)
X_train = np.append(X_train,X_blue[10:], axis = 0)
X_train = np.append(X_train,X_green[10:], axis = 0)
X_train = np.append(X_train,X_pink[10:], axis = 0)
X_train = np.append(X_train,X_red[10:], axis = 0)
X_train = np.append(X_train,X_white[10:], axis = 0)
X_train = np.append(X_train,X_purple[10:], axis = 0)
X_train = np.append(X_train,X_black[10:], axis = 0)
X_train = np.append(X_train,X_brown[10:], axis = 0)

# should be (# pictures, 15 features)
print(X_train.shape)


(1714,)
(1714, 15)


For neural network, it needs the data to be normalized to have a mean of 0 and a stdev of 1. This is done using the standard scaler. This should not affect other classifiers, but regardless we are mainly focused on MLP classifier.

In [0]:
#resizing
#sets mean = 0, stdev = 1
scaler = StandardScaler()

scaler.fit(X_train)
X_train = scaler.transform(X_train)


# will need to do this for prediction data
# X_pred = scaler.transform(X_pred)

#hurray! now we are all ready to rumble

Now, training the neural network. We tried many different options, including the solver type, activation method, alpha value, size of hidden layers, and the maximum interations



In [158]:
# setting up a neural network for yellow

# lbfgs gives score of 0.23
# adams gives score of 0.2
# sgd gives score of 0.377

# roy = solver='lbfgs',activation="tanh", alpha=1e-3,hidden_layer_sizes=(i,((i+2)^2-1)), random_state=42

# can mess with these options some
clf = MLPClassifier(solver='sgd',activation="tanh", alpha=1e-3,hidden_layer_sizes=100, random_state=42,max_iter=2000)

clf.fit(X_train,Y_train)





MLPClassifier(activation='tanh', alpha=0.001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=100, learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=2000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=42, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [159]:
# just testing it out here


# print(clf.predict(X_test))
# print(Y_test)
print(f'test score = {clf.score(X_test,Y_test)}')
print(f'train score = {clf.score(X_train,Y_train)}')
# print(clf.predict_proba(X_test))

test score = 0.37777777777777777
train score = 0.5315052508751459


MLP did pretty well compared to other attempts, so we are keeping this as our best attempt. Below, I tried a few different Methods. 


# Tree

- did not work well
- the 0.11 is just as good as a random guess as there are 9 colors

In [142]:
# tree did not work well
clf2 = tree.DecisionTreeClassifier(max_depth=5)

clf2.fit(X_train,Y_train)

print(clf2.score(X_test,Y_test))

0.1111111111111111


# Neighbors

 - Usually, this one does great! But, it was not the case here.
 - Not much better than the tree.

In [104]:
# neighbor is baad

clf3 = neighbors.KNeighborsClassifier(n_neighbors=7)

clf3.fit(X_train,Y_train)
print(clf3.score(X_test,Y_test))


0.15555555555555556


# Naive Bayes

- same accuracy as neighbors
- still not nearly as good as a neural network

In [105]:
# bayes 2nd to neural net

clf4 = naive_bayes.GaussianNB()

clf4.fit(X_train,Y_train)

print(clf4.score(X_test,Y_test))


0.15555555555555556


# Below this, I am just using clf to make some predictions

In [109]:
# testing on an image

X_test, junk = reader('pic.jpg_testing_colorinfo.txt')

X_test = scaler.transform(X_test)

image = Image.open('pic.jpg')
image.show()
print(clf.predict(X_test))

X_test.shape

['red' 'black']


(2, 15)

In [145]:
# testing on an image

X_test, junk = reader('testpic_1.jpg_testing_colorinfo.txt')
X_test = scaler.transform(X_test)

print(clf.predict(X_test))



['blue']


In [146]:
# testing on an image

X_test, junk = reader('testpic_2.jpg_testing_colorinfo.txt')
X_test = scaler.transform(X_test)

print(clf.predict(X_test))



['white']


In [148]:
# testing on an image

X_test, junk = reader('testpic_3.jpg_testing_colorinfo.txt')
X_test = scaler.transform(X_test)

print(clf.predict(X_test))


['green']


In [147]:
# testing on a blue image of just blue
X_test, junk = reader('bluepic.jpg_testing_colorinfo.txt')

X_test = scaler.transform(X_test)

print(clf.predict(X_test))


['purple']


# Now, trying to make separate classifiers

- This was our initial plan, but it did not pan out well. When we train on all of the same color, then the classifier thinks that everything is that color, so these are not useful for anything unfortunately.
- There is potential to use pairings of colors, so in a way use a decision tree between colors, but each decision is made by a neural network.

In [111]:
clf_yellow = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_blue = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_green = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_pink = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_red = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_white = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_purple = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_black = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)
clf_brown = MLPClassifier(solver = 'adam', random_state=1,max_iter = 3000)


clf_yellow.fit(X_yellow,Y_yellow)
clf_blue.fit(X_blue,Y_blue)
clf_green.fit(X_green,Y_green)
clf_pink.fit(X_pink,Y_pink)
clf_red.fit(X_red,Y_red)
clf_white.fit(X_white,Y_white)
clf_purple.fit(X_purple,Y_purple)
clf_black.fit(X_black,Y_black)
clf_brown.fit(X_brown,Y_brown)






MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=3000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [0]:
# yellow thinks all is yellow

# print(clf_yellow.predict(X_test))