In [1]:
# Import necessary libraries
import warnings
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import os
import numpy as np
import tensorflow
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import precision_score, recall_score
from colorthief import ColorThief as ct

In [2]:
# the following function restores the dataset to the layout it had before splitting
def unsplitData():
    """Move every held-out image back into its color folder and drop the test folders.

    Reads the module-level ``dataDir``. For each color folder that contains a
    ``test_images`` sub-folder, every file in it is renamed back into the color
    folder and the (now empty) ``test_images`` folder is removed.

    Entries of ``dataDir`` without a ``test_images`` sub-folder (stray files, or
    color folders that were never split) are skipped instead of raising.

    Raises
    ------
    OSError
        If a file cannot be moved or ``test_images`` is not empty afterwards.
    """
    for color_name in os.listdir(dataDir):
        color_dir = os.path.join(dataDir, color_name)
        test_dir = os.path.join(color_dir, "test_images")

        # nothing to restore for this entry
        if not os.path.isdir(test_dir):
            continue

        # move the images back next to the training images
        for image_name in os.listdir(test_dir):
            os.rename(os.path.join(test_dir, image_name),
                      os.path.join(color_dir, image_name))

        # rmdir removes only this folder; removedirs would also prune empty parent folders
        os.rmdir(test_dir)

In [3]:
# Ignore warnings
warnings.simplefilter('ignore')

# Create a Pandas DataFrame to store color data for train and test
# Change the columns type to int because rgb values are int
data_train = pd.DataFrame({'r':[],'g':[],'b':[],'color':[]})
data_train = data_train.astype({'r':int,'g':int,'b':int})

data_test = pd.DataFrame({'r':[],'g':[],'b':[],'color':[]})
data_test = data_test.astype({'r':int,'g':int,'b':int})

# Directory containing the training dataset (one sub-folder of images per color).
# Set the COLOR_DATASET_DIR environment variable to run the notebook on another
# machine; when it is not set the original location is used.
dataDir = os.environ.get(
    'COLOR_DATASET_DIR',
    r"C:\Users\DELL\Desktop\final\training_dataset",
)

In [4]:
# If a previous run was shut down while the data was still split, the leftover
# test_images folders cause an error when the notebook runs again.
# Every entry of the dataset directory is checked (not one hard-coded color), and
# the data is returned to its normal layout when any of them is still split.
if any(
    os.path.isdir(os.path.join(dataDir, color_name, 'test_images'))
    for color_name in os.listdir(dataDir)
):
    unsplitData()

In [5]:
# Split each color folder into training and testing images: 8 images are moved to a
# test_images sub-folder, the rest (17 per color in the original dataset) stay for training
n_test_images = 8

# sorted() because os.listdir returns entries in arbitrary order; sorting keeps the split repeatable
for color_name in sorted(os.listdir(dataDir)):
    color_dir = os.path.join(dataDir, color_name)
    # ignore stray files that sit next to the color folders
    if not os.path.isdir(color_dir):
        continue

    test_dir = os.path.join(color_dir, "test_images")

    # only regular files are candidates, so an existing sub-folder is never moved
    image_names = sorted(
        name for name in os.listdir(color_dir)
        if os.path.isfile(os.path.join(color_dir, name))
    )

    # create the test folder once per color, even when the color folder is empty
    os.makedirs(test_dir, exist_ok=True)

    # slicing never raises: a folder holding fewer images than requested is moved entirely
    for image_name in image_names[:n_test_images]:
        os.rename(os.path.join(color_dir, image_name),
                  os.path.join(test_dir, image_name))

In [6]:
# Extract the dominant color of every image and store it, together with the name of
# its color folder, in data_train / data_test.
def _color_row(image_path, color_name):
    """Return one record with the dominant color of ``image_path`` and its label."""
    red, green, blue = ct(image_path).get_color()
    return {'r': red, 'g': green, 'b': blue, 'color': color_name}


# only folders are color classes; sorted so the row order is repeatable
color_names = sorted(
    name for name in os.listdir(dataDir)
    if os.path.isdir(os.path.join(dataDir, name))
)
number_of_colors = len(color_names)

# rows are collected in plain lists and turned into frames once at the end;
# DataFrame.append (used previously) was removed in pandas 2.0
train_rows = []
test_rows = []

# the outer loop iterates through all the color folders in the main directory
for iterator, color_name in enumerate(color_names):
    print(iterator,'out of',number_of_colors,'finished')
    color_dir = os.path.join(dataDir, color_name)

    # the inner loop iterates through the entries of a single color folder
    for entry in os.listdir(color_dir):
        entry_path = os.path.join(color_dir, entry)
        if entry == "test_images":
            # images inside test_images belong to the test frame
            for test_image in os.listdir(entry_path):
                test_rows.append(_color_row(os.path.join(entry_path, test_image), color_name))
            continue

        train_rows.append(_color_row(entry_path, color_name))

columns = ['r', 'g', 'b', 'color']
rgb_as_int = {'r': int, 'g': int, 'b': int}
data_train = pd.DataFrame(train_rows, columns=columns).astype(rgb_as_int)
data_test = pd.DataFrame(test_rows, columns=columns).astype(rgb_as_int)

# final progress line, printed once every folder has been processed
iterator = number_of_colors
print(iterator,'out of',number_of_colors,'finished')

0 out of 10 finished
1 out of 10 finished
2 out of 10 finished
3 out of 10 finished
4 out of 10 finished
5 out of 10 finished
6 out of 10 finished
7 out of 10 finished
8 out of 10 finished
9 out of 10 finished
10 out of 10 finished


In [7]:
# Preview the training frame: first five rows with the header, a separator,
# then the last five rows without repeating the header
train_preview = (
    data_train.head().to_string(),
    '"...."',
    data_train.tail().to_string( header=False),
)
for chunk in train_preview:
    print(chunk)

    r   g   b  color
0   4   4   4  black
1   4   4   4  black
2  12  12  12  black
3  12   4   4  black
4  20  20  20  black
"...."
165  252  180  12  yellow
166  252  252   4  yellow
167  252  164   4  yellow
168  252  220  92  yellow
169  244  188  36  yellow


In [8]:
# Preview the test frame: head (with header), a separator line, tail (without header)
test_head = data_test.head().to_string()
test_tail = data_test.tail().to_string( header=False)
print(test_head, '"...."', test_tail, sep='\n')

    r   g   b  color
0  12   4   4  black
1  20   4  20  black
2  28  28  20  black
3  44  44  44  black
4  28  28  36  black
"...."
75  252  252   92  yellow
76  252  220   92  yellow
77  252  252  124  yellow
78  252  228    4  yellow
79  252  252  124  yellow


In [9]:
# Plots all the colors extracted from the images. We don't recommend using it because it
# plots the data one row at a time; it was only used to check that the correct colors
# were extracted.
# To try it, pass the name of a data frame as the argument.
# Note that there are two data frames: one for test and one for train.
def plotAllData(data):
    """Show every row of ``data`` as a one-pixel image titled with its color label.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``r``, ``g``, ``b`` and ``color`` columns.

    Rows are read by position rather than by index label, so frames whose index is
    not 0..n-1 (for example two frames joined without resetting the index) are
    plotted row by row as well.
    """
    for red, green, blue, label in zip(data['r'], data['g'], data['b'], data['color']):
        # a 1x1 image whose single pixel is the extracted color
        plt.imshow([[(red, green, blue)]])
        plt.title(label)
        plt.show()
        
# plotAllData(data_test)

In [10]:
# Split the data into input (X) and output (y) variables
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
data_all = pd.concat([data_train, data_test])
x = data_all[['r','g','b']]
y = data_all[['color']]

X_train = data_train[['r','g','b']]
X_test = data_test[['r', 'g', 'b']]

# Encoding the variables
# dtype='uint8' keeps the 0/1 encoding on pandas versions whose default dummy dtype is bool.
# The train and test encodings are aligned to the columns of the full encoding, so both
# always have one column per color, in the same order, even if a color is missing from one of them.
y_encoded = pd.get_dummies(y, dtype='uint8')
y_train = pd.get_dummies(data_train[['color']], dtype='uint8').reindex(
    columns=y_encoded.columns, fill_value=0
)
y_test = pd.get_dummies(data_test[['color']], dtype='uint8').reindex(
    columns=y_encoded.columns, fill_value=0
)
y_encoded

Unnamed: 0,color_black,color_blue,color_brown,color_green,color_grey,color_orange,color_red,color_violet,color_white,color_yellow
0,1,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
75,0,0,0,0,0,0,0,0,0,1
76,0,0,0,0,0,0,0,0,0,1
77,0,0,0,0,0,0,0,0,0,1
78,0,0,0,0,0,0,0,0,0,1


In [11]:
# Move the images held in the test_images folders back into their color folders
unsplitData()

In [12]:
# Build the first model using a neural network with a softmax output layer
print('first model', '-'*50)

# Fix the random seeds so that weight initialisation and batch shuffling (and therefore
# the reported metrics) are as repeatable as the backend allows, in the same way
# random_state=0 is fixed for the second model
np.random.seed(0)
tensorflow.random.set_seed(0)

# One hidden layer on the three rgb inputs, one softmax output unit per color
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(y_encoded.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
# NOTE(review): the test set doubles as validation data here; it is only monitored,
# no training decision in this cell depends on it
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print('Test accuracy: %.2f' % (accuracy*100))


first model --------------------------------------------------
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test accuracy: 73.75


In [13]:
# Turn the network outputs and the one-hot labels into class indices
y_pred = model.predict(X_test).argmax(axis=1)
y_true = y_test.to_numpy().argmax(axis=1)

# Support-weighted precision and recall over all classes
precision, recall = (
    scorer(y_true, y_pred, average='weighted')
    for scorer in (precision_score, recall_score)
)
print('Precision: %.2f' % precision)
print('Recall: %.2f' % recall)

Precision: 0.73
Recall: 0.74


In [14]:
# Per-class precision / recall / f1 of the first model, labelled with the one-hot column names
target_names = y_test.columns
nn_report = classification_report(y_true, y_pred, target_names=target_names)
print(nn_report)

              precision    recall  f1-score   support

 color_black       1.00      0.25      0.40         8
  color_blue       0.80      1.00      0.89         8
 color_brown       0.78      0.88      0.82         8
 color_green       1.00      0.75      0.86         8
  color_grey       0.42      1.00      0.59         8
color_orange       0.83      0.62      0.71         8
   color_red       0.73      1.00      0.84         8
color_violet       0.88      0.88      0.88         8
 color_white       0.00      0.00      0.00         8
color_yellow       0.89      1.00      0.94         8

    accuracy                           0.74        80
   macro avg       0.73      0.74      0.69        80
weighted avg       0.73      0.74      0.69        80



In [15]:
# Build and train the logistic regression model
print('second model', '-'*50)

# Convert the one-hot encoded y back to one label per row for the second model.
# Guarded so the cell can be re-run: after the first run the labels are already
# Series, and calling idxmax(axis=1) on a Series raises.
if isinstance(y_train, pd.DataFrame):
    y_train = y_train.idxmax(axis=1)
if isinstance(y_test, pd.DataFrame):
    y_test = y_test.idxmax(axis=1)

# Make the model and train it
Logisticmodel = LogisticRegression(solver = 'liblinear',random_state=0)
Logisticmodel.fit(X_train,y_train)

# Predict the data and calculate the metrics of the model
y_pred = Logisticmodel.predict(X_test)
score = Logisticmodel.score(X_test,y_test)
conf_m = confusion_matrix(y_test,y_pred)
report = classification_report(y_test,y_pred)

print('Accuracy: ', score*100)
print('Confusion matrix:\n', conf_m)
print('Report: \n', report)

second model --------------------------------------------------
Accuracy:  87.5
Confusion matrix:
 [[8 0 0 0 0 0 0 0 0 0]
 [0 8 0 0 0 0 0 0 0 0]
 [0 0 7 0 0 1 0 0 0 0]
 [0 1 0 7 0 0 0 0 0 0]
 [1 0 0 0 7 0 0 0 0 0]
 [0 0 2 0 0 6 0 0 0 0]
 [1 0 0 0 0 0 7 0 0 0]
 [0 0 0 0 1 0 1 6 0 0]
 [0 0 0 0 0 0 0 0 8 0]
 [0 0 0 0 0 0 0 0 2 6]]
Report: 
               precision    recall  f1-score   support

 color_black       0.80      1.00      0.89         8
  color_blue       0.89      1.00      0.94         8
 color_brown       0.78      0.88      0.82         8
 color_green       1.00      0.88      0.93         8
  color_grey       0.88      0.88      0.88         8
color_orange       0.86      0.75      0.80         8
   color_red       0.88      0.88      0.88         8
color_violet       1.00      0.75      0.86         8
 color_white       0.80      1.00      0.89         8
color_yellow       1.00      0.75      0.86         8

    accuracy                           0.88        80
   macro a

In [16]:
# Try both models on an image of your choice.
testImg = ct(r"C:\Users\DELL\Desktop\Youtube_logo.png").get_color()

# a single sample holding the three color channels of the image
sample = [[testImg[0], testImg[1], testImg[2]]]

# second model: predicts the label directly
print(Logisticmodel.predict(sample)[0])

# first model: take the most probable class and look up its column name
nn_class = np.argmax(model.predict(sample), axis=1)
print(target_names[nn_class][0])

color_red
color_red
