In [1]:
#required Libraries
import pandas as pd
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


In [2]:
#Preprocess training and testing labels
def _load_eye_color_labels(csv_path):
    """Read a cartoon_set labels file and return its eye-colour labels.

    The labels file is tab-separated: an unnamed leading index column followed
    by the columns eye_color, face_shape and file_name. Reading it with the tab
    delimiter yields proper columns directly, so no manual string splitting of
    a single mis-parsed column is needed.

    Parameters
    ----------
    csv_path : str
        Path to the labels.csv file.

    Returns
    -------
    pandas.DataFrame
        Two columns: "eye_color" (int32) and "img_name" (string), one row per image.
    """
    labels = pd.read_csv(csv_path, sep='\t', usecols=['eye_color', 'file_name'])
    labels = labels.rename(columns={'file_name': 'img_name'})
    return labels.astype({'eye_color': 'int32', 'img_name': 'string'})


# eye colour and image file name for every training image
labelsTrain = _load_eye_color_labels('../Datasets/cartoon_set/labels.csv')

# eye colour and image file name for every testing image
labelsTest = _load_eye_color_labels('../Datasets/cartoon_set_test/labels.csv')

In [3]:
#Load training and testing sets################################
def _load_eye_region_images(img_dir, file_names, size=50):
    """Load, crop, resize and flatten a set of images.

    Each image is read with OpenCV, cropped to rows 230-289 and columns
    180-319, resized to ``size`` x ``size`` pixels with bicubic interpolation,
    and flattened into one row of ``size * size * 3`` values.

    Parameters
    ----------
    img_dir : str
        Directory prefix (including the trailing slash) prepended to each file name.
    file_names : sequence of str
        Image file names, in the order the rows should appear.
    size : int, optional
        Side length of the resized square image (default 50).

    Returns
    -------
    pandas.DataFrame
        One row per image, ``size * size * 3`` float columns.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read from disk.
    """
    n_images = len(file_names)
    images = np.zeros((n_images, size, size, 3))

    for idx, file_name in enumerate(file_names):
        image = cv.imread(img_dir + file_name)
        if image is None:
            # cv.imread returns None instead of raising when the file is missing or unreadable
            raise FileNotFoundError("could not read image: " + img_dir + file_name)
        # keep only rows 230-289 and columns 180-319
        # NOTE(review): presumably the band containing the eyes; confirm against the source images
        image = image[230:290, 180:320]
        images[idx] = cv.resize(image, dsize=(size, size), interpolation=cv.INTER_CUBIC)

    return pd.DataFrame(images.reshape(n_images, size * size * 3))


# flattened 50x50x3 crops, one row per training image
imagesTrain = _load_eye_region_images('../Datasets/cartoon_set/img/', labelsTrain["img_name"])

# flattened 50x50x3 crops, one row per testing image
imagesTest = _load_eye_region_images('../Datasets/cartoon_set_test/img/', labelsTest["img_name"])

In [9]:
# Build the KNN feature matrices: every flattened image row is convolved with a
# fixed 3-tap 1D kernel. np.convolve's default 'full' mode returns
# 7500 + 3 - 1 = 7502 values per row.
def _convolve_rows(pixels, n_rows, kernel=(0.066, -5.599, 5.566)):
    """Convolve the first ``n_rows`` rows of ``pixels`` with ``kernel``.

    ``pixels`` is a DataFrame of flattened images; the result is a DataFrame
    with ``n_rows`` rows and 7502 columns.
    """
    values = pixels.to_numpy()
    filtered = np.zeros((n_rows, 7502))
    for row in range(n_rows):
        filtered[row] = np.convolve(values[row], kernel)
    return pd.DataFrame(filtered)


x_train = _convolve_rows(imagesTrain, 10000)
x_test = _convolve_rows(imagesTest, 2500)


In [14]:
# Sweep n_neighbors for a distance-weighted KNN classifier.
# range(100, 1000, 100) yields k = 100, 200, ..., 900 (1000 itself is excluded).
# Each printed value is the accuracy on the test set for that k.
for k in range(100, 1000, 100):
    model = KNeighborsClassifier(weights='distance', n_neighbors=k, n_jobs=-1)
    model.fit(x_train, labelsTrain['eye_color'])
    print(model.score(x_test, labelsTest['eye_color']))

0.8152
0.8272
0.8292
0.8308
0.8296
0.8324
0.8312
0.8344
0.8304


In [19]:
# Hyper-parameter grid explored during cross-validation:
# neighbour weighting scheme, number of neighbours, and Minkowski power p
hyper_params = dict(
    weights=['uniform', 'distance'],
    n_neighbors=[100, 300, 500, 800, 1000],
    p=[1, 2],
)

In [20]:
# Set up a 10-fold cross-validated grid search over hyper_params for a KNN classifier
model = KNeighborsClassifier(n_jobs=-1)
grid = GridSearchCV(
    estimator=model,
    param_grid=hyper_params,
    cv=10,
    n_jobs=-1,
    verbose=10,
)

In [21]:
# Run the grid search on the training features and eye-colour labels
grid.fit(x_train,labelsTrain['eye_color'])

Fitting 10 folds for each of 20 candidates, totalling 200 fits


In [22]:
# Display the best estimator found by the cross-validated grid search
grid.best_estimator_


In [23]:
# Display the cross-validation score achieved by the best estimator
grid.best_score_

0.8322999999999998

In [24]:
# Refit a KNN classifier with fixed hyper-parameters and print its accuracy on the test set.
# NOTE(review): presumably these are the values reported by grid.best_estimator_; confirm.
model = KNeighborsClassifier(
    n_neighbors=100,
    weights='distance',
    p=1,
    n_jobs=-1,
)
model.fit(x_train, labelsTrain['eye_color'])
print(model.score(x_test, labelsTest['eye_color']))

0.8236


In [3]:
# Required libraries for the CNN model (TensorFlow / Keras)
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten
# Report how many GPU devices TensorFlow can see
print("num of gpus available: ",len(tf.config.experimental.list_physical_devices('GPU')))
from keras.utils import normalize,to_categorical
from keras.preprocessing.image import ImageDataGenerator
# Make tf.function-decorated code run eagerly.
# NOTE(review): this is usually a debugging aid and may slow training; confirm it is still needed.
tf.config.run_functions_eagerly(True)

num of gpus available:  1


In [4]:
#Load training and testing sets for CNN################################
# Dataset root relative to the notebook's working directory (the same layout the
# '../Datasets/...' label files use), instead of a machine-specific absolute path.
DATASETS_DIR = '../Datasets/'


def _load_cnn_images(img_dir, file_names, size=150):
    """Load, crop and resize a set of images for the CNN.

    Each image is read with OpenCV, cropped to rows 75-419 and columns 95-404,
    and resized to ``size`` x ``size`` pixels with bicubic interpolation.

    Parameters
    ----------
    img_dir : str
        Directory prefix (including the trailing slash) prepended to each file name.
    file_names : sequence of str
        Image file names, in the order the images should appear.
    size : int, optional
        Side length of the resized square image (default 150).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(file_names), size, size, 3)``; np.zeros allocates
        it with the default float64 dtype.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read from disk.
    """
    images = np.zeros((len(file_names), size, size, 3))

    for idx, file_name in enumerate(file_names):
        image = cv.imread(img_dir + file_name)
        if image is None:
            # cv.imread returns None instead of raising when the file is missing or unreadable
            raise FileNotFoundError("could not read image: " + img_dir + file_name)
        # cut 95 from left / right sides , 75 from top, 80 from the bottom
        # (these margins assume 500x500 source images; TODO confirm)
        image = image[75:420, 95:405]
        images[idx] = cv.resize(image, dsize=(size, size), interpolation=cv.INTER_CUBIC)

    return images


# reads image data from the training / testing sets and resizes it
imagesTrain = _load_cnn_images(DATASETS_DIR + 'cartoon_set/img/', labelsTrain["img_name"])
imagesTest = _load_cnn_images(DATASETS_DIR + 'cartoon_set_test/img/', labelsTest["img_name"])

# normalises image data
# NOTE(review): axis=1 is the image-height axis of the (N, 150, 150, 3) arrays;
# confirm this is intended rather than e.g. scaling pixel values to [0, 1].
imagesTrain = normalize(imagesTrain,axis=1)
imagesTest = normalize(imagesTest,axis=1)

# one hot encodes the labels
# NOTE: this replaces the label DataFrames with plain arrays, so the "img_name"
# column read above is no longer available if this cell is re-run in the same kernel.
labelsTrain = to_categorical(labelsTrain['eye_color'])
labelsTest = to_categorical(labelsTest['eye_color'])

#creates generator functions
#needed to save memory
datagen_train = ImageDataGenerator()
datagen_Test = ImageDataGenerator()

In [7]:
# CNN architecture: three Conv2D + MaxPooling2D stages (32, 64, 128 filters)
# feeding a dense head that ends in a 5-unit softmax output.
model = Sequential([
    # feature extractor on 150x150 three-channel inputs
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),

    Conv2D(64, kernel_size=(3, 3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),

    Conv2D(128, kernel_size=(3, 3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),

    # classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax'),
])


In [8]:
# Print the layer-by-layer summary of the CNN (output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 17, 17, 128)      0

In [9]:
# Configure training: RMSprop optimiser, categorical cross-entropy loss
# (the labels are one-hot encoded), and accuracy as the reported metric
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Train the CNN for 20 epochs on batches of 100 images.
# The test set is passed as validation data, so the per-epoch validation
# metrics are computed on the test images.
train_batches = datagen_train.flow(imagesTrain, labelsTrain, batch_size=100)
validation_batches = datagen_Test.flow(imagesTest, labelsTest, batch_size=100)

history = model.fit(
    train_batches,
    epochs=20,
    validation_data=validation_batches,
)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
