In [7]:
import numpy as np
#import pandas as pd

from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Flatten, Dense, Input, ReLU, Dropout
import tensorflow.keras as keras
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from PIL import Image

from ultralytics import YOLO

In [2]:
#import data
#read the data
#The data is read from the directory and each batch can be thought of as a set of labels and datapoints
#Might need to change the directory depending on where

#50
train = keras.utils.image_dataset_from_directory(
    directory='../../data/Faces',
    labels='inferred',
    label_mode='categorical',
    batch_size=140,
    image_size=(1000, 1000),
    validation_split = 0.2,
    subset = 'training',
    seed = 50
    )

validation = keras.utils.image_dataset_from_directory(
    directory='../../data/Faces',
    labels='inferred',
    label_mode='categorical',
    batch_size=140,
    image_size=(1000, 1000),
    validation_split = 0.2,
    subset = 'validation',
    seed = 50
    )

Found 140 files belonging to 6 classes.
Using 112 files for training.
Found 140 files belonging to 6 classes.
Using 28 files for validation.


In [4]:
#convert images to numpy
train_processed_numpy = train.as_numpy_iterator()
validation_processed_numpy = validation.as_numpy_iterator()

X_train, y_train = train_processed_numpy.next()
X_val, y_val = validation_processed_numpy.next()

In [18]:
#convert the images to cropped faces
face_classifier = YOLO("../../app/src/face_detection/model/yolov8n-face.pt")

X_cropped_train = []
y_train_cropped = []

for i in range(112):
    face = X_train[i]
    faces = face_classifier.predict(face)
    boxes = faces[0].boxes.xyxy.tolist()
    if boxes:
        
        #extract the face based on the output from the YOLOv8 model        
        left, bottom, right, top = boxes[0]
        cropped_face = face[int(bottom):int(top), int(left):int(right)]
        #resize cropped face to a std shape, 100x100 for now but can adjust this
        pil_face = Image.fromarray(np.uint8(cropped_face))
        pil_face = pil_face.resize((75, 75))
        numpy_cropped_face = np.array(pil_face)
        #append this to the new list containing all cropped faces
        X_cropped_train.append(numpy_cropped_face)
        y_train_cropped.append(y_train[i])

X_val_cropped = []
y_val_cropped = []

for i in range(28):
    face = X_val[i]
    faces = face_classifier.predict(face)
    boxes = faces[0].boxes.xyxy.tolist()
    if boxes:
        
        #extract the face based on the output from the YOLOv8 model        
        left, bottom, right, top = boxes[0]
        cropped_face = face[int(bottom):int(top), int(left):int(right)]
        #resize cropped face to a std shape, 100x100 for now but can adjust this
        pil_face = Image.fromarray(np.uint8(cropped_face))
        pil_face = pil_face.resize((75, 75))
        numpy_cropped_face = np.array(pil_face)
        #append this to the new list containing all cropped faces
        X_val_cropped.append(numpy_cropped_face)
        y_val_cropped.append(y_val[i])



0: 640x640 1 face, 125.9ms
Speed: 8.6ms preprocess, 125.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 86.9ms
Speed: 14.1ms preprocess, 86.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 87.4ms
Speed: 15.0ms preprocess, 87.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 82.9ms
Speed: 8.0ms preprocess, 82.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 86.0ms
Speed: 7.0ms preprocess, 86.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 99.4ms
Speed: 7.0ms preprocess, 99.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 93.1ms
Speed: 7.0ms preprocess, 93.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 89.9ms
Speed: 7.0ms preprocess, 89.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 

In [19]:
#convert data into neural net input format

X_cropped_train = np.array(X_cropped_train)
y_train_cropped = np.array(y_train_cropped)
X_val_cropped = np.array(X_val_cropped)
y_val_cropped = np.array(y_val_cropped)

In [23]:
#first convolutional layer
#source of archi: https://cs230.stanford.edu/projects_winter_2020/reports/32610274.pdf

default_filters = 64

input_x = Input(shape = (75, 75, 3))

conv_1 = Conv2D(filters = default_filters, kernel_size = (3, 3), name = 'conv_1', padding = 'same',activation='relu' ,data_format='channels_last')(input_x)
batch_norm1 = BatchNormalization(name = 'batch_norm1')(conv_1)
conv_12 = Conv2D(filters = default_filters, kernel_size = (3, 3), name = 'conv_11', padding = 'same', activation = 'relu')(batch_norm1)#max_pooling
batch_norm12 = BatchNormalization(name = 'batch_norm12')(conv_12)
max_pool1 = MaxPool2D(name = 'max_pool1', pool_size = (2,2), strides = (2,2))(batch_norm12)
drop_out1 = Dropout(0.5)(max_pool1)

#second convolutional layer 
conv2 = Conv2D(filters = 2 * default_filters, kernel_size = (3,3), name = 'conv_2', padding = 'same', activation = 'relu')(drop_out1)
batch_norm2 = BatchNormalization(name = 'batch_norm2')(conv2)
conv21 = Conv2D(filters = 2 * default_filters, kernel_size = (3,3), name = 'conv_21', padding = 'same', activation = 'relu')(conv2)
batch_norm21 = BatchNormalization(name = 'batch_norm21')(conv21)
max_pool2 = MaxPool2D(name = 'max_pool2', pool_size = (2,2), strides = (2,2))(batch_norm21)
dropout_2 = Dropout(0.5)(max_pool2)




#third convolutional layer
conv3 = Conv2D(filters = 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_3', padding = 'same', activation = 'relu')(dropout_2)
batch_norm3 = BatchNormalization(name = 'batch_norm3')(conv3)
conv31 = Conv2D(filters = 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_31', padding = 'same', activation = 'relu')(batch_norm3)
batch_norm31 = BatchNormalization(name = 'batch_norm31')(conv31)
max_pool3 = MaxPool2D(name = 'max_pool3', pool_size = (2,2), strides = (2,2))(batch_norm31)
dropout_3 = Dropout(0.5)(max_pool3)

#fourth convolutional layer
conv4 = Conv2D(filters = 2 * 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_4', padding = 'same', activation = 'relu')(dropout_3)
batch_norm4 = BatchNormalization(name = 'batch_norm4')(conv4)
conv41 = Conv2D(filters = 2 *2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_41', padding = 'same', activation = 'relu')(batch_norm4)
batch_norm41 = BatchNormalization(name = 'batch_norm41')(conv41)
max_pool4 = MaxPool2D(name = 'max_pool4', pool_size = (2,2), strides = (2,2))(batch_norm41)
dropout_4 = Dropout(0.5)(max_pool4)
#dense layer

flat = Flatten()(dropout_4)

dense1 = Dense(256, activation = 'relu', name = 'dense_1')(flat)
drop_out_dense1 = Dropout(0.4)(dense1)
dense2 = Dense(128, activation = 'relu', name = 'dense_2')(drop_out_dense1)
drop_out_dense2 = Dropout(0.4)(dense2)
#final softmax layer
dense3 = Dense(64, activation = 'relu', name = 'dense_3')(drop_out_dense2)
drop_out_dense3 = Dropout(0.5)(dense3)

#6 outputs for the 6 classes
output = Dense(6, activation = 'softmax', name = 'output')(drop_out_dense3)


# In[11]:

In [24]:
checkpoint = ModelCheckpoint(filepath = 'cnn_scratch.keras')


# In[ ]:


model = Model(input_x, output)

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])

model.fit(x = X_cropped_train, y = y_train_cropped, epochs = 100, validation_data = (X_val_cropped, y_val_cropped), callbacks = [checkpoint])

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - acc: 0.2029 - loss: 6.3720 - val_acc: 0.1429 - val_loss: 6.0598
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2s/step - acc: 0.1852 - loss: 6.2021 - val_acc: 0.2143 - val_loss: 18.3992
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step - acc: 0.1353 - loss: 4.8490 - val_acc: 0.1071 - val_loss: 43.3650
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step - acc: 0.1634 - loss: 4.3505 - val_acc: 0.1429 - val_loss: 53.0017
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step - acc: 0.1821 - loss: 3.6945 - val_acc: 0.1786 - val_loss: 51.2622
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - acc: 0.2175 - loss: 3.6910 - val_acc: 0.1786 - val_loss: 79.2918
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step - acc: 0.1889 - loss: 3.2

<keras.src.callbacks.history.History at 0x1b062594ac0>