In [3]:
import numpy as np
#import pandas as pd

from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Flatten, Dense, Input, ReLU, Dropout
import tensorflow.keras as keras
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from PIL import Image

from ultralytics import YOLO

# Convolutional Neural Net

In this notebook, we build a CNN from scratch. We use 4 convolutional layers and a fully connected network at the end that is 3 layers deep. We also added significant regularization in the form of batch normalization and dropout to reduce overfitting. 

In [4]:
#Reduce the image size to facilitate the 
train = keras.utils.image_dataset_from_directory(
    directory='../../data/Webcam Images',
    labels='inferred',
    label_mode='categorical',
    batch_size=16,
    image_size=(500, 500),
    validation_split = 0.2,
    subset = 'training',
    seed = 42    
    )

validation = keras.utils.image_dataset_from_directory(
    directory='../../data/Webcam Images',
    labels='inferred',
    label_mode='categorical',
    batch_size=16,
    image_size=(500, 500),
    validation_split = 0.2,
    subset = 'validation',
    seed = 42   
    )

Found 164 files belonging to 3 classes.
Using 132 files for training.
Found 164 files belonging to 3 classes.
Using 32 files for validation.


In [5]:
#first convolutional layer
#source of archi: https://cs230.stanford.edu/projects_winter_2020/reports/32610274.pdf

default_filters = 32

input_x = Input(shape = (500, 500, 3))

conv_1 = Conv2D(filters = default_filters, kernel_size = (3, 3), name = 'conv_1', padding = 'same',activation='relu' ,data_format='channels_last')(input_x)
batch_norm1 = BatchNormalization(name = 'batch_norm1')(conv_1)
max_pool1 = MaxPool2D(name = 'max_pool1', pool_size = (2,2), strides = (2,2))(batch_norm1)
drop_out1 = Dropout(0.25)(max_pool1)

#second convolutional lyer 
conv2 = Conv2D(filters = 2 * default_filters, kernel_size = (3,3), name = 'conv_2', padding = 'same', activation = 'relu')(drop_out1)
batch_norm2 = BatchNormalization(name = 'batch_norm2')(conv2)
max_pool2 = MaxPool2D(name = 'max_pool2', pool_size = (2,2), strides = (2,2))(batch_norm2)
dropout_2 = Dropout(0.25)(max_pool2)




#third convolutional layer
conv3 = Conv2D(filters = 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_3', padding = 'same', activation = 'relu')(dropout_2)
batch_norm3 = BatchNormalization(name = 'batch_norm3')(conv3)

max_pool3 = MaxPool2D(name = 'max_pool3', pool_size = (2,2), strides = (2,2))(batch_norm3)
dropout_3 = Dropout(0.25)(max_pool3)

#third convolutional layer
conv4 = Conv2D(filters = 2 * 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_4', padding = 'same', activation = 'relu')(dropout_2)
batch_norm4 = BatchNormalization(name = 'batch_norm4')(conv4)

max_pool4 = MaxPool2D(name = 'max_pool4', pool_size = (2,2), strides = (2,2))(batch_norm4)
dropout_4 = Dropout(0.25)(max_pool4)


flat = Flatten()(dropout_3)

dense1 = Dense(256, activation = 'relu', name = 'dense_1')(flat)
drop_out_dense1 = Dropout(0.25)(dense1)
dense2 = Dense(128, activation = 'relu', name = 'dense_2')(drop_out_dense1)
drop_out_dense2 = Dropout(0.25)(dense2)
#final softmax layer
dense3 = Dense(64, activation = 'relu', name = 'dense_3')(drop_out_dense2)
drop_out_dense3 = Dropout(0.25)(dense3)

#3 outputs for the 3 classes
output = Dense(3, activation = 'softmax', name = 'output')(drop_out_dense3)


# In[11]:

In [4]:
# In[ ]:

#Re running may give different results due to initialization
model = Model(input_x, output)

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])

model.fit(train, epochs = 40, validation_data = validation)

Epoch 1/40


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 3s/step - acc: 0.3058 - loss: 51.4915 - val_acc: 0.4333 - val_loss: 276.6225
Epoch 2/40
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3s/step - acc: 0.3894 - loss: 76.5356 - val_acc: 0.4000 - val_loss: 188.4017
Epoch 3/40
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3s/step - acc: 0.5275 - loss: 21.8435 - val_acc: 0.2333 - val_loss: 151.2392
Epoch 4/40
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3s/step - acc: 0.5496 - loss: 17.3094 - val_acc: 0.3000 - val_loss: 90.3327
Epoch 5/40
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3s/step - acc: 0.5511 - loss: 17.8779 - val_acc: 0.3000 - val_loss: 84.0967
Epoch 6/40
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3s/step - acc: 0.5014 - loss: 15.8931 - val_acc: 0.3333 - val_loss: 24.9613
Epoch 7/40
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - acc: 0.6987 - loss: 12.1

<keras.src.callbacks.history.History at 0x203f16f2320>

# Face cropping with CNN

In [2]:
#import data
#read the data
#The data is read from the directory and each batch can be thought of as a set of labels and datapoints
#Might need to change the directory depending on where

#50
train = keras.utils.image_dataset_from_directory(
    directory='../../data/Webcam Images',
    labels='inferred',
    label_mode='categorical',
    batch_size=140,
    image_size=(1000, 1000),
    validation_split = 0.2,
    subset = 'training',
    seed = 42,
   
    )

validation = keras.utils.image_dataset_from_directory(
    directory='../../data/Webcam Images',
    labels='inferred',
    label_mode='categorical',
    batch_size=140,
    image_size=(1000, 1000),
    validation_split = 0.2,
    subset = 'validation',
    seed = 42,

    )

Found 154 files belonging to 3 classes.
Using 124 files for training.
Found 154 files belonging to 3 classes.
Using 30 files for validation.


In [3]:
#convert images to numpy
train_processed_numpy = train.as_numpy_iterator()
validation_processed_numpy = validation.as_numpy_iterator()

X_train, y_train = train_processed_numpy.next()
X_val, y_val = validation_processed_numpy.next()

In [4]:
#convert the images to cropped faces
face_classifier = YOLO("../../app/src/face_detection/model/yolov8n-face.pt")

X_cropped_train = []
y_train_cropped = []
n_training = X_train.shape[0]

for i in range(n_training):
    face = X_train[i]
    faces = face_classifier.predict(face)
    boxes = faces[0].boxes.xyxy.tolist()
    if boxes:
        
        #extract the face based on the output from the YOLOv8 model        
        left, bottom, right, top = boxes[0]
        cropped_face = face[int(bottom):int(top), int(left):int(right)]
        #resize cropped face to a std shape, 100x100 for now but can adjust this
        pil_face = Image.fromarray(np.uint8(cropped_face))
        pil_face = pil_face.resize((200, 200))
        numpy_cropped_face = np.array(pil_face)
        #append this to the new list containing all cropped faces
        X_cropped_train.append(numpy_cropped_face)
        y_train_cropped.append(y_train[i])

X_val_cropped = []
y_val_cropped = []
n_val = X_val.shape[0]
for i in range(n_val):
    face = X_val[i]
    faces = face_classifier.predict(face)
    boxes = faces[0].boxes.xyxy.tolist()
    if boxes:
        
        #extract the face based on the output from the YOLOv8 model        
        left, bottom, right, top = boxes[0]
        cropped_face = face[int(bottom):int(top), int(left):int(right)]
        #resize cropped face to a std shape, 100x100 for now but can adjust this
        pil_face = Image.fromarray(np.uint8(cropped_face))
        pil_face = pil_face.resize((200, 200))
        numpy_cropped_face = np.array(pil_face)
        #append this to the new list containing all cropped faces
        X_val_cropped.append(numpy_cropped_face)
        y_val_cropped.append(y_val[i])





  from .autonotebook import tqdm as notebook_tqdm


0: 640x640 1 face, 167.1ms
Speed: 49.5ms preprocess, 167.1ms inference, 2421.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 100.1ms
Speed: 0.0ms preprocess, 100.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 86.0ms
Speed: 16.8ms preprocess, 86.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 86.6ms
Speed: 10.0ms preprocess, 86.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 81.0ms
Speed: 15.8ms preprocess, 81.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 89.5ms
Speed: 16.7ms preprocess, 89.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 faces, 78.8ms
Speed: 9.0ms preprocess, 78.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 80.5ms
Speed: 19.5ms preprocess, 80.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 

In [5]:
#convert data into neural net input format

X_cropped_train = np.array(X_cropped_train)
y_train_cropped = np.array(y_train_cropped)
X_val_cropped = np.array(X_val_cropped)
y_val_cropped = np.array(y_val_cropped)

In [17]:
#first convolutional layer
#source of archi: https://cs230.stanford.edu/projects_winter_2020/reports/32610274.pdf

default_filters = 32

input_x = Input(shape = (200, 200, 3))

conv_1 = Conv2D(filters = default_filters, kernel_size = (3, 3), name = 'conv_1', padding = 'same',activation='relu' ,data_format='channels_last')(input_x)
batch_norm1 = BatchNormalization(name = 'batch_norm1')(conv_1)
max_pool1 = MaxPool2D(name = 'max_pool1', pool_size = (2,2), strides = (2,2))(batch_norm1)
drop_out1 = Dropout(0.25)(max_pool1)

#second convolutional lyer 
conv2 = Conv2D(filters = 2 * default_filters, kernel_size = (3,3), name = 'conv_2', padding = 'same', activation = 'relu')(drop_out1)
batch_norm2 = BatchNormalization(name = 'batch_norm2')(conv2)
max_pool2 = MaxPool2D(name = 'max_pool2', pool_size = (2,2), strides = (2,2))(batch_norm2)
dropout_2 = Dropout(0.25)(max_pool2)




#third convolutional layer
conv3 = Conv2D(filters = 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_3', padding = 'same', activation = 'relu')(dropout_2)
batch_norm3 = BatchNormalization(name = 'batch_norm3')(conv3)

max_pool3 = MaxPool2D(name = 'max_pool3', pool_size = (2,2), strides = (2,2))(batch_norm3)
dropout_3 = Dropout(0.25)(max_pool3)

#third convolutional layer
conv4 = Conv2D(filters = 2 * 2 * 2 * default_filters, kernel_size = (3,3), name = 'conv_4', padding = 'same', activation = 'relu')(dropout_2)
batch_norm4 = BatchNormalization(name = 'batch_norm4')(conv4)

max_pool4 = MaxPool2D(name = 'max_pool4', pool_size = (2,2), strides = (2,2))(batch_norm4)
dropout_4 = Dropout(0.25)(max_pool4)


flat = Flatten()(dropout_3)

dense1 = Dense(256, activation = 'relu', name = 'dense_1')(flat)
drop_out_dense1 = Dropout(0.25)(dense1)
dense2 = Dense(128, activation = 'relu', name = 'dense_2')(drop_out_dense1)
drop_out_dense2 = Dropout(0.25)(dense2)
#final softmax layer
dense3 = Dense(64, activation = 'relu', name = 'dense_3')(drop_out_dense2)
drop_out_dense3 = Dropout(0.25)(dense3)

#3 outputs for the 3 classes
output = Dense(3, activation = 'softmax', name = 'output')(drop_out_dense3)


# In[11]:

In [20]:
# In[ ]:


model = Model(input_x, output)

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])

model.fit(x = X_cropped_train, y = y_train_cropped, epochs = 40, validation_data = (X_val_cropped, y_val_cropped))

Epoch 1/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 554ms/step - acc: 0.4735 - loss: 11.1856 - val_acc: 0.2308 - val_loss: 83.5056
Epoch 2/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 666ms/step - acc: 0.5295 - loss: 9.0523 - val_acc: 0.1923 - val_loss: 57.7484
Epoch 3/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 714ms/step - acc: 0.4030 - loss: 9.5099 - val_acc: 0.2692 - val_loss: 35.4911
Epoch 4/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 699ms/step - acc: 0.5259 - loss: 4.4845 - val_acc: 0.2308 - val_loss: 36.2974
Epoch 5/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 682ms/step - acc: 0.4020 - loss: 5.7383 - val_acc: 0.2308 - val_loss: 47.0196
Epoch 6/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 666ms/step - acc: 0.4157 - loss: 5.7699 - val_acc: 0.2308 - val_loss: 46.7552
Epoch 7/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 684ms/step - acc: 0.

<keras.src.callbacks.history.History at 0x1ad8fddcc40>