In [3]:
import numpy as np
#import pandas as pd

from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Flatten, Dense, Input, ReLU, Dropout
import tensorflow.keras as keras
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from PIL import Image

from ultralytics import YOLO

In [4]:
# Load the face images from disk and split them 80/20 into a training and a
# validation dataset. Labels are inferred from the class sub-folders and
# encoded as categorical vectors in the order listed in `class_names`.
# NOTE: the directory is relative to this notebook; adjust it if the notebook
# or the data is moved.
shared_loader_args = dict(
    directory='../../data/Faces',
    labels='inferred',
    label_mode='categorical',
    # Larger than the 66 files found in the recorded run, so each subset is
    # delivered as a single batch.
    batch_size=140,
    image_size=(1000, 1000),
    validation_split=0.2,
    # Identical seed and split for both calls keep the two subsets consistent
    # with each other (see the Keras docs for `validation_split`/`seed`).
    seed=50,
    class_names=['Happy', 'Sad', 'Neutral'],
)

train = keras.utils.image_dataset_from_directory(
    subset='training',
    **shared_loader_args,
)

validation = keras.utils.image_dataset_from_directory(
    subset='validation',
    **shared_loader_args,
)

Found 66 files belonging to 3 classes.


Using 53 files for training.
Found 66 files belonging to 3 classes.
Using 13 files for validation.


In [5]:
#convert images to numpy
def _collect_batches(batch_iterator):
    """Gather every (images, labels) batch from an iterator into two arrays.

    Reading only the first batch silently drops samples as soon as a subset
    holds more images than the loader's batch_size, so all batches are
    consumed here.

    Args:
        batch_iterator: iterable yielding (images, labels) pairs of NumPy arrays.

    Returns:
        Tuple (images, labels) covering every sample the iterator produced.

    Raises:
        ValueError: if the iterator yields no batches at all.
    """
    batches = list(batch_iterator)
    if not batches:
        raise ValueError("dataset yielded no batches")
    if len(batches) == 1:
        # Common case here (batch_size exceeds the dataset size): hand back the
        # single batch as-is instead of copying large image arrays.
        images, labels = batches[0]
        return images, labels
    image_batches, label_batches = zip(*batches)
    return np.concatenate(image_batches), np.concatenate(label_batches)


train_processed_numpy = train.as_numpy_iterator()
validation_processed_numpy = validation.as_numpy_iterator()

X_train, y_train = _collect_batches(train_processed_numpy)
X_val, y_val = _collect_batches(validation_processed_numpy)

In [8]:
#convert the images to cropped faces
# Side length of the cropped faces; must match the spatial size of the CNN
# input layer, which is (48, 48, 3).
FACE_SIZE = (48, 48)

face_classifier = YOLO("../../app/src/face_detection/model/yolov8n-face.pt")


def crop_faces(images, labels, detector, size=FACE_SIZE):
    """Detect one face per image, crop it and resize it to `size`.

    Images in which the detector finds no face (or only a degenerate,
    zero-area box) are skipped together with their label, so the two returned
    lists always stay aligned.

    Args:
        images: sequence of HxWx3 RGB images with pixel values in 0-255.
        labels: sequence of labels, one per image.
        detector: object exposing the Ultralytics `predict` API.
        size: (width, height) of the resized crops.

    Returns:
        Tuple (faces, kept_labels): a list of uint8 arrays of shape
        (size[1], size[0], 3) and the list of labels of the kept images.
    """
    faces = []
    kept_labels = []
    for image, label in zip(images, labels):
        rgb = np.uint8(image)
        # Ultralytics documents NumPy input as HWC, BGR, uint8, whereas the
        # Keras loader yields RGB floats - flip the channel axis for detection
        # only; the crop itself is taken from the RGB image.
        bgr = np.ascontiguousarray(rgb[..., ::-1])
        results = detector.predict(bgr)
        boxes = results[0].boxes.xyxy.tolist()
        if not boxes:
            continue

        # xyxy = (left, top, right, bottom) in pixel coordinates; only the
        # first box is used. NOTE(review): presumably the most confident
        # detection - confirm against the Ultralytics result ordering.
        left, top, right, bottom = boxes[0]
        height, width = rgb.shape[:2]
        left, top = max(int(left), 0), max(int(top), 0)
        right, bottom = min(int(right), width), min(int(bottom), height)
        face = rgb[top:bottom, left:right]
        if face.size == 0:
            # A box thinner than one pixel cannot be resized by PIL.
            continue

        resized_face = Image.fromarray(face).resize(size)
        faces.append(np.array(resized_face))
        kept_labels.append(label)
    return faces, kept_labels


# Sample counts before cropping; retained in case other cells refer to them.
n_training = X_train.shape[0]
n_val = X_val.shape[0]

X_cropped_train, y_train_cropped = crop_faces(X_train, y_train, face_classifier)
X_val_cropped, y_val_cropped = crop_faces(X_val, y_val, face_classifier)



0: 640x640 1 face, 104.6ms
Speed: 8.1ms preprocess, 104.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 100.1ms
Speed: 14.6ms preprocess, 100.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 99.8ms
Speed: 0.8ms preprocess, 99.8ms inference, 15.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 99.3ms
Speed: 0.0ms preprocess, 99.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 75.9ms
Speed: 2.2ms preprocess, 75.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 100.0ms
Speed: 0.0ms preprocess, 100.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 83.2ms
Speed: 16.6ms preprocess, 83.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 83.2ms
Speed: 16.8ms preprocess, 83.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640

In [9]:
# Stack the per-image lists into single NumPy arrays so they can be passed to
# the network as `x`, `y` and `validation_data`.
X_cropped_train, y_train_cropped = np.array(X_cropped_train), np.array(y_train_cropped)
X_val_cropped, y_val_cropped = np.array(X_val_cropped), np.array(y_val_cropped)

In [10]:
# CNN architecture: three convolutional stages followed by a dense classifier.
#source of archi: https://cs230.stanford.edu/projects_winter_2020/reports/32610274.pdf

default_filters = 32
# One output unit per emotion class (Happy, Sad, Neutral).
n_classes = 3


def conv_block(x, filters, index, **conv_kwargs):
    """Apply Conv2D(3x3, ReLU) -> BatchNorm -> 2x2 MaxPool -> Dropout(0.5).

    Args:
        x: input tensor of the stage.
        filters: number of convolution filters.
        index: stage number, used in the layer names
            (`conv_<index>`, `batch_norm<index>`, `max_pool<index>`).
        **conv_kwargs: extra keyword arguments forwarded to `Conv2D`.

    Returns:
        Output tensor of the stage; the pooling halves each spatial dimension.
    """
    x = Conv2D(filters = filters, kernel_size = (3, 3), name = f'conv_{index}',
               padding = 'same', activation = 'relu', **conv_kwargs)(x)
    x = BatchNormalization(name = f'batch_norm{index}')(x)
    x = MaxPool2D(name = f'max_pool{index}', pool_size = (2, 2), strides = (2, 2))(x)
    return Dropout(0.5)(x)


input_x = Input(shape = (48, 48, 3))

# Filter count doubles at every stage (32 -> 64 -> 128) while the spatial size
# is halved by each pooling layer (48 -> 24 -> 12 -> 6).
features = conv_block(input_x, default_filters, 1, data_format = 'channels_last')
features = conv_block(features, 2 * default_filters, 2)
features = conv_block(features, 2 * 2 * default_filters, 3)

flat = Flatten()(features)

# Fully connected head: (units, dropout rate) for each hidden ReLU layer.
hidden = flat
for index, (units, rate) in enumerate([(128, 0.4), (128, 0.4), (64, 0.5)], start = 1):
    hidden = Dense(units, activation = 'relu', name = f'dense_{index}')(hidden)
    hidden = Dropout(rate)(hidden)

# Final softmax layer producing one probability per class.
output = Dense(n_classes, activation = 'softmax', name = 'output')(hidden)

In [11]:
# Save the model to disk only when the validation loss improves, so the file
# holds the best epoch rather than whatever the last epoch happened to be.
checkpoint = ModelCheckpoint(filepath = 'cnn_scratch.keras', monitor = 'val_loss', save_best_only = True)

model = Model(input_x, output)

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])

# Pixel values are scaled to [0, 1] here, outside the model, so the same
# division by 255 has to be applied to any image fed to the saved model.
# The returned History object is kept so the learning curves can be inspected.
history = model.fit(
    x = X_cropped_train/255,
    y = y_train_cropped,
    epochs = 100,
    validation_data = (X_val_cropped/255, y_val_cropped),
    callbacks = [checkpoint],
)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 268ms/step - acc: 0.3349 - loss: 3.0139 - val_acc: 0.3846 - val_loss: 1.0981
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - acc: 0.3221 - loss: 3.1685 - val_acc: 0.3077 - val_loss: 1.0994
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - acc: 0.3662 - loss: 3.8649 - val_acc: 0.3077 - val_loss: 1.0983
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - acc: 0.2500 - loss: 3.4627 - val_acc: 0.2308 - val_loss: 1.0994
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - acc: 0.2885 - loss: 3.3915 - val_acc: 0.2308 - val_loss: 1.1035
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - acc: 0.3838 - loss: 2.3278 - val_acc: 0.3077 - val_loss: 1.1078
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - acc: 0.1803 -

<keras.src.callbacks.history.History at 0x2b650a20820>