In [1]:
#import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras.layers import Input, Flatten, Dense, ReLU, BatchNormalization
from tensorflow.keras import Model
from tensorflow.keras.losses import CategoricalCrossentropy
from ultralytics import YOLO
from keras.callbacks import ModelCheckpoint

from PIL import Image

from tensorflow.keras.applications import VGG19 as vgg




In [26]:
#read the data
#The data is read from the directory and each batch can be thought of as a set of labels and datapoints
#Might need to change the directory depending on where

#Seed 50 is decent performing
train = keras.utils.image_dataset_from_directory(
    directory='../../data/Faces',
    labels='inferred',
    label_mode='categorical',
    batch_size=140,
    image_size=(1000, 1000),
    validation_split = 0.2,
    subset = 'training',
    seed = 50
    )

validation = keras.utils.image_dataset_from_directory(
    directory='../../data/Faces',
    labels='inferred',
    label_mode='categorical',
    batch_size=140,
    image_size=(1000, 1000),
    validation_split = 0.2,
    subset = 'validation',
    seed = 50
    )

Found 140 files belonging to 6 classes.
Using 112 files for training.
Found 140 files belonging to 6 classes.
Using 28 files for validation.


In [27]:
train_processed = train.map(lambda x, y: (keras.applications.vgg19.preprocess_input(x), y))

validation_processed = validation.map(lambda x, y: (keras.applications.vgg19.preprocess_input(x), y))

In [28]:

train_processed_numpy = train_processed.as_numpy_iterator()
validation_processed_numpy = validation_processed.as_numpy_iterator()

X_train, y_train = train_processed_numpy.next()
X_val, y_val = validation_processed_numpy.next()

In [29]:
#convert the images to cropped faces
face_classifier = YOLO("../../app/src/face_detection/model/yolov8n-face.pt")

X_cropped_train = []

for i in range(112):
    face = X_train[i]
    faces = face_classifier.predict(face)
    boxes = faces[0].boxes.xyxy.tolist()
    if boxes:
        
        #extract the face based on the output from the YOLOv8 model        
        left, bottom, right, top = boxes[0]
        cropped_face = face[int(bottom):int(top), int(left):int(right)]
        #resize cropped face to a std shape, 100x100 for now but can adjust this
        pil_face = Image.fromarray(np.uint8(cropped_face))
        pil_face = pil_face.resize((75, 75))
        numpy_cropped_face = np.array(pil_face)
        #append this to the new list containing all cropped faces
        X_cropped_train.append(numpy_cropped_face)

X_val_cropped = []

for i in range(28):
    face = X_val[i]
    faces = face_classifier.predict(face)
    boxes = faces[0].boxes.xyxy.tolist()
    if boxes:
        
        #extract the face based on the output from the YOLOv8 model        
        left, bottom, right, top = boxes[0]
        cropped_face = face[int(bottom):int(top), int(left):int(right)]
        #resize cropped face to a std shape, 100x100 for now but can adjust this
        pil_face = Image.fromarray(np.uint8(cropped_face))
        pil_face = pil_face.resize((75, 75))
        numpy_cropped_face = np.array(pil_face)
        #append this to the new list containing all cropped faces
        X_val_cropped.append(numpy_cropped_face)



0: 640x640 1 face, 101.1ms
Speed: 13.7ms preprocess, 101.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 83.5ms
Speed: 3.7ms preprocess, 83.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 99.1ms
Speed: 8.3ms preprocess, 99.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 86.2ms
Speed: 4.5ms preprocess, 86.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 83.3ms
Speed: 16.9ms preprocess, 83.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 82.9ms
Speed: 16.8ms preprocess, 82.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 82.1ms
Speed: 15.8ms preprocess, 82.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 face, 100.3ms
Speed: 0.0ms preprocess, 100.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)


In [30]:
X_cropped_train = np.array(X_cropped_train)
print(X_cropped_train.shape)

X_cropped_val = np.array(X_val_cropped)
print(X_cropped_val.shape)

(112, 100, 100, 3)
(28, 100, 100, 3)


In [31]:
#flatten the output
inputs = Input(shape=(100, 100, 3))

#set inference mode to be false, flatten the output and add dense layers before making the final prediction
vgg_output = keras.applications.VGG19(input_tensor=inputs,                                      
    include_top=False,
    weights="imagenet",

    input_shape=(100, 100, 3),
    pooling=None,


)
for layer in vgg_output.layers:
    layer.trainable = False
flatten = Flatten()(vgg_output.output)
dense1 = Dense(256, activation = 'relu', name = 'first_dense_layer')(flatten)
batch_norm1 = BatchNormalization(name = 'batch_norm1')(dense1)
dense2 = Dense(256, activation = 'relu', name = 'second_dense_layer')(batch_norm1)
batch_norm1 = BatchNormalization(name = 'batch_norm2')(dense2)
output = Dense(6, activation = 'softmax', name = 'output')(batch_norm1)
final_model = Model(inputs, output)


# In[ ]:



final_model.compile(optimizer = 'adam', loss = CategoricalCrossentropy(), metrics = ['accuracy'])
#save the model every epoch
checkpoint = ModelCheckpoint(filepath = 'vggemotion.keras')



In [32]:
final_model.fit(x=X_cropped_train,y= y_train, validation_data = (X_cropped_val, y_val), epochs=40)


# In[ ]:


final_model.save('vggemotion.keras')

Epoch 1/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 543ms/step - accuracy: 0.1487 - loss: 2.6739 - val_accuracy: 0.1786 - val_loss: 6.6732
Epoch 2/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 824ms/step - accuracy: 0.8560 - loss: 0.5364 - val_accuracy: 0.2500 - val_loss: 3.8939
Epoch 3/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.9851 - loss: 0.2036 - val_accuracy: 0.4286 - val_loss: 3.3220
Epoch 4/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.9726 - loss: 0.1826 - val_accuracy: 0.3571 - val_loss: 3.1991
Epoch 5/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.9850 - loss: 0.0943 - val_accuracy: 0.3929 - val_loss: 2.3622
Epoch 6/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 1.0000 - loss: 0.0589 - val_accuracy: 0.3929 - val_loss: 2.1087
Epoch 7/40
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m