# Assignment 4.1 - Mood Detection with OpenCV

Submitted By: Labbao, Benedick D.<br>
Performed On: 03/21/2024<br>
Submitted On: 03/22/2024

Submitted To: Engr. Roman M. Richard

---

## Dataset

### Data Collection

In [1]:
from time import sleep
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np

raw_dataset_path = 'dataset/faces/'
preprocessed_dataset_path = 'dataset/preprocessed_faces/'

face_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_eye.xml')

def display_image(image, title=None, conversion=cv2.COLOR_BGR2RGB):
    image = cv2.cvtColor(image, conversion)
    plt.imshow(image)
    plt.xticks([])
    plt.yticks([])
    
    if title is not None:
        plt.title(title)

    plt.show()

def fix_dataset_names(directory, prefix='face_'):
    files = os.listdir(directory)
    counter = 0
    
    for file in files:
        _, ext = os.path.splitext(file)
        new_name = f"{prefix}{counter:04d}{ext}"
        print('fixing', new_name)
        
        os.rename(os.path.join(directory, file), os.path.join(directory, new_name))
        counter += 1

def generate_name(directory):
    files = os.listdir(directory)
    num_files = len(files)
    face_names = 'face_' + str(num_files).zfill(4)
    return face_names + '.png'

def capture(count=1):
    camera = cv2.VideoCapture(0)

    while (count > 0):
        ret, frame = camera.read()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 5)

        for (x,y,w,h) in faces:
            image = frame[y:(y+h), x:(x+w)]
            name = generate_name(raw_dataset_path)
            cv2.imwrite(raw_dataset_path + name, image)
            print(f'saving {name}...')

        sleep(0.3)
        count-=1

In [94]:
fix_dataset_names(raw_dataset_path + 'sad/', 'sad_')

fixing sad_0000.png
fixing sad_0001.png
fixing sad_0002.png
fixing sad_0003.png
fixing sad_0004.png
fixing sad_0005.png
fixing sad_0006.png
fixing sad_0007.png
fixing sad_0008.png
fixing sad_0009.png
fixing sad_0010.png
fixing sad_0011.png
fixing sad_0012.png
fixing sad_0013.png
fixing sad_0014.png
fixing sad_0015.png
fixing sad_0016.png
fixing sad_0017.png
fixing sad_0018.png
fixing sad_0019.png
fixing sad_0020.png
fixing sad_0021.png
fixing sad_0022.png
fixing sad_0023.png
fixing sad_0024.png
fixing sad_0025.png
fixing sad_0026.png
fixing sad_0027.png
fixing sad_0028.png
fixing sad_0029.png
fixing sad_0030.png
fixing sad_0031.png
fixing sad_0032.png
fixing sad_0033.png
fixing sad_0034.png
fixing sad_0035.png
fixing sad_0036.png
fixing sad_0037.png
fixing sad_0038.png
fixing sad_0039.png
fixing sad_0040.png
fixing sad_0041.png
fixing sad_0042.png
fixing sad_0043.png
fixing sad_0044.png
fixing sad_0045.png
fixing sad_0046.png
fixing sad_0047.png
fixing sad_0048.png
fixing sad_0049.png


In [84]:
capture(100)

saving face_0004.png...
saving face_0005.png...
saving face_0006.png...
saving face_0007.png...
saving face_0008.png...
saving face_0009.png...
saving face_0010.png...
saving face_0011.png...
saving face_0012.png...
saving face_0013.png...
saving face_0014.png...
saving face_0015.png...
saving face_0016.png...
saving face_0017.png...
saving face_0018.png...
saving face_0019.png...
saving face_0020.png...
saving face_0021.png...
saving face_0022.png...
saving face_0023.png...
saving face_0024.png...
saving face_0025.png...
saving face_0026.png...
saving face_0027.png...
saving face_0028.png...
saving face_0029.png...
saving face_0030.png...
saving face_0031.png...
saving face_0032.png...
saving face_0033.png...
saving face_0034.png...
saving face_0035.png...
saving face_0036.png...
saving face_0037.png...
saving face_0038.png...
saving face_0039.png...
saving face_0040.png...
saving face_0041.png...
saving face_0042.png...
saving face_0043.png...
saving face_0044.png...
saving face_0045

This will take face images at an random interval to create a dataset for facial recognition.

### Image Pre-processing

Remove background by locating eyes and extracting face region.

In [3]:
def extract_face_region(image):
    center_point = lambda p1, p2: (int((p1[0] + p2[0]) / 2)+3, int((p1[1] + p2[1]) / 2))

    if len(image.shape) == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    eyes = eye_cascade.detectMultiScale(gray, 1.03, 15, 0, (40, 40))

    # Perform eye detection then find center point of left and right eyes
    eyes_center_points = []
    for (x,y,w,h) in eyes:
        eyes_center_points.append(center_point((x,y), (x+w, y+h)))
        # cv2.rectangle(gray, (x, y), (x+w, y+h), (0, 255, 0), 1)
        # display_image(gray)
        
    # Find center betweem right eye's center point and left eye's center point
    if len(eyes_center_points) < 2:
        return image

    eye_center = center_point(eyes_center_points[0], eyes_center_points[1])
    x, y = eye_center

    # add offset to create a bounding box that only contains face region
    x1, y1, x2, y2 = x - 100, y + 140, x + 100, y - 60
    cv2.rectangle(image, (x1, y1), (x2, y2),  (0, 255, 0), 2)

    return image[y2:y1, x1:x2]

def resize(image):
    w, h = image.shape
    new_image = cv2.resize(image, (140, 140))
    return new_image

In [4]:
def preprocess_images(src_dir, dst_dir):
    files = os.listdir(src_dir)
    
    for file in files:
        file_name, ext = os.path.splitext(file)
        print("Processing image:", file_name + ext)

        image = cv2.imread(src_dir + file_name + ext)
        image = extract_face_region(image)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(dst_dir + file_name + ext, resize(gray))

Now we created a function to perform preprocessing on all images and move them in a separate folder.

In [168]:
preprocess_images(raw_dataset_path + 'angry/', preprocessed_dataset_path + 'angry/')

Processing image: happy_0000.png
Processing image: happy_0001.png
Processing image: happy_0002.png
Processing image: happy_0003.png
Processing image: happy_0004.png
Processing image: happy_0005.png
Processing image: happy_0006.png
Processing image: happy_0007.png
Processing image: happy_0008.png
Processing image: happy_0009.png
Processing image: happy_0010.png
Processing image: happy_0011.png
Processing image: happy_0012.png
Processing image: happy_0013.png
Processing image: happy_0014.png
Processing image: happy_0015.png
Processing image: happy_0016.png
Processing image: happy_0017.png
Processing image: happy_0018.png
Processing image: happy_0019.png
Processing image: happy_0020.png
Processing image: happy_0021.png
Processing image: happy_0022.png
Processing image: happy_0023.png
Processing image: happy_0024.png
Processing image: happy_0025.png
Processing image: happy_0026.png
Processing image: happy_0027.png
Processing image: happy_0028.png
Processing image: happy_0029.png
Processing

In [141]:
preprocess_images(raw_dataset_path + 'happy/', preprocessed_dataset_path + 'happy/')

Processing image: happy_0000.png
Processing image: happy_0001.png
Processing image: happy_0002.png
Processing image: happy_0003.png
Processing image: happy_0004.png
Processing image: happy_0005.png
Processing image: happy_0006.png
Processing image: happy_0007.png
Processing image: happy_0008.png
Processing image: happy_0009.png
Processing image: happy_0010.png
Processing image: happy_0011.png
Processing image: happy_0012.png
Processing image: happy_0013.png
Processing image: happy_0014.png
Processing image: happy_0015.png
Processing image: happy_0016.png
Processing image: happy_0017.png
Processing image: happy_0018.png
Processing image: happy_0019.png
Processing image: happy_0020.png
Processing image: happy_0021.png
Processing image: happy_0022.png
Processing image: happy_0023.png
Processing image: happy_0024.png
Processing image: happy_0025.png
Processing image: happy_0026.png
Processing image: happy_0027.png
Processing image: happy_0028.png
Processing image: happy_0029.png
Processing

In [None]:
preprocess_images(raw_dataset_path + 'sad/', preprocessed_dataset_path + 'sad/')

In [191]:
preprocess_images(raw_dataset_path + 'neutral/', preprocessed_dataset_path + 'neutral/')

Processing image: neutral_0000.png
Processing image: neutral_0001.png
Processing image: neutral_0002.png
Processing image: neutral_0003.png
Processing image: neutral_0004.png
Processing image: neutral_0005.png
Processing image: neutral_0006.png
Processing image: neutral_0007.png
Processing image: neutral_0008.png
Processing image: neutral_0009.png
Processing image: neutral_0010.png
Processing image: neutral_0011.png
Processing image: neutral_0012.png
Processing image: neutral_0013.png
Processing image: neutral_0014.png
Processing image: neutral_0015.png
Processing image: neutral_0016.png
Processing image: neutral_0017.png
Processing image: neutral_0018.png
Processing image: neutral_0019.png
Processing image: neutral_0020.png
Processing image: neutral_0021.png
Processing image: neutral_0022.png
Processing image: neutral_0023.png
Processing image: neutral_0024.png
Processing image: neutral_0025.png
Processing image: neutral_0026.png
Processing image: neutral_0027.png
Processing image: ne

## Models

### Face Recognition Model

I will be using LBPH Algorithm to create a Face Recognition Model to recognize me

In [5]:
import cv2
import os
import numpy as np

In [5]:
def train_face_recognition_model(data_dir):
    recognizer = cv2.face.LBPHFaceRecognizer_create()

    # Prepare training data
    faces = []
    labels = []

    # Recursively walk through the directory and its subdirectories
    for root, _, files in os.walk(data_dir):
        for filename in files:
            # Check if the file has an image extension
            if any(filename.lower().endswith(ext) for ext in ['.png']):
                img = cv2.imread(os.path.join(root, filename), cv2.IMREAD_GRAYSCALE)
                faces.append(img)
                labels.append('Ben')

    # Assign unique integer labels to each person
    label_dict = {label: idx for idx, label in enumerate(np.unique(labels))}
    labels = [label_dict[label] for label in labels]

    # Train the recognizer
    recognizer.train(faces, np.array(labels))

    # Save the trained model
    recognizer.save("face_recognition.xml")
    print("Face recognition model trained and saved successfully.")

def test_face_recognition_model(image):
    recognizer = cv2.face.LBPHFaceRecognizer_create()
    recognizer.read("face_recognition.xml")
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Use LBPH recognizer to predict the identity of the face in the test image
    label, confidence = recognizer.predict(gray_image)

    return (label, confidence)

In [174]:
train_face_recognition_model(raw_dataset_path)

Face recognition model trained and saved successfully.


In [7]:
image = cv2.imread(preprocessed_dataset_path + '/angry/angry_0000.png')
results = test_face_recognition_model(image)

print(results)

(0, 67.41625512149969)


Testing if the model can recognize processed images

In [3]:
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read("face_recognition.xml")

# Initialize the camera

cam = cv2.VideoCapture(0)

while True:
    ret, frame = cam.read()

    if(frame.all() == None):
        continue

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the frame
    faces = face_cascade.detectMultiScale(gray, 1.03, 5)

    # Draw rectangles around the detected faces and recognize them
    for (x, y, w, h) in faces:
        # Recognize the face
        roi_gray = gray[y:y+h, x:x+w]
        id_, confidence = recognizer.predict(roi_gray)

        # If recognized face belongs to you (adjust confidence threshold as needed)
        if confidence < 70:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            label_text = f"You (ID: {id_}, Confidence: {confidence:.2f})"
            cv2.putText(frame, label_text, (x-10, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        else:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
            label_text = f"Unknown (ID: {id_}, Confidence: {confidence:.2f})"
            cv2.putText(frame, label_text, (x-10, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

        # Print ID and confidence
        print(f"ID: {id_}, Confidence: {confidence:.2f}")

    # Display the resulting frame
    cv2.imshow('Face Recognition', frame)

    # Break the loop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cam.release()
cv2.destroyAllWindows()

ID: 0, Confidence: 60.27
ID: 0, Confidence: 60.10
ID: 0, Confidence: 58.94
ID: 0, Confidence: 63.02
ID: 0, Confidence: 60.76
ID: 0, Confidence: 55.66
ID: 0, Confidence: 49.10
ID: 0, Confidence: 51.25
ID: 0, Confidence: 50.96
ID: 0, Confidence: 50.74
ID: 0, Confidence: 51.95
ID: 0, Confidence: 78.07
ID: 0, Confidence: 72.44
ID: 0, Confidence: 73.72
ID: 0, Confidence: 69.27
ID: 0, Confidence: 62.90
ID: 0, Confidence: 62.30
ID: 0, Confidence: 60.81
ID: 0, Confidence: 61.18
ID: 0, Confidence: 61.09
ID: 0, Confidence: 60.81
ID: 0, Confidence: 58.50
ID: 0, Confidence: 70.94
ID: 0, Confidence: 70.51
ID: 0, Confidence: 67.25
ID: 0, Confidence: 76.48
ID: 0, Confidence: 64.04


Testing if it can recognize me using actually images from camera

### Facial Expression Recognition Model

In [8]:
from keras.layers import Conv2D, Dense, MaxPooling2D, Flatten, Input, Dropout, Activation
from keras.models import Sequential, load_model
from sklearn.model_selection import train_test_split

def load_training_data(data_dir, test_size=0.2):
    X = []
    y = []

    # Recursively walk through the directory and its subdirectories
    for root, _, files in os.walk(data_dir):
        for filename in files:
            # Check if the file has an image extension
            if any(filename.lower().endswith(ext) for ext in ['.png']):
                img = cv2.imread(os.path.join(root, filename), cv2.IMREAD_GRAYSCALE)
                X.append(img)
                l = filename.split('_')
                y.append(l[0])

    # Assign unique integer labels to each person
    label_dict = {label: idx for idx, label in enumerate(np.unique(y))}
    y = [label_dict[label] for label in y]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=10010)

    X_train = np.array(X_train).reshape(-1, 140, 140, 1)
    X_test = np.array(X_test).reshape(-1, 140, 140, 1)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    print(X_train.shape)

    return (X_train, y_train), (X_test, y_test)


def train_facial_expression_recognition_model(directory):
    train, valid = load_training_data(directory)

    model = Sequential()
    model.add(Input(shape=(140, 140, 1)))
    model.add(Conv2D(32, (5,5), 2))
    model.add(Activation('relu'))
    model.add(MaxPooling2D())
    model.add(Dropout(0.2))
    model.add(Conv2D(16, (5,5), 2))
    model.add(Activation('relu'))
    model.add(MaxPooling2D())
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(4))
    model.add(Activation('softmax'))

    model.compile('adam',
                'sparse_categorical_crossentropy', # use sparse since the label is not one-hot encoded
                metrics=['accuracy'])
    
    print(model.summary())

    history = model.fit(train[0],
                    train[1],
                    epochs=50,
                    validation_data=valid)
    
    model.save('facial_expression_recognition.keras')

    return history

def test_facial_expression_recognition_model(test_data):
    model = load_model('facial_expression_recognition.keras')
    model.evaluate(test_data[0], test_data[1])

In [9]:
history = train_facial_expression_recognition_model(preprocessed_dataset_path)

(320, 140, 140, 1)


None
Epoch 1/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 462ms/step - accuracy: 0.3232 - loss: 19.2610 - val_accuracy: 0.4000 - val_loss: 1.2947
Epoch 2/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 255ms/step - accuracy: 0.2690 - loss: 2.4599 - val_accuracy: 0.2750 - val_loss: 1.3864
Epoch 3/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 252ms/step - accuracy: 0.2618 - loss: 1.4633 - val_accuracy: 0.2250 - val_loss: 1.3867
Epoch 4/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 267ms/step - accuracy: 0.2723 - loss: 1.3775 - val_accuracy: 0.2250 - val_loss: 1.3869
Epoch 5/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 237ms/step - accuracy: 0.3145 - loss: 1.3765 - val_accuracy: 0.2250 - val_loss: 1.3870
Epoch 6/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 239ms/step - accuracy: 0.3175 - loss: 1.3704 - val_accuracy: 0.2250 - val_loss: 1.3871
Epoch 7/50
[1m10/10[

## Output

In [6]:
import cv2
import numpy as np
from keras.models import load_model


# Load the pre-trained face detector
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read("face_recognition.xml")
model = load_model('facial_expression_recognition.keras')

mood = ['Angry', 'Happy', 'Neutral', 'Sad']

# Initialize the camera
cap = cv2.VideoCapture(0)
 
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Convert the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Draw rectangles around the detected faces and recognize them
    for (x, y, w, h) in faces:
        emotion_label = ''
        label_text = ''
        roi_gray = gray[y:y+h, x:x+w]

        id_, confidence = recognizer.predict(roi_gray)
        facial_expression_input = resize(extract_face_region(roi_gray)).reshape(140, 140, 1)

        if confidence < 70:            
            label_text = f"Ben"
            box_color = (0, 255, 0)

            if facial_expression_input.shape == (140, 140, 1):
                facial_expression_input = np.expand_dims(facial_expression_input, axis=0)
                predictions = model.predict(facial_expression_input)
                emotion_label = mood[np.argmax(predictions)]
        else:
            label_text = f"Unknown"
            box_color = (0, 0, 255)

        cv2.putText(frame, emotion_label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(frame, label_text, (x, y-30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.rectangle(frame, (x, y), (x+w, y+h), box_color, 2)

    # Display the resulting frame
    cv2.imshow('Mood Detection', frame)

    # Break the loop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture
cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

![image.png](./resources/happy_test.PNG)

## Explanation of the steps

1. I collected a total of 400 (100 each mood), the dataset using `haarcascade_frontalface_default.xml` to detect my face and extract it from the video frame, then saved it in `faces` folder.
2. I performed pre-processing on the image to extract face region from all raw datasets, grayscaled it and then resized them to (140, 140) in preparation to use it as training data, then moved it to `preprocessed_faces` folder.
3. Using the raw dataset, I trained an LBPH algorithm to perform face recognition, I saved the model in `face_recognition.xml`. The reason I chose LBPH algorithm because it can detect person's face from front and side.
4. Using the preprocessed dataset, I trained a CNN model for the facial expression recognition, the dataset was split into 80% and 20% for training and validation.
5. With face detection, face recognition, and facial expression recognition, I wrote the code to open a camera and used haar cascade to perform face detection, then extract the face from the grayscaled video frame. the extracted face was inputted in the face recognition. I chose the confidence level of the face recognition to be 70. If the face recognition recognizes me, it then performs the same preprocessing methods that I used to process the dataset so that it can be inputted in the facial expression recognition.

## Conclusion

This activity gives me an insight on how OpenCV is used together with Keras to perform both detection and recognition. I are able to visualize the results of the mood detection with the use of OpenCV.