In [1]:
import os
import cv2
import mediapipe as mp

import pickle

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

The function below extracts the face landmarks from a single image. It returns a flat list with the normalized landmark coordinates.


In [2]:
def single_landmarks(image, static_image_mode):
    """Extract the face-mesh landmarks of the first face found in an image.

    The image is converted from BGR to RGB and passed to MediaPipe's
    ``FaceMesh`` (at most one face, detection confidence 0.5). Each landmark
    coordinate is shifted by the minimum of its axis, so the smallest x, y
    and z values all become 0.

    Args:
        image: BGR image array, as returned by ``cv2.imread`` or
            ``cv2.VideoCapture.read``.
        static_image_mode: Forwarded to ``FaceMesh``; ``True`` for unrelated
            still images, ``False`` for video frames.

    Returns:
        A flat list ``[x0, y0, z0, x1, y1, z1, ...]`` of shifted coordinates,
        or a list of ``468 * 3`` zeros when no face is detected.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Use FaceMesh as a context manager so its resources are released after
    # every call; otherwise each call leaks one FaceMesh instance.
    with mp.solutions.face_mesh.FaceMesh(static_image_mode=static_image_mode,
                                         max_num_faces=1,
                                         min_detection_confidence=0.5) as face_mesh:
        results = face_mesh.process(rgb)

    # Placeholder feature vector used when there is nothing to measure:
    # 468 landmarks x 3 coordinates.
    no_face = [0] * 468 * 3

    if not results.multi_face_landmarks:
        return no_face

    points = [(point.x, point.y, point.z)
              for point in results.multi_face_landmarks[0].landmark]
    if not points:
        return no_face

    # Compute the per-axis minimum once instead of once per landmark.
    min_x = min(x for x, _, _ in points)
    min_y = min(y for _, y, _ in points)
    min_z = min(z for _, _, z in points)

    features = []
    for x, y, z in points:
        features.extend((x - min_x, y - min_y, z - min_z))
    return features


In [3]:
def prepare_data(directory):
    """Build the landmark dataset and save it to ``data.txt``.

    Walks ``directory`` recursively. Wherever a folder named "Happy",
    "Neutral" or "Surprised" is found, every file inside it is read as an
    image, converted to landmark features with ``single_landmarks`` and stored
    as one row: the features followed by the class index (1 = Happy,
    2 = Neutral, 3 = Surprised).

    Files that OpenCV cannot decode and images in which no face is detected
    are skipped, so they neither crash the run nor add all-zero rows to the
    training data.

    Args:
        directory: Root folder to search for the emotion sub-folders.

    Returns:
        None. The rows are written to ``data.txt`` in the working directory.
    """
    # Class index is the 1-based position in this list.
    emotion_folders = ["Happy", "Neutral", "Surprised"]
    rows = []

    for root, dirs, _files in os.walk(directory):
        # Sorting in place makes os.walk visit sub-folders in a stable order,
        # so the rows in data.txt come out in the same order on every run.
        dirs.sort()
        for idx, emotion_folder in enumerate(emotion_folders, start=1):
            if emotion_folder not in dirs:
                continue
            emotion_folder_path = os.path.join(root, emotion_folder)
            for file_name in sorted(os.listdir(emotion_folder_path)):
                image_path = os.path.join(emotion_folder_path, file_name)
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread returns None for files it cannot decode
                    # (e.g. thumbnails databases, text files).
                    continue
                features = single_landmarks(image, True)
                if not any(features):
                    # An all-zero vector is the "no face detected" placeholder;
                    # labelling it with an emotion would only add noise.
                    continue
                # Store the landmarks followed by the emotion index.
                rows.append(list(features) + [int(idx)])

    np.savetxt('data.txt', np.asarray(rows))

In [4]:
# Root folder of the image dataset. prepare_data searches it for the
# "Happy", "Neutral" and "Surprised" sub-folders.
# NOTE: this is an absolute, machine-specific path; change it to wherever the
# dataset lives before running this cell.
folder_to_search = r"C:\Users\Jorge\Facial expression recognizer\Final_dataset"
# Extract the landmarks of every image and write them, with labels, to data.txt.
prepare_data(folder_to_search)

Now we have prepared the data that will train our model. It consists of all the landmarks of the faces in our images, each row paired with an index that indicates which expression the image shows.

In [5]:
import pickle

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the landmark features and labels written by prepare_data.
data_file = "data.txt"
data = np.loadtxt(data_file)

# Split data into features (X) and labels (y).
X = data[:, :-1]             # Features are all columns except the last one
# np.loadtxt yields floats; the class indices are integers (1, 2, 3), so store
# them as such to get clean integer predictions from the classifier.
y = data[:, -1].astype(int)  # Labels are the last column

# Hold out 20% of the rows for testing, keeping the class proportions equal
# in both splits (stratify) and the split itself reproducible (random_state).
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    shuffle=True,
                                                    stratify=y)

# Seed the forest as well: without random_state every run trains a different
# model, so the reported accuracy cannot be reproduced.
rf_classifier = RandomForestClassifier(random_state=42)

# Train the classifier on the training data.
rf_classifier.fit(X_train, y_train)

# Make predictions on the held-out test data.
y_pred = rf_classifier.predict(X_test)

# Report accuracy and the per-class confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print(confusion_matrix(y_test, y_pred))

# Persist the trained model so the live demo can load it.
with open('./model', 'wb') as f:
    pickle.dump(rf_classifier, f)

Accuracy: 89.87%
[[74  1  6]
 [ 2 64  3]
 [ 7  4 66]]


We can see that the model is well trained: it achieves high accuracy, and the confusion matrix shows few misclassifications.

Testing the model in real time images with my face:

In [7]:
def test_model(frame):
    with open('./model', 'rb') as f:
        model = pickle.load(f)

    face_landmarks = single_landmarks(frame, False)
    output = model.predict([face_landmarks])

    if output == 1: print("Happy")
    elif output == 2:print("Neutral")
    else:print("Surprised")
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = str(output)
    position = (50, 50)
    font_scale = 1
    font_color = (255, 255, 255)  # white color in BGR
    thickness = 2
    cv2.putText(frame, text, position, font, font_scale, font_color, thickness)
    
    

    cv2.imshow('frame', frame)
    cv2.waitKey(25)

def start():
    
    #Videocapture, select yopur camera with 0,1,2,3...
    cap = cv2.VideoCapture(0)

    #check the access to camera
    if not cap.isOpened():
        print("Error: Unable to open camera.")
        exit()

    #capture each frame until pressed "q"
    
    while True:
        ret, f = cap.read()
        if not ret:
            print("Error: Unable to read frame.")
            break
       
        
        test_model(f)

        # Check for keypress (press 'q' to exit)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        

    #release the video and close the display window
    cap.release()
    cv2.destroyAllWindows()
    
# Launch the live webcam demo; it loops until a frame cannot be read or the
# 'q' key check inside start() succeeds.
start()

Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised
Surprised


Clearly, it is not working, even though we saw that our model was pretty accurate on the test split. Maybe the problem is the dataset, which may not be representative of real-time images.