In [1]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Layer, Conv2D, MaxPooling2D, Input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
import tensorflow as tf
import cv2
import mediapipe as mp
import os
import time
import numpy as np
# Show which TensorFlow build this notebook was executed with.
print(tf.version.VERSION)

2.12.1


In [2]:
# Load the trained emotion classifier for inference only (compile=False skips
# restoring the optimizer/loss configuration).
# The weights file can be found in SharePoint; the path is resolved relative to
# the current working directory.
MODEL_PATH = 'custom_cnn_v2.h5'
model = load_model(MODEL_PATH, compile=False)

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


In [4]:
# Live emotion recognition from the webcam.
#
# Every frame is searched for a face with MediaPipe; at most once per
# `capture_interval` seconds the first detected face is cropped, saved to disk
# and classified by `model`.  The latest class percentages are drawn in a black
# panel to the right of the video.  Press 'q' in the window to stop.

# Initialize Mediapipe Face Detection
mp_face_detection = mp.solutions.face_detection

# Create a folder to save images
save_folder = "takenPictures"
os.makedirs(save_folder, exist_ok=True)
file_path = os.path.join(save_folder, "face_image.jpg")

CAMERA_INDEX = 1        # OpenCV device index of the webcam to open
capture_interval = 0.5  # Capture every 0.5 seconds

# NOTE(review): label order is assumed to match the model's output order - confirm.
emotion_labels = ["Angry", "Happy", "Disgust", "Fear", "Contempt", "Neutral", "Sad", "Surprise"]
emotion_results = {label: 0 for label in emotion_labels}

# Text style of the results panel (loop-invariant, so defined once).
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
font_thickness = 2
text_white = (255, 255, 255)
text_green = (0, 255, 0)

# Open webcam
cap = cv2.VideoCapture(CAMERA_INDEX)
last_capture_time = time.time()

try:
    # The context manager closes the detector even if the loop raises.
    with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5
    ) as face_detection:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Convert frame to RGB (OpenCV delivers BGR, MediaPipe expects RGB)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect faces
            results = face_detection.process(rgb_frame)

            capture_due = time.time() - last_capture_time > capture_interval
            if results.detections and capture_due:
                # Get the first detected face
                bbox = results.detections[0].location_data.relative_bounding_box
                frame_h, frame_w = frame.shape[:2]

                # Convert the relative box to pixels and clamp it to the frame:
                # a box that pokes outside the image would otherwise give a
                # negative start index (wrap-around slice) or an empty crop.
                left = int(bbox.xmin * frame_w)
                top = int(bbox.ymin * frame_h)
                right = min(left + int(bbox.width * frame_w), frame_w)
                bottom = min(top + int(bbox.height * frame_h), frame_h)
                left, top = max(left, 0), max(top, 0)

                if right > left and bottom > top:
                    # Extract the face and resize it to (224,224,3)
                    face_image = frame[top:bottom, left:right]
                    face_resized = cv2.resize(face_image, (224, 224))

                    # Keep the most recent crop on disk (overwritten each time).
                    cv2.imwrite(file_path, face_resized)
                    last_capture_time = time.time()

                    # Build the model input directly from memory: RGB channel
                    # order, float32, scaled to [0, 1], with a batch axis.
                    face_rgb = cv2.cvtColor(face_resized, cv2.COLOR_BGR2RGB)
                    batch = np.expand_dims(face_rgb.astype(np.float32) / 255.0, axis=0)

                    # verbose=0 suppresses the per-call progress bar.
                    pred = model.predict(batch, verbose=0)[0] * 100

                    # Show the latest prediction as percentages.
                    emotion_results = {
                        label: float(score) for label, score in zip(emotion_labels, pred)
                    }

            # Draw emotion results on the right side
            black_overlay = np.zeros((frame.shape[0], 300, 3), dtype=np.uint8)  # Black rectangle for text
            sorted_emotions = sorted(emotion_results.items(), key=lambda item: item[1], reverse=True)

            for rank, (emotion, score) in enumerate(sorted_emotions):
                # Highlight the top-ranked emotion in green.
                text_color = text_green if rank == 0 else text_white
                text = f"{emotion}: {score:.2f}%"
                cv2.putText(black_overlay, text, (20, 50 + 40 * rank), font, font_scale, text_color, font_thickness)

            frame = np.hstack((frame, black_overlay))

            cv2.imshow("Emotion Recognition - Press 'q' to exit", frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Release resources, also when the loop exits through an exception.
    cap.release()
    cv2.destroyAllWindows()
    # Presumably needed so the window actually closes on some platforms
    # (e.g. macOS) - kept from the original.
    cv2.waitKey(1)

I0000 00:00:1740609884.826698 5424106 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
2025-02-26 23:44:46.704739: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




-1