# In [1]:
import cv2
import mediapipe as mp
import numpy as np
import joblib

# Real-time emotion detection demo.
#
# Reads frames from a webcam, extracts MediaPipe FaceMesh landmarks, feeds the
# flattened (x, y) landmark coordinates through a scaler and a classifier, and
# overlays the predicted label and the landmarks on the live video.

# Load model, label encoder, and scaler.
# NOTE(security): joblib.load unpickles arbitrary Python objects. Only load
# artifacts that come from a trusted source.
model = joblib.load('mlp_emotion_model_balanced.pkl')
label_encoder = joblib.load('label_encoder2.pkl')
scaler = joblib.load('scaler.pkl')

# Initialize MediaPipe FaceMesh (video mode, single face).
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1)

# Open webcam (use 0 or 1 depending on your device)
cap = cv2.VideoCapture(1)

# Smallest coordinates at which the label is still drawn inside the frame.
# putText anchors text at its bottom-left corner, so the baseline needs some
# headroom below the top edge to stay readable.
label_min_x = 0
label_min_y = 30

try:
    if cap.isOpened():
        print("[INFO] Starting webcam... Press 'q' to quit.")
    else:
        # Without this the script would exit silently when the device index
        # is wrong or the camera is busy.
        print("[ERROR] Could not open webcam (device index 1). "
              "Try index 0 or check that the camera is not in use.")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(img_rgb)

        if results.multi_face_landmarks:
            h, w, _ = frame.shape

            for landmarks in results.multi_face_landmarks:
                # One row per landmark: (x, y) as returned by FaceMesh.
                points = np.array([(lm.x, lm.y) for lm in landmarks.landmark])

                # Row-major flattening yields x0, y0, x1, y1, ... which is the
                # feature order the scaler is applied to.
                coords_np = points.reshape(1, -1)

                # Scale input
                coords_scaled = scaler.transform(coords_np)

                # Predict
                pred_encoded = model.predict(coords_scaled)[0]
                pred_label = str(
                    label_encoder.inverse_transform([pred_encoded])[0]
                )

                # Draw result just above the top-left corner of the face,
                # clamped so the label stays visible when the face touches
                # the top or left border of the frame.
                x_min, y_min = points.min(axis=0)
                label_x = max(int(x_min * w), label_min_x)
                label_y = max(int(y_min * h) - 10, label_min_y)

                cv2.putText(frame, pred_label, (label_x, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Draw landmarks
                for x_norm, y_norm in points:
                    center = (int(x_norm * w), int(y_norm * h))
                    cv2.circle(frame, center, 1, (255, 0, 0), -1)

        cv2.imshow('Real-Time Emotion Detection (MLP)', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if prediction or drawing raised, so the camera
    # is not left locked for the next run.
    cap.release()
    face_mesh.close()
    cv2.destroyAllWindows()


# Output:
# [INFO] Starting webcam... Press 'q' to quit.
