In [7]:
import cv2 
import numpy as np
import mediapipe as mp
import time

In [None]:
# Shortcut to MediaPipe's Face Mesh solution module (used to build the detector).
mp_face_mesh = mp.solutions.face_mesh

# Video capture on device index 0.
cap = cv2.VideoCapture(0)

# Angle smoothing: `smooth_factor` is the weight kept from the previous
# smoothed value; the three running angle values start at zero.
smooth_factor = 0.7
x_smooth, y_smooth, z_smooth = 0, 0, 0


In [None]:
# Real-time head-pose loop.
#
# For every webcam frame: resize and mirror it, run MediaPipe Face Mesh, feed
# six landmarks to cv2.solvePnP, turn the rotation into three smoothed angles,
# then draw a direction label, a line starting at the nose tip, and the frame
# rate. The loop ends when Esc (key code 27) is pressed or a frame cannot be read.
#
# Reads `mp_face_mesh`, `cap`, `smooth_factor` and updates
# `x_smooth` / `y_smooth` / `z_smooth`, all defined in the setup cell.

# Landmark indices used for pose estimation, in ascending order (the order in
# which a full scan of the landmark list would meet them).
POSE_LANDMARK_IDS = (1, 33, 61, 199, 263, 291)
NOSE_TIP_ID = 1  # anchor of the direction line

try:
    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as face_mesh:

        while True:
            # perf_counter is monotonic and high resolution, unlike time.time(),
            # which can return the same value twice on coarse clocks.
            start = time.perf_counter()
            success, image = cap.read()
            if not success:
                break

            image = cv2.resize(image, (1200, 800))
            # Mirror horizontally and convert to RGB for MediaPipe.
            image_rgb = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image_rgb.flags.writeable = False
            results = face_mesh.process(image_rgb)
            image_rgb.flags.writeable = True
            # Everything below draws on the mirrored frame, back in BGR.
            image = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)

            img_h, img_w, _ = image.shape

            # `multi_face_landmarks` is falsy when no face was found.
            for face_landmarks in results.multi_face_landmarks or ():
                # Fresh lists per face so a second face never appends to the
                # arrays built for the first one.
                face_2d, face_3d = [], []
                for idx in POSE_LANDMARK_IDS:
                    lm = face_landmarks.landmark[idx]
                    if idx == NOSE_TIP_ID:
                        nose_2d = (float(lm.x * img_w), float(lm.y * img_h))
                    x, y = int(lm.x * img_w), int(lm.y * img_h)
                    face_2d.append([x, y])
                    # x/y are pixel coordinates; z is kept as MediaPipe reports it.
                    face_3d.append([x, y, lm.z])

                face_2d = np.array(face_2d, dtype=np.float64)
                face_3d = np.array(face_3d, dtype=np.float64)

                # Approximate pinhole camera: focal length equal to the image
                # width, principal point at the image centre, no distortion.
                focal_length = img_w
                cam_matrix = np.array([
                    [focal_length, 0, img_w / 2],
                    [0, focal_length, img_h / 2],
                    [0, 0, 1]
                ], dtype=np.float64)
                dist_matrix = np.zeros((4, 1), dtype=np.float64)

                # Keep the solver flag separate from the frame-read flag and
                # skip this face when no pose could be computed.
                pnp_ok, rot_vec, _trans_vec = cv2.solvePnP(
                    face_3d, face_2d, cam_matrix, dist_matrix
                )
                if not pnp_ok:
                    continue

                rmat, _ = cv2.Rodrigues(rot_vec)
                angles, _, _, _, _, _ = cv2.RQDecomp3x3(rmat)

                # NOTE(review): OpenCV documents these Euler angles as degrees;
                # the x360 scale is kept unchanged because the thresholds below
                # are presumably tuned against it - confirm intended units.
                x_angle = angles[0] * 360
                y_angle = angles[1] * 360
                z_angle = angles[2] * 360

                # Exponential smoothing of the three angles.
                x_smooth = x_smooth * smooth_factor + x_angle * (1 - smooth_factor)
                y_smooth = y_smooth * smooth_factor + y_angle * (1 - smooth_factor)
                z_smooth = z_smooth * smooth_factor + z_angle * (1 - smooth_factor)

                # Left/right is tested before up/down.
                if y_smooth < -10:
                    text = "Looking Left"
                elif y_smooth > 7:
                    text = "Looking Right"
                elif x_smooth < -10:
                    text = "Looking Down"
                elif x_smooth > 10:
                    text = "Looking Up"
                else:
                    text = "Forward"

                # Direction line: starts at the nose tip and is offset by the
                # smoothed angles scaled by 10.
                p1 = (int(nose_2d[0]), int(nose_2d[1]))
                p2 = (int(nose_2d[0] + y_smooth * 10),
                      int(nose_2d[1] - x_smooth * 10))
                cv2.line(image, p1, p2, (0, 0, 255), 3)

                # Head pose text
                cv2.putText(image, text, (20, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)

            # Guard against a zero interval instead of raising ZeroDivisionError.
            elapsed = time.perf_counter() - start
            fps = 1 / elapsed if elapsed > 0 else 0.0
            cv2.putText(image, f"FPS: {int(fps)}", (20, 250),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

            cv2.imshow('Head Pose Estimation', image)

            # Esc quits.
            if cv2.waitKey(1) & 0xFF == 27:
                break
finally:
    # Always free the camera and close the window, including on an exception
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()
