## Install and import the necessary libraries

In [2]:
!pip install mediapipe
!pip install opencv-python












In [2]:
import cv2
import mediapipe as mp
import numpy as np
import time

## Application

In [69]:
# Face Mesh configuration: static-image mode off, at most one face, and the
# same 0.6 confidence cut-off for both detection and tracking.
mp_face_mesh = mp.solutions.face_mesh
_face_mesh_options = {
    "static_image_mode": False,
    "max_num_faces": 1,
    "min_detection_confidence": 0.6,
    "min_tracking_confidence": 0.6,
}
face_mesh = mp_face_mesh.FaceMesh(**_face_mesh_options)

# Open the camera at device index 0.
cap = cv2.VideoCapture(0)

# The landmark indices for each facial feature were picked by hand for better
# precision. MediaPipe Face Mesh uses 468 landmark points to build the mesh;
# the index map is available here:
# https://github.com/google/mediapipe/blob/a908d668c730da128dfa8d9f6bd25d519d006692/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png

# Each list is an ordered outline: consecutive indices are joined into a
# closed polygon when the feature is drawn.
left_eye_indices = [
    359, 467, 260, 259, 257, 258, 286, 414,
    463, 341, 256, 252, 253, 254, 339, 255,
]
right_eye_indices = [
    130, 25, 110, 24, 23, 22, 26, 112,
    243, 190, 56, 28, 27, 29, 30, 247,
]
nose_indices = [
    168, 417, 413, 414, 382, 341, 453, 357, 343, 437, 420, 360, 344, 438,
    309, 250, 462, 370, 94, 141, 242, 20, 79, 218, 115, 131, 198, 217,
    114, 128, 233, 112, 155, 173, 190, 189, 193,
]
upper_lip_indices = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308,
    415, 310, 311, 312, 13, 82, 81, 80, 191, 78, 62, 76,
]
# NOTE(review): this outline uses landmark 179 where `mouth_area` below uses
# 178 at the same position - confirm which one is intended.
lower_lip_indices = [
    61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308,
    324, 318, 402, 317, 14, 87, 179, 88, 95, 78, 62, 76,
]

# Polygon whose area is tracked from frame to frame for talking detection.
mouth_area = [78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
              308, 324, 318, 402, 317, 14, 87, 178, 88, 95]

# Talking-detection state carried across frames.
prev_area = None        # mouth area measured on the previous frame
start_time = None       # time.time() of the most recent talking detection
display_duration = 1    # seconds the talking message stays on screen

# Feature label -> outline indices; the label is also the text drawn on screen.
facial_features = {
    "left_eye": left_eye_indices,
    "right_eye": right_eye_indices,
    "nose": nose_indices,
    "upper_lip": upper_lip_indices,
    "lower_lip": lower_lip_indices,
}

def _to_pixel(face_landmarks, idx, frame_width, frame_height):
    """Scale landmark ``idx`` by the frame size and return integer (x, y)."""
    point = face_landmarks.landmark[idx]
    return int(point.x * frame_width), int(point.y * frame_height)


def _midpoint(first, second):
    """Return the integer midpoint of two (x, y) pixel coordinates."""
    return (first[0] + second[0]) // 2, (first[1] + second[1]) // 2


def _pixel_distance(first, second):
    """Return the Euclidean distance between two (x, y) pixel coordinates."""
    return np.linalg.norm(np.array(first) - np.array(second))


def _polygon_area(vertices):
    """Return the area of a simple polygon using the shoelace formula."""
    closed_edges = zip(vertices, vertices[1:] + vertices[:1])
    return 0.5 * abs(sum(x0 * y1 - x1 * y0
                         for (x0, y0), (x1, y1) in closed_edges))


# Main capture loop: read a frame, run Face Mesh on it, draw the feature
# outlines and the pupillary-distance / surprise / talking overlays, and show
# the result until the stream ends or ESC is pressed.
# The try/finally guarantees the camera, the Face Mesh graph and the windows
# are released even if a frame raises an exception.
try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            print("Failed to capture frame")
            break

        # The thresholds below are applied to pixel measurements, which change
        # with camera resolution, so every frame is resized to a standard
        # size. A rather low resolution also helps performance.
        frame = cv2.resize(frame, (640, 480))
        frame_height, frame_width = frame.shape[:2]

        # The captured frame is BGR; convert it to RGB before processing.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = face_mesh.process(image_rgb)

        # Pixel outlines grouped per facial feature (empty when no face).
        facial_landmarks = {feature: [] for feature in facial_features}

        if results.multi_face_landmarks:
            landmarks = results.multi_face_landmarks[0]

            # Pupillary distance is measured from pupil to pupil. There is no
            # pupil landmark in the map, so each pupil is approximated by the
            # midpoint between the upper-middle and lower-middle landmark of
            # the eye (159/145 and 386/374).
            middle_point_159_145 = _midpoint(
                _to_pixel(landmarks, 159, frame_width, frame_height),
                _to_pixel(landmarks, 145, frame_width, frame_height))
            middle_point_386_374 = _midpoint(
                _to_pixel(landmarks, 386, frame_width, frame_height),
                _to_pixel(landmarks, 374, frame_width, frame_height))
            distance = _pixel_distance(middle_point_159_145,
                                       middle_point_386_374)

            # Distance from the upper-middle landmark of each eye to the
            # middle landmark of the eyebrow above it (282-257 and 27-52).
            distance_282_257 = _pixel_distance(
                _to_pixel(landmarks, 282, frame_width, frame_height),
                _to_pixel(landmarks, 257, frame_width, frame_height))
            distance_27_52 = _pixel_distance(
                _to_pixel(landmarks, 27, frame_width, frame_height),
                _to_pixel(landmarks, 52, frame_width, frame_height))

            # Draw every feature outline and write its name at the centroid.
            for feature, indices in facial_features.items():
                landmarks_list = [
                    _to_pixel(landmarks, idx, frame_width, frame_height)
                    for idx in indices
                ]
                facial_landmarks[feature] = landmarks_list

                # cv2.polylines only accepts int32 points; NumPy's default
                # integer type is int64 on many platforms.
                outline = np.array(landmarks_list, dtype=np.int32)
                cv2.polylines(frame, [outline], isClosed=True,
                              color=(0, 255, 0), thickness=2)

                # Plain Python ints keep OpenCV's argument parsing happy.
                centroid = np.mean(landmarks_list, axis=0)
                cv2.putText(frame, feature,
                            (int(centroid[0]), int(centroid[1])),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2,
                            cv2.LINE_AA)

            # The overlays below are per frame, not per feature, so they are
            # drawn once, after (and on top of) the feature outlines.

            # Text for pupillary distance, plus the distance as a line.
            cv2.putText(frame, f"Pupillary distance is: {distance:.2f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 2, cv2.LINE_AA)
            cv2.line(frame, middle_point_159_145, middle_point_386_374,
                     (255, 0, 0), 2)

            # Area enclosed by the inner mouth outline.
            polygon_vertices = [
                _to_pixel(landmarks, idx, frame_width, frame_height)
                for idx in mouth_area
            ]
            area = _polygon_area(polygon_vertices)

            # Both ratios divide by the pupillary distance; skip them when the
            # two eye midpoints collapse onto the same pixel.
            if distance > 0:
                # Pupillary distance stands in for the distance from the
                # camera, so the eyebrow distances are taken relative to it.
                # It is doubled because two eyebrow distances are summed.
                if (distance_282_257 + distance_27_52) / (distance * 2) > 0.25:
                    cv2.putText(frame, "You are SURPRISED!", (10, 60),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2,
                                cv2.LINE_AA)

                # A large enough growth of the mouth area, relative to the
                # pupillary distance, counts as talking. The message stays on
                # screen for `display_duration` seconds after the last
                # detection. Only increases in area trigger it.
                if prev_area is not None:
                    area_difference = area - prev_area

                    if area_difference / distance > 1.5:
                        cv2.putText(frame, "You are talking.", (10, 90),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2, cv2.LINE_AA)
                        start_time = time.time()  # Reset the timer
                    elif (start_time is not None
                          and time.time() - start_time < display_duration):
                        cv2.putText(frame, "You are talking.", (10, 90),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2, cv2.LINE_AA)

            # Update the previous area once per frame.
            prev_area = area

        cv2.imshow('Facial Detection made by ali0onder', frame)

        # Close the camera if the "ESC" key is pressed. Masking with 0xFF
        # keeps only the low byte of the key code.
        if (cv2.waitKey(1) & 0xFF) == 27:
            break
finally:
    cap.release()
    face_mesh.close()
    cv2.destroyAllWindows()

## If you don't want to include the eyelids when outlining the eyes, use these landmark indices instead:
right_eye_indices = (7,33,163,144,145,153,154,155,133,173,157,158,159,160,161,246)

left_eye_indices = (263,466,388,387,386,385,384,398,362,382,381,380,374,373,390,249)