# Facial Landmark Detection

### For Video Data

In [1]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp


def draw_landmarks_on_image_face(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected face mesh drawn on it.

    Args:
        rgb_image: Image array to annotate. It is copied; the input is not
            modified.
        detection_result: Object exposing ``face_landmarks``, a sequence with
            one collection of landmarks per detected face. Each landmark must
            provide ``x``, ``y`` and ``z`` attributes.

    Returns:
        The annotated copy. When no face was detected it is an unmodified
        copy of ``rgb_image``.
    """
    canvas = np.copy(rgb_image)

    for face in detection_result.face_landmarks:
        # Repackage the landmarks as the protobuf list the drawing helper takes.
        proto = landmark_pb2.NormalizedLandmarkList()
        converted = [
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in face
        ]
        proto.landmark.extend(converted)

        mesh = mp.solutions.face_mesh
        styles = mp.solutions.drawing_styles
        # Each layer is (connection set, factory for its default style);
        # layers are drawn in this order: tesselation, contours, irises.
        layers = (
            (mesh.FACEMESH_TESSELATION,
             styles.get_default_face_mesh_tesselation_style),
            (mesh.FACEMESH_CONTOURS,
             styles.get_default_face_mesh_contours_style),
            (mesh.FACEMESH_IRISES,
             styles.get_default_face_mesh_iris_connections_style),
        )
        for connections, make_style in layers:
            solutions.drawing_utils.draw_landmarks(
                image=canvas,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=make_style())

    return canvas

def plot_face_blendshapes_bar_graph(face_blendshapes):
    """Display a horizontal bar chart of blendshape scores.

    Args:
        face_blendshapes: Sequence of categories, each exposing
            ``category_name`` and ``score``. One bar is drawn per category,
            in the order given, with the first category at the top.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    names = [category.category_name for category in face_blendshapes]
    scores = [category.score for category in face_blendshapes]
    positions = range(len(names))

    fig, ax = plt.subplots(figsize=(12, 12))
    bars = ax.barh(positions, scores, label=[str(pos) for pos in positions])
    ax.set_yticks(positions, names)
    # Flip the axis so position 0 is at the top of the chart.
    ax.invert_yaxis()

    # Write each score just past the right-hand end of its bar.
    for patch, score in zip(bars.patches, scores):
        right_edge = patch.get_x() + patch.get_width()
        plt.text(right_edge, patch.get_y(), f"{score:.4f}", va="top")

    ax.set_xlabel('Score')
    ax.set_title("Face Blendshapes")
    plt.tight_layout()
    plt.show()

In [2]:
# detecting facial landmarks
def detect_landmarks_face(frame, detector):
    """Detect face landmarks in one OpenCV frame and draw them on it.

    The frames this notebook passes in come from ``cv.VideoCapture.read``,
    which delivers pixels in BGR channel order, while the image handed to
    MediaPipe is declared as ``SRGB``. The channels are therefore reversed
    before detection, and reversed again afterwards so the annotated image
    can be given directly to ``cv.imshow``.

    Args:
        frame: Three-channel image array in BGR order (assumed to be the
            ``uint8`` frame returned by OpenCV).
        detector: Object whose ``detect(mp.Image)`` returns a result exposing
            ``face_landmarks``.

    Returns:
        Tuple ``(annotated_image, detection_result)`` where
        ``annotated_image`` is a BGR copy of ``frame`` with the face mesh
        drawn on it.
    """
    # Reversing the channel axis swaps BGR <-> RGB; the reversed view is made
    # contiguous before it is wrapped in an mp.Image.
    rgb_pixels = np.ascontiguousarray(frame[:, :, ::-1])
    rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_pixels)
    detection_result = detector.detect(rgb_frame)
    annotated_rgb = draw_landmarks_on_image_face(rgb_frame.numpy_view(), detection_result)
    # Back to BGR so OpenCV displays the colours correctly.
    annotated_image_face = np.ascontiguousarray(annotated_rgb[:, :, ::-1])
    return annotated_image_face, detection_result

In [3]:
# defining a function for calculating euclidean distance
def calc_euclidean(point1, point2):
    """Return the Euclidean distance between two points.

    Args:
        point1: Coordinates of the first point (any array-like).
        point2: Coordinates of the second point, same shape as ``point1``.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    delta = np.array(point1) - np.array(point2)
    squared_total = (delta ** 2).sum()
    return np.sqrt(squared_total)

In [4]:
# importing the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2 as cv

def get_face_features(video_name):
    """Extract sixteen face-landmark distance series from a video.

    Roughly one frame per second is sampled (one frame out of every
    ``int(fps)`` frames). For each sampled frame in which a face is detected,
    the Euclidean distance between sixteen fixed pairs of landmarks is
    appended to the matching series. Sampled frames are shown, annotated, in
    an OpenCV window named "result"; pressing ``q`` stops processing early.

    Args:
        video_name: Path (or other source accepted by ``cv.VideoCapture``) of
            the video to analyse.

    Returns:
        A tuple of sixteen lists ``(dist_0, ..., dist_15)``. All lists have
        the same length: one entry per sampled frame that contained a face.
    """
    # Indices into the detected landmark list of the eleven landmarks used.
    landmark_ids = (0, 52, 123, 180, 260, 25, 400, 222, 178, 321, 84)
    # Each feature is the distance between two entries of ``landmark_ids``;
    # the pairs are given as positions into that tuple, in output order.
    point_pairs = (
        (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8),
        (8, 9), (9, 10), (0, 3), (1, 5), (2, 9), (3, 8), (4, 10), (2, 7),
    )
    distances = [[] for _ in point_pairs]

    # Creating a FaceLandmarker object.
    base_options = python.BaseOptions(model_asset_path='face_landmarker.task')
    options = vision.FaceLandmarkerOptions(base_options=base_options,
                                           output_face_blendshapes=False,
                                           output_facial_transformation_matrixes=False,
                                           num_faces=1)

    # The context manager closes the detector even if processing fails.
    with vision.FaceLandmarker.create_from_options(options) as detector:
        vid = cv.VideoCapture(video_name)
        try:
            frame_rate = vid.get(cv.CAP_PROP_FPS)
            print(f"Frame rate is : {frame_rate}")

            # Sample one frame out of every `interval`. Clamp to 1 so a
            # reported frame rate below 1 cannot cause a modulo by zero.
            interval = max(1, int(frame_rate))
            frame_counter = 0

            while True:
                ret, frame = vid.read()
                # `ret` is False once no further frame can be read.
                if not ret:
                    print("Can't receive frame (stream end?). Exiting ...")
                    break

                sample_this_frame = frame_counter % interval == 0
                # Advance for every frame read so the sampling interval holds.
                frame_counter += 1

                if sample_this_frame:
                    annotated_image_face, detection_result = detect_landmarks_face(frame, detector)
                    faces = detection_result.face_landmarks
                    if faces:
                        # Only the first detected face is used.
                        face_points = np.array(
                            [[landmark.x, landmark.y, landmark.z] for landmark in faces[0]])
                        selected = face_points[list(landmark_ids)]
                        for series, (first, second) in zip(distances, point_pairs):
                            series.append(calc_euclidean(selected[first], selected[second]))

                    cv.imshow("result", annotated_image_face)

                if cv.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the capture and close the preview window on any exit path.
            vid.release()
            cv.destroyAllWindows()

    return tuple(distances)

In [5]:
# importing pandas for creating the dataframe
import pandas as pd
# Export the extracted features to a pandas DataFrame and save it as a .csv file for future statistical analysis.
def create_face_csv(dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, dist_10, dist_11, dist_12, dist_13, dist_14, dist_15, state):
    """Save the sixteen face distance series as a CSV file.

    The k-th series (k = 1..16, i.e. ``dist_0`` .. ``dist_15``) becomes the
    column ``distance_face_<k>_<state>``. The table is written without an
    index column to ``facial_landmark_distances_<state>.csv``, a path
    relative to the current working directory.

    Args:
        dist_0 ... dist_15: Equal-length sequences of distances.
        state: String label used in the column names and the file name.

    Returns:
        None.
    """
    all_series = (dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7,
                  dist_8, dist_9, dist_10, dist_11, dist_12, dist_13, dist_14, dist_15)
    # Column numbering is 1-based, whereas the parameters are 0-based.
    columns = {
        'distance_face_' + str(number) + "_" + state: values
        for number, values in enumerate(all_series, start=1)
    }

    table = pd.DataFrame(columns)
    table.to_csv('facial_landmark_distances_' + state + '.csv', index=False)

### creating features and saving the data

In [6]:
# Speaking state: extract the sixteen face-distance series from
# "speaking_state.mp4" and save them to facial_landmark_distances_speaking.csv.
dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, dist_10, dist_11, dist_12, dist_13, dist_14, dist_15 = get_face_features("speaking_state.mp4")
create_face_csv(dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, dist_10, dist_11, dist_12, dist_13, dist_14, dist_15, "speaking")

# Idle state: same extraction for "idle_state.mp4", saved to
# facial_landmark_distances_idle.csv. The dist_* names are rebound here.
dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, dist_10, dist_11, dist_12, dist_13, dist_14, dist_15 = get_face_features("idle_state.mp4")
create_face_csv(dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, dist_10, dist_11, dist_12, dist_13, dist_14, dist_15, "idle")

Frame rate is : 30.0
Can't receive frame (stream end?). Exiting ...
Frame rate is : 30.0
Can't receive frame (stream end?). Exiting ...


## Hand Gesture Landmark Detection

In [7]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import cv2 as cv

# Drawing parameters for the handedness label written next to each hand.
# MARGIN is subtracted from the label's y position, moving it above the
# topmost landmark of the hand.
MARGIN = 10  # pixels
# Passed to cv.putText as the font scale.
FONT_SIZE = 1
# Passed to cv.putText as the stroke thickness.
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image_hand(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected hand drawn on it.

    For each hand the landmark skeleton is drawn and the handedness category
    name is written just above the hand's top-left corner.

    Args:
        rgb_image: Image array to annotate. It is copied; the input is not
            modified.
        detection_result: Object exposing ``hand_landmarks`` (one landmark
            collection per hand, each landmark with ``x``, ``y``, ``z``) and
            ``handedness`` (one category list per hand, index-aligned with
            ``hand_landmarks``).

    Returns:
        The annotated copy. When no hand was detected it is an unmodified
        copy of ``rgb_image``.
    """
    all_hands = detection_result.hand_landmarks
    all_handedness = detection_result.handedness
    canvas = np.copy(rgb_image)

    for idx, hand in enumerate(all_hands):
        handedness = all_handedness[idx]

        # Repackage the landmarks as the protobuf list the drawing helper takes.
        proto = landmark_pb2.NormalizedLandmarkList()
        converted = [
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in hand
        ]
        proto.landmark.extend(converted)
        solutions.drawing_utils.draw_landmarks(
            image=canvas,
            landmark_list=proto,
            connections=solutions.hands.HAND_CONNECTIONS,
            landmark_drawing_spec=solutions.drawing_styles.get_default_hand_landmarks_style(),
            connection_drawing_spec=solutions.drawing_styles.get_default_hand_connections_style())

        # Landmark coordinates are scaled by the image size to get pixels; the
        # label anchor is the smallest x / smallest y, lifted by MARGIN.
        height, width, _ = canvas.shape
        leftmost = min(point.x for point in hand)
        topmost = min(point.y for point in hand)
        anchor = (int(leftmost * width), int(topmost * height) - MARGIN)

        # Write the handedness label (category name of the first entry).
        cv.putText(canvas, f"{handedness[0].category_name}",
                   anchor, cv.FONT_HERSHEY_DUPLEX,
                   FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv.LINE_AA)

    return canvas

In [8]:
# detecting hand gesture features
def detect_landmarks_hand(frame, detector):
    """Detect hand landmarks in one OpenCV frame and draw them on it.

    The frames this notebook passes in come from ``cv.VideoCapture.read``,
    which delivers pixels in BGR channel order, while the image handed to
    MediaPipe is declared as ``SRGB``. The channels are therefore reversed
    before detection, and reversed again afterwards so the annotated image
    can be given directly to ``cv.imshow``.

    Args:
        frame: Three-channel image array in BGR order (assumed to be the
            ``uint8`` frame returned by OpenCV).
        detector: Object whose ``detect(mp.Image)`` returns a result exposing
            ``hand_landmarks`` and ``handedness``.

    Returns:
        Tuple ``(annotated_image, detection_result)`` where
        ``annotated_image`` is a BGR copy of ``frame`` with the detected
        hands drawn on it.
    """
    # Reversing the channel axis swaps BGR <-> RGB; the reversed view is made
    # contiguous before it is wrapped in an mp.Image.
    rgb_pixels = np.ascontiguousarray(frame[:, :, ::-1])
    rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_pixels)
    detection_result = detector.detect(rgb_frame)
    annotated_rgb = draw_landmarks_on_image_hand(rgb_frame.numpy_view(), detection_result)
    # Back to BGR so OpenCV displays the colours correctly.
    annotated_image_hand = np.ascontiguousarray(annotated_rgb[:, :, ::-1])
    return annotated_image_hand, detection_result

In [9]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2 as cv

def get_hand_features(video_name):
    """Extract ten hand-landmark distance series from a video.

    Roughly one frame per second is sampled (one frame out of every
    ``int(fps)`` frames). For each sampled frame in which at least one hand
    is detected, the Euclidean distance between ten fixed pairs of landmarks
    of the first detected hand is appended to the matching series. Sampled
    frames are shown, annotated, in an OpenCV window named "result";
    pressing ``q`` stops processing early.

    Args:
        video_name: Path (or other source accepted by ``cv.VideoCapture``) of
            the video to analyse.

    Returns:
        A tuple of ten lists ``(dist_0, ..., dist_9)``. All lists have the
        same length: one entry per sampled frame that contained a hand.
    """
    # Indices into the detected landmark list of the ten landmarks used.
    landmark_ids = (0, 4, 8, 13, 17, 20, 2, 5, 15, 18)
    # Each feature is the distance between two entries of ``landmark_ids``;
    # the pairs are given as positions into that tuple, in output order.
    point_pairs = (
        (0, 1), (1, 2), (2, 3), (3, 4), (4, 5),
        (5, 6), (6, 7), (7, 8), (8, 9), (9, 1),
    )
    distances = [[] for _ in point_pairs]

    # Creating a HandLandmarker object.
    base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
    options = vision.HandLandmarkerOptions(base_options=base_options,
                                           num_hands=2)

    # The context manager closes the detector even if processing fails.
    with vision.HandLandmarker.create_from_options(options) as detector:
        vid = cv.VideoCapture(video_name)
        try:
            frame_rate = vid.get(cv.CAP_PROP_FPS)
            print(f"Frame rate is : {frame_rate}")

            # Sample one frame out of every `interval`. Clamp to 1 so a
            # reported frame rate below 1 cannot cause a modulo by zero.
            interval = max(1, int(frame_rate))
            frame_counter = 0

            while True:
                ret, frame = vid.read()
                # `ret` is False once no further frame can be read.
                if not ret:
                    print("Can't receive frame (stream end?). Exiting ...")
                    break

                sample_this_frame = frame_counter % interval == 0
                # Advance for every frame read so the sampling interval holds.
                frame_counter += 1

                if sample_this_frame:
                    annotated_image_hand, detection_result = detect_landmarks_hand(frame, detector)
                    hands = detection_result.hand_landmarks
                    if hands:
                        # Up to two hands may be detected; only the first is measured.
                        hand_points = np.array(
                            [[landmark.x, landmark.y, landmark.z] for landmark in hands[0]])
                        selected = hand_points[list(landmark_ids)]
                        for series, (first, second) in zip(distances, point_pairs):
                            series.append(calc_euclidean(selected[first], selected[second]))

                    cv.imshow("result", annotated_image_hand)

                if cv.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the capture and close the preview window on any exit path.
            vid.release()
            cv.destroyAllWindows()

    return tuple(distances)

In [10]:
# importing pandas for creating the dataframe
import pandas as pd

# Export the extracted features to a pandas DataFrame and save it as a .csv file for future statistical analysis.
def create_hand_csv(dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, state):
    """Save the ten hand distance series as a CSV file.

    The k-th series (k = 1..10, i.e. ``dist_0`` .. ``dist_9``) becomes the
    column ``distance_hand_<k>_<state>``. The table is written without an
    index column to ``hand_landmark_distances_<state>.csv``, a path relative
    to the current working directory.

    Args:
        dist_0 ... dist_9: Equal-length sequences of distances.
        state: String label used in the column names and the file name.

    Returns:
        None.
    """
    all_series = (dist_0, dist_1, dist_2, dist_3, dist_4,
                  dist_5, dist_6, dist_7, dist_8, dist_9)
    # Column numbering is 1-based, whereas the parameters are 0-based.
    columns = {
        'distance_hand_' + str(number) + "_" + state: values
        for number, values in enumerate(all_series, start=1)
    }

    table = pd.DataFrame(columns)
    table.to_csv('hand_landmark_distances_' + state + '.csv', index=False)

### creating features and saving data

In [11]:
# Speaking state: extract the ten hand-distance series from
# "speaking_state.mp4" and save them to hand_landmark_distances_speaking.csv.
dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9 = get_hand_features("speaking_state.mp4")
create_hand_csv(dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, "speaking")

# Idle state: same extraction for "idle_state.mp4", saved to
# hand_landmark_distances_idle.csv. The dist_* names are rebound here.
dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9 = get_hand_features("idle_state.mp4")
create_hand_csv(dist_0, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6, dist_7, dist_8, dist_9, "idle")

Frame rate is : 30.0
Can't receive frame (stream end?). Exiting ...
Frame rate is : 30.0
Can't receive frame (stream end?). Exiting ...
