<a href="https://colab.research.google.com/github/abdokamel2001/ASL-Translation-Project/blob/main/2023-10-Sprint2-Shared-Functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Shared Functions Used by Both Notebooks

In [None]:
!pip install -q mediapipe

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.6/33.6 MB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt

# MediaPipe Functions

In [None]:
# Module-level MediaPipe solution objects, each constructed with its default
# options. get_frame_landmarks() reads these three globals by name, so this
# cell has to run before any landmark extraction.
# NOTE(review): the same objects are reused for every frame of every video;
# confirm that sharing them across unrelated videos is intended.
hands = mp.solutions.hands.Hands()
pose = mp.solutions.pose.Pose()
face_mesh = mp.solutions.face_mesh.FaceMesh()

In [None]:
def get_frame_landmarks(frame):
    """
    Runs the hand, pose and face-mesh detectors on one frame and packs the results.

    Args:
        frame: A single rgb frame/image.

    Returns:
        np.array: An (n, 3) array with n = 21 * 2 + 33 + 468 = 543 rows, one
        [x, y, z] row per landmark as reported by MediaPipe. Row layout:
            0-20    first detected hand
            21-41   second detected hand
            42-74   body pose
            75-542  face mesh (first detected face)
        Rows belonging to anything that was not detected are left as zeros.
    """

    def _as_rows(landmark_list):
        # One (x, y, z) row per landmark in a MediaPipe landmark list.
        return np.array([(point.x, point.y, point.z) for point in landmark_list.landmark])

    hand_result = hands.process(frame)
    pose_result = pose.process(frame)
    face_result = face_mesh.process(frame)

    hand_size, pose_size, face_size = 21, 33, 468
    pose_start = 2 * hand_size
    face_start = pose_start + pose_size

    packed = np.zeros((face_start + face_size, 3))

    detected_hands = hand_result.multi_hand_landmarks
    if detected_hands:
        # Hands are stored in detection order, not by left/right handedness.
        packed[:hand_size, :] = _as_rows(detected_hands[0])
        if len(detected_hands) > 1:
            packed[hand_size:pose_start, :] = _as_rows(detected_hands[1])

    if pose_result.pose_landmarks:
        packed[pose_start:face_start, :] = _as_rows(pose_result.pose_landmarks)

    detected_faces = face_result.multi_face_landmarks
    if detected_faces:
        # Only the first face is kept; it fills every remaining row.
        packed[face_start:, :] = _as_rows(detected_faces[0])

    return packed

In [None]:
def get_video_landmarks(video_path, start_frame=0, end_frame=-1, num_landmarks=543):
    """
    Extracts landmarks from a video by processing each frame in the video.

    Args:
        video_path (str): The file path to the video to process.
        start_frame (int): The index of the starting frame, inclusive (default is 0).
            Negative values are treated as 0.
        end_frame (int): The index of the ending frame, exclusive (default is -1,
            meaning the frame count reported by the video container).
        num_landmarks (int): Number of landmark rows stored per frame (default is 543).
            It must match the number of rows returned by get_frame_landmarks.

    Returns:
        np.array: An array of shape (m, num_landmarks, 3), where
        m = max(end_frame - start_frame, 0). Row i holds the landmarks of frame
        start_frame + i. If the video ends (or a frame cannot be read) before
        end_frame is reached, the remaining rows are left as zeros.
    """

    cap = cv2.VideoCapture(video_path)
    try:
        if end_frame < 0:
            end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if start_frame < 0:
            start_frame = 0

        # An empty or inverted range yields an empty result instead of a
        # negative-dimension error from np.zeros.
        num_frames = max(end_frame - start_frame, 0)
        all_frame_landmarks = np.zeros((num_frames, num_landmarks, 3))

        frame_index = 0
        while cap.isOpened() and frame_index < end_frame:
            if frame_index < start_frame:
                # Frames before the requested range are advanced past without
                # decoding them.
                if not cap.grab():
                    break
            else:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Mark the converted frame read-only so MediaPipe can take it by
                # reference. Setting the flag before cvtColor has no effect
                # because cvtColor returns a new, writable array.
                frame.flags.writeable = False
                all_frame_landmarks[frame_index - start_frame] = get_frame_landmarks(frame)

            frame_index += 1
    finally:
        # Release the capture even if landmark extraction raises.
        cap.release()

    return all_frame_landmarks

# OpenCV Functions

In [None]:
def draw_landmarks(input_path, output_path, npy_file, start_frame=0, end_frame=-1):
    """
    Reads a video from the input file, overlays landmarks on each frame, and saves the result to an output video file.

    Args:
        input_path (str): The path to the input video file.
        output_path (str): The path to save the output video with overlaid landmarks.
        npy_file (str): The path to a NumPy file containing landmark data for each frame,
            indexed as [frame, landmark, (x, y, ...)] with x and y scaled to the
            0-1 range of the frame width and height.
        start_frame (int): The index of the starting frame for landmark overlay (default is 0).
            Negative values are treated as 0, matching get_video_landmarks.
        end_frame (int): The index of the ending frame for landmark overlay, exclusive
            (default is -1, meaning every frame for which the file holds landmarks).

    Description:
        Row i of the landmark file is drawn on video frame start_frame + i, for frames
        from 'start_frame' (inclusive) to 'end_frame' (exclusive). The range is clipped
        to the number of rows available in the file, so a short file never causes an
        indexing error. Landmarks are drawn as filled red circles. Rows that are all
        zeros (the placeholder get_frame_landmarks leaves for undetected parts) or that
        contain non-finite values are skipped. Every input frame is written to the
        output, with or without an overlay, at the input's resolution and frame rate.
    """

    # Load first so a bad landmarks file fails before any video handle or
    # output file is created.
    landmarks_data = np.load(npy_file)

    if start_frame < 0:
        start_frame = 0
    # A negative end_frame means "to the end"; the original comparison
    # `frame_index < -1` was never true, so nothing was drawn by default.
    last_available = start_frame + len(landmarks_data)
    end_frame = last_available if end_frame < 0 else min(end_frame, last_available)

    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        # Keep the frame rate as a float: truncating 29.97 to 29 would change
        # the playback speed of the output.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        frame_index = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if start_frame <= frame_index < end_frame:
                frame_landmarks = np.asarray(landmarks_data[frame_index - start_frame])
                # Drop placeholder / invalid rows so they are not drawn as a
                # cluster of dots in the top-left corner.
                drawable = np.isfinite(frame_landmarks).all(axis=1) & frame_landmarks.any(axis=1)
                pixels = (frame_landmarks[drawable, :2] * (width, height)).astype(int)
                for px, py in pixels:
                    cv2.circle(frame, (int(px), int(py)), 3, (0, 0, 255), -1)

            out.write(frame)
            frame_index += 1
    finally:
        # Release both handles even if reading, drawing or writing raises.
        cap.release()
        if out is not None:
            out.release()

# OpenCV Test

In [None]:
!cp -r "/content/drive/MyDrive/AI Team/Tasks/2023-10-Sprint2/OpenCV_Test" "/content"

In [None]:
# Overlay the saved landmarks on frames 0-88 of the sample clip.
# The function defined in this notebook is draw_landmarks; `draw` is undefined.
draw_landmarks('/content/OpenCV_Test/mCjHYreiZ24.mp4', '/content/Edited.mp4', '/content/OpenCV_Test/thanks.npy', 0, 89)