# Necessary imports

In [24]:
import mediapipe as mp
import numpy as np
import os
from tqdm import tqdm
import cv2
import shutil

# Helper functions

In [25]:
# Alias for MediaPipe's holistic solution namespace. No model is created here:
# `mp_holistic.Holistic(...)` is instantiated per video in make_keypoint_arrays.
mp_holistic = mp.solutions.holistic

# Alias for MediaPipe's drawing utilities (not used by the code visible in
# this section; presumably kept for visualisation elsewhere).
mp_drawing = mp.solutions.drawing_utils

In [26]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single BGR video frame.

    The frame is converted to RGB before being handed to ``model.process``
    and converted back to BGR afterwards. Nothing is drawn on the frame; the
    returned image is simply the colour-converted round trip of the input.

    Args:
      image(numpy.ndarray): a video frame in BGR channel order (as read by OpenCV).
      model(mediapipe.python.solutions.holistic.Holistic): the MediaPipe model
        whose ``process`` method is applied to the frame.
    Returns:
      image(numpy.ndarray): the frame converted back to BGR.
      results(mediapipe.python.solution_base.SolutionOutputs): whatever
        ``model.process`` returned for this frame.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Flag the buffer read-only while the model runs (presumably so MediaPipe
    # can use it without copying), then make it writeable again.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [27]:
def adjust_landmarks(arr, center):
    """
    Express flattened (x, y, z) landmarks relative to a reference point.

    Args:
        arr (numpy.ndarray or array-like): Flattened landmarks with shape (n*3,).
        center (numpy.ndarray or array-like): Reference point with shape (3,)
            that is subtracted from every landmark.

    Returns:
        numpy.ndarray: A new array of shape (n*3,) holding ``landmark - center``
        for each landmark. The inputs are not modified.

    """
    # View the flat vector as one (x, y, z) row per landmark.
    landmarks = np.asarray(arr).reshape(-1, 3)

    # Broadcasting subtracts `center` from every row, so there is no need to
    # materialise an (n, 3) tiled copy of it first.
    shifted = landmarks - np.asarray(center)

    # Flatten back to the caller's (n*3,) layout.
    return shifted.reshape(-1)

In [28]:
def extract_keypoints(results):
    """
    Build origin-relative keypoint vectors for the pose and both hands.

    Each landmark group found on ``results`` is flattened to
    (x, y, z, x, y, z, ...) and then shifted so that its first landmark sits
    at the origin. A group that is missing (falsy) is replaced by zeros.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable of points with ``x``, ``y`` and ``z``.

    Returns:
        tuple: ``(pose, left_hand, right_hand)`` as numpy arrays.

    Note:
        - Missing pose landmarks yield a zero vector of shape (33*3,).
        - A missing hand yields a zero vector of shape (21*3,).
        - The first landmark of each group is used as the reference point
          (named nose / wrist in the original code).
    """
    def _flatten(landmark_list, n_points):
        # Flat coordinate vector, or zeros when nothing was detected.
        if landmark_list:
            return np.array(
                [[point.x, point.y, point.z] for point in landmark_list.landmark]
            ).flatten()
        return np.zeros(n_points * 3)

    pose = _flatten(results.pose_landmarks, 33)
    lh = _flatten(results.left_hand_landmarks, 21)
    rh = _flatten(results.right_hand_landmarks, 21)

    # The first three values of each vector are that group's reference point.
    return (
        adjust_landmarks(pose, pose[:3]),
        adjust_landmarks(lh, lh[:3]),
        adjust_landmarks(rh, rh[:3]),
    )

# Make keypoint arrays

In [29]:
def _extract_video_keypoints(video_path):
    """Run MediaPipe Holistic over every frame of one video.

    Args:
      video_path(str): path of the video file to read.
    Returns:
      None if the video could not be opened; otherwise a tuple
      ``(pose_keypoints, lh_keypoints, rh_keypoints)`` of lists holding one
      keypoint vector per frame.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            return None

        pose_keypoints, lh_keypoints, rh_keypoints = [], [], []

        # A fresh model per video, exactly as before, so no state is carried
        # over from one clip to the next.
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while True:
                ret, frame = video.read()
                if not ret:
                    # No more frames to read.
                    break

                _, results = mediapipe_detection(frame, holistic)
                pose, lh, rh = extract_keypoints(results)
                pose_keypoints.append(pose)
                lh_keypoints.append(lh)
                rh_keypoints.append(rh)

        return pose_keypoints, lh_keypoints, rh_keypoints
    finally:
        # Release the capture even when the open fails or processing raises.
        video.release()


def make_keypoint_arrays(signer, split):
    """Generate and save keypoint arrays for each video of a signer/split.

    Videos are looked up as ``<signer>/<split>/<word>/<name>.mp4``. For each
    one, three arrays are written next to it:
    ``<word>/pose_keypoints/<name>.npy``, ``<word>/lh_keypoints/<name>.npy``
    and ``<word>/rh_keypoints/<name>.npy``. A video whose three output files
    already exist is skipped, so an interrupted run can be resumed. Entries of
    the split folder that are not directories are ignored.

    Args:
      signer(int): the signer of interest. Could be 1 or 2 or 3
      split(str): can be 'train', 'test' or 'val'
    """
    words_folder = os.path.join(str(signer), split)

    for word in tqdm(os.listdir(words_folder)):
        word_folder = os.path.join(words_folder, word)
        if not os.path.isdir(word_folder):
            # Stray files in the split folder would make os.listdir fail.
            continue

        # Order matches the (pose, lh, rh) tuple returned by the helper.
        output_dirs = [
            os.path.join(word_folder, name)
            for name in ('pose_keypoints', 'lh_keypoints', 'rh_keypoints')
        ]

        video_files = [v for v in os.listdir(word_folder) if v.endswith('.mp4')]
        for video_file in video_files:
            stem = os.path.splitext(video_file)[0]

            # Skip videos that already have all three outputs.
            if all(os.path.isfile(os.path.join(d, f'{stem}.npy')) for d in output_dirs):
                continue

            keypoints = _extract_video_keypoints(os.path.join(word_folder, video_file))
            if keypoints is None:
                print(f'Could not open video file {video_file}')
                continue

            for directory, frames in zip(output_dirs, keypoints):
                os.makedirs(directory, exist_ok=True)
                # np.save appends the '.npy' suffix to the path itself.
                np.save(os.path.join(directory, stem), frames)