This script creates a NumPy array (.npy file) for each frame of each video and saves it automatically in the corresponding folder.
Just run the script; no further input is required.

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os
from matplotlib import pyplot as plt
import time

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model is used for pose detection
mp_drawing = mp.solutions.drawing_utils # Drawing utilities on the image detected by the model

In [3]:
def mediapipe_detection(image, model):
    """Run ``model`` on a single BGR frame.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV).
        model: Object exposing ``process(rgb_image)``, e.g. a Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned (the detected landmarks).
    """
    # The model works on RGB input, while OpenCV frames are BGR.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Keep the frame read-only for the duration of the inference call only.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand the frame back in BGR so it can be displayed with cv2.
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [4]:
def draw_styled_landmarks(image, results):
    """Draw the pose, left-hand and right-hand landmarks of ``results`` on ``image``.

    Face landmarks are not drawn. Nothing is returned; the drawing itself is
    delegated to ``mp_drawing.draw_landmarks``.

    Args:
        image: Frame passed through to ``mp_drawing.draw_landmarks``.
        results: Detection results exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # One row per landmark group:
    # (attribute on results, connection set, landmark colour, connection colour)
    layers = (
        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 44, 121)),
        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250)),
        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230)),
    )

    for attr_name, connections, landmark_color, connection_color in layers:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, attr_name),
            connections,
            # Landmark points: larger circles.
            mp_drawing.DrawingSpec(color=landmark_color, thickness=2, circle_radius=4),
            # Connections between the points.
            mp_drawing.DrawingSpec(color=connection_color, thickness=2, circle_radius=2),
        )

In [5]:
def extract_keypoints(results):
    """Flatten the pose and hand landmarks of ``results`` into one 1-D array.

    Layout of the returned vector (258 values when every group is either
    missing or has its full landmark count):
        * pose:       33 landmarks x (x, y, z, visibility)
        * left hand:  21 landmarks x (x, y, z)
        * right hand: 21 landmarks x (x, y, z)

    A group that was not detected (falsy attribute) is replaced by zeros of
    the matching length. Face landmarks are not included.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable.

    Returns:
        ``numpy.ndarray``: pose, left-hand and right-hand values concatenated
        in that order.
    """
    def _flatten_group(group, fields, expected_count):
        # Missing detection -> zero block, so the layout stays fixed.
        if not group:
            return np.zeros(expected_count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    pieces = [
        _flatten_group(results.pose_landmarks, xyz + ("visibility",), 33),
        _flatten_group(results.left_hand_landmarks, xyz, 21),
        _flatten_group(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(pieces)

In [6]:
# Root folder (relative to the working directory) that receives the exported
# keypoint arrays.
DATA_PATH = 'MP_Data_from_VIDEO'

# Labels of the signs to detect; each label gets its own sub-folder.
actions = np.array([
    'again', 'boy', 'deaf', 'finish', 'girl',
    'go', 'goodbye', 'hearing', 'hello', 'help',
    'how', 'i', 'know', 'like', 'love',
    'man', 'more', 'my', 'need', 'no',
    'none', 'not', 'please', 'right', 'school',
    'science', 'sentence', 'sign-language', 'sorry', 'sports',
    'student', 'thank-you', 'that', 'want', 'what',
    'when', 'where', 'who', 'why', 'with',
    'without', 'woman', 'yes', 'you', 'your',
    'i-love-you', 'youre-welcome', 'intelligent', 'friend', 'come-here',
    'go-away', 'see-you-later', 'baby', 'name',
])

# Videos (sequences) per action, and frames taken from each video.
no_videos = 60
sequence_length = 60


In [7]:
# Pre-create one output folder per (action, sequence) pair:
#   DATA_PATH/<action>/<sequence>/
for action in actions:
    for sequence in range(no_videos):
        # exist_ok=True keeps reruns harmless when the folder already exists,
        # while real failures (permissions, a file in the way, full disk)
        # still surface instead of being swallowed by a bare `except: pass`.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [8]:
# Convert every video under Videos_Data/ into per-frame keypoint arrays.
# For each action and sequence index, up to `sequence_length` frames are read
# from Videos_Data/<action>/<action>_<sequence>.mp4 and saved as
# DATA_PATH/<action>/<sequence>/<frame_num>.npy.

for action in actions:
    for sequence in range(no_videos):
        video_path = 'Videos_Data/{}/{}_{}.mp4'.format(action, action, sequence)
        cap = cv2.VideoCapture(video_path)

        try:
            # A missing or unreadable file would otherwise only show up later
            # as an opaque cv2 error when a None frame is colour-converted.
            if not cap.isOpened():
                print('WARNING: could not open {}; skipping'.format(video_path))
                continue

            # A new Holistic instance is created for each video (presumably so
            # tracking state does not carry over between unrelated clips).
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

                for frame_num in range(sequence_length):
                    ret, frame = cap.read()
                    if not ret:
                        # Video is shorter than sequence_length (or a read failed).
                        print('WARNING: {} ended after {} of {} frames'.format(
                            video_path, frame_num, sequence_length))
                        break

                    # Only the landmarks are needed; the returned frame is discarded.
                    _, results = mediapipe_detection(frame, holistic)

                    keypoints = extract_keypoints(results)
                    # np.save appends the ".npy" extension to the path.
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)
        finally:
            # Release the capture even if detection or saving raised.
            cap.release()

cv2.destroyAllWindows()
                            

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
