In [2]:
import cv2
import numpy as np
import os
import mediapipe as mp

objc[12100]: Class CaptureDelegate is implemented in both /Users/yannick/opt/anaconda3/envs/test-env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x1356124d0) and /Users/yannick/opt/anaconda3/envs/test-env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x131798860). One of the two will be used. Which one is undefined.
objc[12100]: Class CVWindow is implemented in both /Users/yannick/opt/anaconda3/envs/test-env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x135612520) and /Users/yannick/opt/anaconda3/envs/test-env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x112df0a68). One of the two will be used. Which one is undefined.
objc[12100]: Class CVView is implemented in both /Users/yannick/opt/anaconda3/envs/test-env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x135612548) and /Users/yannick/opt/anaconda3/envs/test-env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x112df0a90). One of the two w

# This notebook contains the functions and supporting code needed to extract keypoints from all videos inside a given base path

### Important settings that must be defined before running this notebook can be viewed and adjusted below:

## MediaPipe Helper Functions

In [3]:
def process_image(image, model):
    """Run a MediaPipe model on a single BGR frame.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being handed to the model).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(bgr_image, results)``: the frame after the BGR -> RGB -> BGR
        round trip, and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is flagged read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections

In [4]:
def visualize_landmarks(image, results):
    """Draw the pose and both hand landmark sets onto ``image``.

    Args:
        image: Frame passed to MediaPipe's ``draw_landmarks``.
        results: Detection result providing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    draw = mp.solutions.drawing_utils.draw_landmarks
    holistic = mp.solutions.holistic

    # Face landmarks are deliberately not drawn; the disabled entry would be
    # ("face_landmarks", "FACEMESH_CONTOURS").
    overlays = (
        ("pose_landmarks", "POSE_CONNECTIONS"),
        ("left_hand_landmarks", "HAND_CONNECTIONS"),
        ("right_hand_landmarks", "HAND_CONNECTIONS"),
    )
    for landmark_field, connection_set in overlays:
        draw(image, getattr(results, landmark_field), getattr(holistic, connection_set))

In [5]:
def extract_keypoints(results):
    """Flatten pose and hand landmarks into a single 1-D feature vector.

    Layout of the returned vector: pose landmarks as (x, y, z, visibility),
    followed by left-hand and then right-hand landmarks as (x, y, z). A part
    whose landmarks are missing is replaced by zeros sized for 33 pose points
    or 21 hand points respectively.

    Args:
        results: Detection result providing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` sequence.

    Returns:
        ``np.ndarray`` holding the concatenated values.
    """
    def flatten(landmark_list, fields, point_count):
        # Missing detection -> zero block of the expected size.
        if not landmark_list:
            return np.zeros(point_count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in landmark_list.landmark]
        return np.array(rows).flatten()

    hand_fields = ("x", "y", "z")
    return np.concatenate([
        flatten(results.pose_landmarks, ("x", "y", "z", "visibility"), 33),
        flatten(results.left_hand_landmarks, hand_fields, 21),
        flatten(results.right_hand_landmarks, hand_fields, 21),
    ])

# Video Processing, Keypoint Extraction and Exporting Logic

In [6]:
def mp_db_processing(path, words):
    """Extract and save per-frame keypoints for every ``.mp4`` of each word.

    For each ``word`` the ``.mp4`` files in ``<path>/<word>/`` are read frame
    by frame; the keypoints of frame ``i`` of ``<video>.mp4`` are written to
    ``<path>/<word>_data/<video>/<word><i>.npy``. Files without the ``.mp4``
    suffix are skipped.

    Args:
        path: Base directory containing one sub-directory per word.
        words: Iterable of word labels (the sub-directory names) to process.
    """
    # Set mediapipe model
    with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
        # Looping through subset of words we choose to work with
        for word in words:
            label = str(word)
            dir_path = path + "/" + label + "/"
            data_dir = os.path.join(path, label + "_data")

            for video in os.listdir(dir_path):
                if not video.endswith('.mp4'):
                    continue

                # The output folder depends only on the video, so its path is
                # computed once rather than for every frame.
                current_directory = os.path.join(data_dir, str(video.split(".")[0]))

                vid_file = cv2.VideoCapture(dir_path + str(video))
                try:
                    frame_amount = int(vid_file.get(cv2.CAP_PROP_FRAME_COUNT))

                    for frame_nr in range(frame_amount):
                        ret, frame = vid_file.read()
                        if not ret:
                            # The reported frame count can exceed the number of
                            # frames that can actually be read; stop here
                            # instead of passing an empty frame on.
                            break

                        _, results = process_image(frame, holistic_model)
                        keypoints = extract_keypoints(results)

                        # Created lazily so unreadable videos leave no empty
                        # folders behind; also creates '<word>_data' itself.
                        os.makedirs(current_directory, exist_ok=True)
                        np.save(os.path.join(current_directory, label) + str(frame_nr), keypoints)
                finally:
                    # Release every capture, not only the last one opened.
                    vid_file.release()

In [30]:
# Batch run kept as a string literal, so it is not executed. Remove the
# surrounding quotes to run mp_db_processing for the listed words.
'''
path = os.path.join(os.getcwd(), "../extended_subset")
words =  np.array(['hvad', 'ja', 'soed', 'mange', 'nej', 'nu', 'fordi', 'koebe', 'tid'])
mp_db_processing(path, words)
'''

# Extracting Keypoints for a Single Video

In [6]:
def mp_sv_processing(vid_path):
    """Extract the keypoints of every readable frame of a single video.

    Args:
        vid_path: Video source handed to ``cv2.VideoCapture``.

    Returns:
        ``np.ndarray`` built from one keypoint vector per frame that was read
        successfully, in frame order.
    """
    keypoint_data = []
    # Set mediapipe model
    with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
        vid_file = cv2.VideoCapture(vid_path)
        try:
            num_frames = int(vid_file.get(cv2.CAP_PROP_FRAME_COUNT))
            # Loop through video length aka sequence length
            for _ in range(num_frames):
                ret, frame = vid_file.read()
                if not ret:
                    # The reported frame count can exceed the number of frames
                    # that can actually be read; stop instead of processing
                    # an empty frame.
                    break
                _, results = process_image(frame, holistic_model)
                keypoint_data.append(extract_keypoints(results))
        finally:
            # Release the capture even if processing a frame raised.
            vid_file.release()

    return np.asarray(keypoint_data)

# Testing and Visualization

In [7]:
def visualize_video_extraction(path):
    """Show a video stream with the detected landmarks drawn on each frame.

    Playback ends when no further frame can be read or when ``q`` is pressed
    in the preview window.

    Args:
        path: Source handed to ``cv2.VideoCapture`` (e.g. a video file path).
    """
    cap = cv2.VideoCapture(path)
    try:
        # Set mediapipe model
        with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # isOpened() stays true at the end of a stream, so the
                    # failed read is the actual stop signal.
                    break

                image, results = process_image(frame, holistic)
                visualize_landmarks(image, results)
                cv2.imshow('OpenCV Feed', image)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Clean up even when processing a frame raised.
        cap.release()
        cv2.destroyAllWindows()
        # NOTE(review): extra waitKey presumably lets the GUI process the
        # window-close event on some platforms - confirm before removing.
        cv2.waitKey(1)

In [6]:
def testing(vid_path, path):
    # Set mediapipe model 
    with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
        vid_file = cv2.VideoCapture(vid_path)
        num_frames = int(vid_file.get(cv2.CAP_PROP_FRAME_COUNT))
        keypoint_data = []
        # Loop through video length aka sequence length
        for frame in range(num_frames):
            ret, frame = vid_file.read()
            image, results = process_image(frame, holistic_model)
            
            visualize_landmarks(image, results)
            cv2.imshow(path, image)
            keypoints = extract_keypoints(results)
            keypoint_data.append(keypoints)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
        
        vid_file.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)


In [52]:
# Fordi32 is a good example of the pose landmarks glitching
#testing("../extended_subset/fordi/fordi32.mp4", "")

# Preview a single sample video; the second argument (window title) is left empty.
testing("../extended_subset/nej/nej25.mp4", "")

In [23]:
# Folder preview kept as a string literal, so it is not executed. Remove the
# surrounding quotes to call testing() on every file in dir_path.
'''
#dir_path= "../extended_data/soed"
dir_path = os.path.join(os.getcwd(), "../extended_subset/fordi")
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        testing(os.path.join(dir_path, path), path)
        print
'''

OpenCV: Couldn't read video stream from file "/Users/yannick/Library/Mobile Documents/com~apple~CloudDocs/Documents/GitHub/danish-sign-language-translator/src/../extended_subset/fordi/fordi15"
OpenCV: Couldn't read video stream from file "/Users/yannick/Library/Mobile Documents/com~apple~CloudDocs/Documents/GitHub/danish-sign-language-translator/src/../extended_subset/fordi/fordi6"
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x28341a600] moov atom not found
OpenCV: Couldn't read video stream from file "/Users/yannick/Library/Mobile Documents/com~apple~CloudDocs/Documents/GitHub/danish-sign-language-translator/src/../extended_subset/fordi/fordi11.mov"
