In [None]:
# 1_video_processing

# Get the video files of the LSFB dataset.
# Use MediaPipe Holistic to extract the x, y, z coordinates of the pose, face and hand landmarks.
# Build one dataframe per video.
# Save each dataframe as a CSV file.

In [None]:
# import libraries

import os

import pandas as pd
import numpy as np

import cv2
import mediapipe.python.solutions.holistic as mp_holistic

In [None]:
# extract landmarks from video files

# Directory to be scanned. The loop below walks its sub-directories (one per
# word) and processes every ".mp4" file found directly inside each of them.
path_from = "/SFL_data/videos/"

# Directory that receives the output: one CSV file of landmark coordinates per
# processed video.
# NOTE(review): this value looks like a placeholder — set it to the real output
# directory before running.
path_to = "/path_of_directory_where_dataframes_will_be_saved/"

# Landmark extractor shared by every video processed below: a MediaPipe
# Holistic solution configured with static_image_mode off, model_complexity 2
# and refined face landmarks enabled.
holistic = mp_holistic.Holistic(
    static_image_mode=False,
    model_complexity=2,
    refine_face_landmarks=True,
)

# Fallback landmark counts per body part. They are only used when a part is
# never detected in a whole video, so that every CSV keeps the same column
# layout (pose | face | left hand | right hand).
# NOTE(review): 33 pose / 478 face / 21 hand landmarks are the counts documented
# for MediaPipe Holistic with refine_face_landmarks=True — confirm for the
# installed MediaPipe version.
N_POSE_LANDMARKS = 33
N_FACE_LANDMARKS = 478
N_HAND_LANDMARKS = 21


def _flatten_landmarks(landmark_list):
    """Flatten one landmark list into [x0, y0, z0, x1, y1, z1, ...].

    Returns None when `landmark_list` is None, i.e. the part was not detected
    in the frame.
    """
    if landmark_list is None:
        return None
    coords = []
    for landmark in landmark_list.landmark:
        coords.extend((landmark.x, landmark.y, landmark.z))
    return coords


def _rows_to_frame(rows, default_width):
    """Build a DataFrame with exactly one row per video frame.

    `rows` holds one entry per frame: a flat coordinate list, or None when the
    part was not detected. Undetected frames become rows of NaN so the row
    index keeps matching the frame index. The width is taken from the first
    detected row; `default_width` is used when the part was never detected.
    """
    width = next((len(row) for row in rows if row is not None), default_width)
    filled = [row if row is not None else [np.nan] * width for row in rows]
    return pd.DataFrame(filled, columns=range(width))


def _extract_video_landmarks(video_path, holistic):
    """Run `holistic` on every frame of the video at `video_path`.

    Returns a tuple (data, count_frame_total, count_frame_missing):
      * data: DataFrame with one row per frame and the pose, face, left-hand
        and right-hand coordinates side by side (NaN where not detected);
      * count_frame_total: number of frames read;
      * count_frame_missing: number of frames where at least one part was
        not detected.
    """
    pose_all, face_all, left_all, right_all = [], [], [], []
    count_frame_total = 0
    count_frame_missing = 0

    video_current = cv2.VideoCapture(video_path)
    try:
        while video_current.isOpened():
            success, image = video_current.read()
            if not success:
                # no more frames could be read: stop
                break

            count_frame_total += 1
            image.flags.writeable = False
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = holistic.process(image)

            pose = _flatten_landmarks(results.pose_landmarks)
            face = _flatten_landmarks(results.face_landmarks)
            left = _flatten_landmarks(results.left_hand_landmarks)
            right = _flatten_landmarks(results.right_hand_landmarks)

            # Always append all four parts, detected or not: skipping some of
            # them would leave the lists with different lengths and the final
            # column-wise concat would mix data from different frames.
            pose_all.append(pose)
            face_all.append(face)
            left_all.append(left)
            right_all.append(right)

            if any(part is None for part in (pose, face, left, right)):
                count_frame_missing += 1
    finally:
        # release the capture even if processing a frame raised
        video_current.release()

    # assemble data: same number of rows in every part, so rows stay aligned
    data = pd.concat(
        [
            _rows_to_frame(pose_all, 3 * N_POSE_LANDMARKS),
            _rows_to_frame(face_all, 3 * N_FACE_LANDMARKS),
            _rows_to_frame(left_all, 3 * N_HAND_LANDMARKS),
            _rows_to_frame(right_all, 3 * N_HAND_LANDMARKS),
        ],
        axis=1,
    )
    data.columns = range(data.shape[1])
    return data, count_frame_total, count_frame_missing


# fail early (before any video is processed) if the output directory
# cannot be created
os.makedirs(path_to, exist_ok=True)

# set file id
file_id = 0

# NOTE(review): the same extractor instance is reused for every video with
# static_image_mode=False, so tracking state presumably carries over from the
# last frame of one video to the first frame of the next — confirm whether a
# fresh instance per video is wanted.
try:
    # scan the dataset directory; sort by name because os.scandir yields
    # entries in arbitrary order and file_id should be reproducible
    with os.scandir(path_from) as obj_word:
        word_dirs = sorted(
            (entry for entry in obj_word if entry.is_dir()),
            key=lambda entry: entry.name,
        )

    # loop through words
    for entry_word in word_dirs:
        # the word label is the second "_"-separated field of the directory
        # name; fall back to the whole name when there is no "_"
        name_parts = entry_word.name.split("_")
        word = name_parts[1] if len(name_parts) > 1 else entry_word.name

        with os.scandir(entry_word.path) as obj_file:
            video_files = sorted(
                (
                    entry
                    for entry in obj_file
                    if entry.is_file() and entry.name.endswith(".mp4")
                ),
                key=lambda entry: entry.name,
            )

        # loop through files
        for entry_file in video_files:
            file_id += 1
            print(file_id)
            # splitext keeps dots that are part of the file name itself
            file_name_in = os.path.splitext(entry_file.name)[0]
            file_name_out = word + "_" + str(file_id) + "_" + file_name_in + ".csv"

            # get data for all frames
            data, count_frame_total, count_frame_missing = _extract_video_landmarks(
                entry_file.path, holistic
            )

            # save data
            data.to_csv(os.path.join(path_to, file_name_out), index=False)
            print(
                f"{file_name_out}: {count_frame_total} frames, "
                f"{count_frame_missing} with at least one undetected part"
            )
finally:
    # free the extractor's resources once all videos are processed
    holistic.close()