This script converts raw ASL videos into NumPy keypoint arrays to prepare them for feeding into the model.
These arrays are then consolidated into 2 pickle files, one of keypoint sequences and one of labels (the label-to-index map is also written as a CSV).

# Initialize

In [3]:
import cv2
import numpy as np
import os
import mediapipe as mp
import pandas as pd
import json
import pickle

In [16]:
# ---- notebook configuration ----

# MediaPipe handles shared by the cells below
mp_drawing = mp.solutions.drawing_utils  # drawing functions
mp_holistic = mp.solutions.holistic  # keypoint model

# Inputs: edit these two paths for your machine
RAW_DATA_PATH = '/Users/jerremy/Desktop/python/hackathon/raw_videos'  # folder of raw videos
json_filepath = '/Users/jerremy/WLASL/start_kit/WLASL_v0.3.json'  # WLASL json file

# Outputs (relative to the working directory; ENTER OWN PATH if needed)
OUTPUT_PATH = 'output_files'  # consolidated csv/pickle outputs
DATA_PATH = 'KP_Data'  # per-frame keypoint numpy arrays

# Prepare data

In [18]:
# Build the list of actions (glosses) from the WLASL json file.
# The context manager closes the file handle once the json is parsed.
with open(json_filepath, encoding='utf-8') as json_file:
    content = json.load(json_file)

# One gloss per json entry, in file order.
actions = [entry['gloss'] for entry in content]

# to_csv does not create missing directories, so make sure the output folder exists.
os.makedirs(OUTPUT_PATH, exist_ok=True)
pd.DataFrame(actions).to_csv(f"{OUTPUT_PATH}/actions.csv")

actions = np.array(actions)
# Maps each gloss to its position in `actions`.
label_map = {label:num for num, label in enumerate(actions)}

# Convert to keypoints

In [19]:
#function to return keypoints from raw data
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return `(bgr_frame, results)`.

    The frame is converted from BGR to RGB, marked read-only while
    `model.process` runs on it, made writeable again and converted back to BGR.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Lock the buffer for the duration of the prediction, then unlock it.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

#function to get action of sequence from json file
    #takes in a sequence, looks for it in json file and returns action
# Cache of {json path: {video_id: gloss}} so the json file is parsed once per path
# instead of once per video.
_video_gloss_cache = {}


def _load_video_index(path):
    """Return the {video_id: gloss} lookup for the json file at `path`, parsing it on first use."""
    index = _video_gloss_cache.get(path)
    if index is None:
        with open(path, encoding='utf-8') as json_file:
            content = json.load(json_file)
        index = {}
        for entry in content:
            for inst in entry['instances']:
                # setdefault keeps the first gloss seen for a video id,
                # matching a first-match linear scan of the file.
                index.setdefault(inst['video_id'], entry['gloss'])
        _video_gloss_cache[path] = index
    return index


def get_action(sequence):
    """Return the gloss (action) of the video whose id is `sequence`.

    The lookup table is built from the file at the module-level `json_filepath`
    the first time it is needed and reused afterwards; changes made to the file
    after that are not picked up until this cell is re-run.

    Raises:
        Exception: if no instance in the json file has this video id.
    """
    try:
        return _load_video_index(json_filepath)[sequence]
    except KeyError:
        raise Exception("action cannot be found") from None

#function to condense arrays containing the 4 groups of keypoints into single array
def extract_keypoints(results):
    """Flatten the pose, face, left-hand and right-hand landmarks into one 1-D array.

    A group that is missing (falsy) on `results` is replaced by zeros of a fixed
    size: 33*4 for pose, 468*3 for face and 21*3 for each hand. Pose landmarks
    contribute x, y, z and visibility; the other groups contribute x, y and z.
    """
    def _flatten(group, fields, fallback_size):
        # Zero-fill absent groups so the concatenated layout keeps its positions.
        if not group:
            return np.zeros(fallback_size)
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33*4),
        _flatten(results.face_landmarks, xyz, 468*3),
        _flatten(results.left_hand_landmarks, xyz, 21*3),
        _flatten(results.right_hand_landmarks, xyz, 21*3),
    ]
    return np.concatenate(parts)

In [20]:
#looping through each video and extracting keypoints into a numpy array and saving into appropriate sequence files

count = 0
# List the folder once. Hidden entries (e.g. macOS '.DS_Store') are not videos
# and have no video id in the json file, so they are left out.
video_files = [name for name in os.listdir(RAW_DATA_PATH) if not name.startswith('.')]
total = len(video_files)

for sequence in video_files:

    filepath = os.path.join(RAW_DATA_PATH, sequence)
    # The video id is the file name without its extension, whatever the extension's length.
    action = get_action(os.path.splitext(sequence)[0])
    # One folder per video (named after the file, extension included), one .npy per frame.
    sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
    cap = cv2.VideoCapture(filepath)

    try:
        # Set mediapipe model
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

            for frame_num in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):

                if os.path.isfile(os.path.join(sequence_dir, str(frame_num) + '.npy')):
                    print (f"{sequence}{frame_num} is already processed, skipping")
                    # Advance the capture without decoding; otherwise the next
                    # unprocessed frame number would be saved with the pixels of
                    # an earlier frame.
                    if not cap.grab():
                        break
                    continue

                # Read feed
                ret, frame = cap.read()
                if not ret:
                    # The reported frame count may exceed what can actually be
                    # decoded; stop instead of passing an empty frame on.
                    break

                # Make detections
                _, results = mediapipe_detection(frame, holistic)

                # Export keypoints of each frame and save to the folder of the video
                keypoints = extract_keypoints(results)
                os.makedirs(sequence_dir, exist_ok=True)
                np.save(os.path.join(sequence_dir, str(frame_num)), keypoints)
    finally:
        # Release the video handle even if processing fails or is interrupted.
        cap.release()

    count += 1

    print (f'Video:{str(sequence)} processed. ({count}/{total})')
    cv2.destroyAllWindows()

69207.mp40 is already processed, skipping
69207.mp41 is already processed, skipping
69207.mp42 is already processed, skipping
69207.mp43 is already processed, skipping
69207.mp44 is already processed, skipping
69207.mp45 is already processed, skipping
69207.mp46 is already processed, skipping
69207.mp47 is already processed, skipping
69207.mp48 is already processed, skipping
69207.mp49 is already processed, skipping
69207.mp410 is already processed, skipping
69207.mp411 is already processed, skipping
69207.mp412 is already processed, skipping
69207.mp413 is already processed, skipping
69207.mp414 is already processed, skipping
69207.mp415 is already processed, skipping
69207.mp416 is already processed, skipping
69207.mp417 is already processed, skipping
69207.mp418 is already processed, skipping
69207.mp419 is already processed, skipping
69207.mp420 is already processed, skipping
69207.mp421 is already processed, skipping
69207.mp422 is already processed, skipping
69207.mp423 is alread

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Video:69207.mp4 processed. (1/2115)
26262.mp40 is already processed, skipping
26262.mp41 is already processed, skipping
26262.mp42 is already processed, skipping
26262.mp43 is already processed, skipping
26262.mp44 is already processed, skipping
26262.mp45 is already processed, skipping
26262.mp46 is already processed, skipping
26262.mp47 is already processed, skipping
26262.mp48 is already processed, skipping
26262.mp49 is already processed, skipping
26262.mp410 is already processed, skipping
26262.mp411 is already processed, skipping
26262.mp412 is already processed, skipping
26262.mp413 is already processed, skipping
26262.mp414 is already processed, skipping
26262.mp415 is already processed, skipping
26262.mp416 is already processed, skipping
26262.mp417 is already processed, skipping
26262.mp418 is already processed, skipping
26262.mp419 is already processed, skipping
26262.mp420 is already processed, skipping
26262.mp421 is already processed, skipping
26262.mp422 is already proce

KeyboardInterrupt: 

In [21]:
#combining all keypoints into lists and consolidating them into csvs

#for each action, loop through all videos in the action folder
#for each frame in video, append keypoints into a list
#return a list (res) for each video together with a label

# sequences: one list of per-frame keypoint arrays per video
# labels: the action (gloss) of each video, in the same order as sequences
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    if not os.path.isdir(action_dir):
        continue
    for sequence in os.listdir(action_dir):
        sequence_dir = os.path.join(action_dir, str(sequence))
        # Only folders hold frames; this also skips stray files such as '.DS_Store'.
        if not os.path.isdir(sequence_dir):
            continue
        # Load only the '<frame number>.npy' files, in numeric frame order, so an
        # extra file inside the folder cannot shift or break the frame range.
        frame_numbers = sorted(
            int(stem)
            for stem, ext in map(os.path.splitext, os.listdir(sequence_dir))
            if ext == '.npy' and stem.isdigit()
        )
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in frame_numbers
        ]
        sequences.append(window)
        # Labels are kept as text here; numeric indices are assigned afterwards.
        labels.append(action)

# Generate the index list of labels.
# The unique labels are sorted before numbering: iterating a bare set of strings
# can give a different order on every run, which would change the label numbers
# between runs.
unique_label_map = {label:num for num, label in enumerate(sorted(set(labels)))}
# Numeric label of every sequence, in the same order as `labels`.
clean_label_no = np.array([unique_label_map[label] for label in labels])

#sequences is a list of keypoints from videos from KP_data file
#sequence[1.npy,2.npy]
#labels is list of action words from KP_data file

def preserve_pickles(thingtosave,filepath):
    """Pickle `thingtosave` to the file at `filepath`, overwriting any existing file.

    The file is opened in binary write mode and is closed even if pickling
    raises part-way through.
    """
    with open(filepath,'wb') as file:
        pickle.dump(thingtosave,file)

# Make sure the output folder exists: neither open() nor to_csv creates missing directories.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Keypoint sequences and their numeric labels are pickled; the label -> index map is a one-row csv.
preserve_pickles(sequences,f"{OUTPUT_PATH}/sequences_output")
preserve_pickles(clean_label_no,f"{OUTPUT_PATH}/label_output")
pd.DataFrame([unique_label_map]).to_csv(f"{OUTPUT_PATH}/label_map.csv")
print ('pickles created, check output folder')

pickles created, check output folder


In [7]:
DATA_PATH = os.path.join('data') #folder to contain keypoint numpy arrays (ENTER OWN PATH)
OUTPUT_PATH = os.path.join('output_files')
json_filepath = '/Users/jerremy/WLASL/start_kit/WLASL_v0.3.json' #enter json file path

# The context manager closes the file handle once the json is parsed.
with open(json_filepath, encoding='utf-8') as json_file:
    content = json.load(json_file)

# One gloss per json entry, in file order.
actions = [entry['gloss'] for entry in content]


# sequences: one list of per-frame keypoint arrays per video
# labels: the action (gloss) of each video, in the same order as sequences
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    if not os.path.isdir(action_dir):
        continue
    for sequence in os.listdir(action_dir):
        sequence_dir = os.path.join(action_dir, str(sequence))
        # Only folders hold frames; this also skips stray files such as '.DS_Store'.
        if not os.path.isdir(sequence_dir):
            continue
        # Load only the '<frame number>.npy' files, in numeric frame order, so an
        # extra file inside the folder cannot shift or break the frame range.
        frame_numbers = sorted(
            int(stem)
            for stem, ext in map(os.path.splitext, os.listdir(sequence_dir))
            if ext == '.npy' and stem.isdigit()
        )
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in frame_numbers
        ]
        sequences.append(window)
        # Labels are kept as text here; numeric indices are assigned afterwards.
        labels.append(action)

# Generate the index list of labels.
# The unique labels are sorted before numbering: iterating a bare set of strings
# can give a different order on every run, which would change the label numbers
# between runs.
unique_label_map = {label:num for num, label in enumerate(sorted(set(labels)))}
# Numeric label of every sequence, in the same order as `labels`.
clean_label_no = np.array([unique_label_map[label] for label in labels])

#sequences is a list of keypoints from videos from KP_data file
#sequence[1.npy,2.npy]
#labels is list of action words from KP_data file

def preserve_pickles(thingtosave,filepath):
    """Pickle `thingtosave` to the file at `filepath`, overwriting any existing file.

    The file is opened in binary write mode and is closed even if pickling
    raises part-way through.
    """
    with open(filepath,'wb') as file:
        pickle.dump(thingtosave,file)

# Make sure the output folder exists: neither open() nor to_csv creates missing directories.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Keypoint sequences and their numeric labels are pickled; the label -> index map is a one-row csv.
preserve_pickles(sequences,f"{OUTPUT_PATH}/sequences_output")
preserve_pickles(clean_label_no,f"{OUTPUT_PATH}/label_output")
pd.DataFrame([unique_label_map]).to_csv(f"{OUTPUT_PATH}/label_map.csv")
print ('pickles created, check output folder')

pickles created, check output folder


File structure
KP_Data (DATA_PATH)
    action
        sequence
            contains one .npy file per frame (0.npy, 1.npy, ...)
output_files (OUTPUT_PATH)
    sequences_output (pickle)
    label_output (pickle)
    label_map.csv
    actions.csv

Required inputs
    WLASL_v0.3.json file (json_filepath)
    folder of raw videos (RAW_DATA_PATH)

The output is 2 pickle files, one with the list of keypoint sequences and one with the list of labels.