In [1]:
import json
import os

import cv2
import mediapipe as mp
import numpy as np
import pandas as pd

In [3]:
# Load the WLASL gloss file. JSON text is UTF-8 by specification, so the
# encoding is pinned instead of being left to the platform's locale default.
with open('./wlasl_gloss.json', encoding='utf-8') as gloss_file:
    wlasl_gloss = json.load(gloss_file)

In [7]:
# Read the training metadata and keep each distinct value of its 'sign'
# column as the vocabulary.
train_df = pd.read_csv("./train.csv")
vocab = train_df['sign'].unique().tolist()

In [8]:
# Display the distinct sign labels collected from train.csv.
vocab

['blow',
 'wait',
 'cloud',
 'bird',
 'owie',
 'duck',
 'minemy',
 'lips',
 'flower',
 'time',
 'vacuum',
 'apple',
 'puzzle',
 'mitten',
 'there',
 'dry',
 'shirt',
 'owl',
 'yellow',
 'not',
 'zipper',
 'clean',
 'closet',
 'quiet',
 'have',
 'brother',
 'clown',
 'cheek',
 'cute',
 'store',
 'shoe',
 'wet',
 'see',
 'empty',
 'fall',
 'balloon',
 'frenchfries',
 'finger',
 'same',
 'cry',
 'hungry',
 'orange',
 'milk',
 'go',
 'drawer',
 'TV',
 'another',
 'giraffe',
 'wake',
 'bee',
 'bad',
 'can',
 'say',
 'callonphone',
 'finish',
 'old',
 'backyard',
 'sick',
 'look',
 'that',
 'black',
 'yourself',
 'open',
 'alligator',
 'moon',
 'find',
 'pizza',
 'shhh',
 'fast',
 'jacket',
 'scissors',
 'now',
 'man',
 'sticky',
 'jump',
 'sleep',
 'sun',
 'first',
 'grass',
 'uncle',
 'fish',
 'cowboy',
 'snow',
 'dryer',
 'green',
 'bug',
 'nap',
 'feet',
 'yucky',
 'morning',
 'sad',
 'face',
 'penny',
 'gift',
 'night',
 'hair',
 'who',
 'think',
 'brown',
 'mad',
 'bed',
 'drink',
 'st

In [9]:
# Keep only the gloss entries whose key is one of the vocabulary signs.
# Each element of wlasl_gloss is a dict; if a key occurs more than once,
# the last value seen wins.
wlasl_data = {
    gloss: entries
    for record in wlasl_gloss
    for gloss, entries in record.items()
    if gloss in vocab
}

In [15]:
# Number of keys from wlasl_gloss that also appear in vocab.
len(wlasl_data)

200

In [20]:
# Flatten the per-gloss values of wlasl_data into a single list, keeping
# the dict's iteration order.
video_ids = [
    clip_id
    for clip_ids in wlasl_data.values()
    for clip_id in clip_ids
]

In [23]:
# Display the flattened list of ids gathered from wlasl_data.
video_ids

['69241',
 '65225',
 '68011',
 '68208',
 '68012',
 '70212',
 '70266',
 '07085',
 '07086',
 '07087',
 '07069',
 '07088',
 '07089',
 '07090',
 '07091',
 '07092',
 '07093',
 '07068',
 '07094',
 '07095',
 '07096',
 '07097',
 '07070',
 '07098',
 '07099',
 '07071',
 '07072',
 '07073',
 '67424',
 '07074',
 '07075',
 '07076',
 '07077',
 '07078',
 '07079',
 '07080',
 '07081',
 '07082',
 '07083',
 '07084',
 '69302',
 '65539',
 '70173',
 '68538',
 '68042',
 '68660',
 '68041',
 '17725',
 '17726',
 '17727',
 '17728',
 '17710',
 '17729',
 '17730',
 '17731',
 '17732',
 '17733',
 '65540',
 '17734',
 '17711',
 '17712',
 '17713',
 '17714',
 '17715',
 '17716',
 '17717',
 '17718',
 '17709',
 '67594',
 '17719',
 '17720',
 '17721',
 '17722',
 '17723',
 '17724',
 '05724',
 '70348',
 '68007',
 '05744',
 '05746',
 '05728',
 '05747',
 '05748',
 '05749',
 '05750',
 '05729',
 '05730',
 '65167',
 '05731',
 '05732',
 '05733',
 '05734',
 '05735',
 '05736',
 '05727',
 '05737',
 '05739',
 '05740',
 '05741',
 '05742',


In [24]:
def transform_video(video_id):
    """Extract MediaPipe Holistic landmarks from one video and write them to CSV.

    Reads ``./videos/{video_id}.mp4`` frame by frame, upscales each frame 4x,
    runs Holistic on it and records one row per landmark: 468 face, 21 left
    hand, 33 pose and 21 right hand rows per frame, in that order. A group
    that was not detected in a frame is written with NaN coordinates.

    Output columns: type, landmark_index, x, y, z, frame, height, width.
    ``y`` is multiplied by the frame's height/width ratio, ``height`` stores
    that same ratio and ``width`` is always 1.0.

    Parameters
    ----------
    video_id : str
        File stem of the video; also used as the file stem of the CSV.

    Returns
    -------
    None
        The table is written to ``./wlasl_csv/{video_id}.csv`` (the directory
        is created when missing). When the video cannot be opened or yields
        no frames, a message is printed and no file is written.

    Raises
    ------
    AssertionError
        If Holistic returns a landmark group with an unexpected length.
    """
    video_file = f'./videos/{video_id}.mp4'
    output_dir = './wlasl_csv'
    frame_tables = []

    cap = cv2.VideoCapture(video_file)
    try:
        # The context manager closes the Holistic graph even if a frame fails.
        with mp.solutions.holistic.Holistic(
            min_detection_confidence=0.5, min_tracking_confidence=0.1
        ) as holistic:
            frame_no = 0
            while cap.isOpened():
                print('\r', frame_no, end='')
                success, image = cap.read()
                if not success:
                    break

                image = cv2.resize(image, dsize=None, fx=4, fy=4)
                height, width, _ = image.shape
                image.flags.writeable = False
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                result = holistic.process(image)

                # y is rescaled by the aspect ratio so that x and y share the
                # same unit (fractions of the frame width).
                aspect = height / width
                rows = []
                for landmark_type, attr, count in _LANDMARK_GROUPS:
                    rows.extend(
                        _landmark_rows(getattr(result, attr), landmark_type, count, aspect)
                    )

                frame_df = pd.DataFrame(rows)
                frame_df['frame'] = frame_no
                frame_df['height'] = aspect
                frame_df['width'] = 1.0
                frame_tables.append(frame_df)

                frame_no += 1
    finally:
        # Always hand the decoder / file handle back to OpenCV.
        cap.release()

    if not frame_tables:
        # pd.concat([]) raises ValueError; a missing or unreadable clip should
        # not abort a whole batch run.
        print(f'\rskipping {video_id}: no frames could be read from {video_file}')
        return

    os.makedirs(output_dir, exist_ok=True)
    pd.concat(frame_tables).to_csv(f'./wlasl_csv/{video_id}.csv')


# (value of the 'type' column, attribute on the Holistic result, landmark count),
# listed in the order the rows are written for every frame.
_LANDMARK_GROUPS = (
    ('face', 'face_landmarks', 468),
    ('left_hand', 'left_hand_landmarks', 21),
    ('pose', 'pose_landmarks', 33),
    ('right_hand', 'right_hand_landmarks', 21),
)


def _landmark_rows(landmarks, landmark_type, count, y_scale):
    """Build the `count` row dicts of one landmark group for a single frame.

    `landmarks` is the group's attribute from the Holistic result, or None
    when the group was not detected, in which case x, y and z are NaN.
    Detected y values are multiplied by `y_scale`.
    """
    if landmarks is None:
        return [
            {
                'type': landmark_type,
                'landmark_index': index,
                'x': np.nan,
                'y': np.nan,
                'z': np.nan,
            }
            for index in range(count)
        ]

    points = landmarks.landmark
    assert len(points) == count, (
        f'{landmark_type}: expected {count} landmarks, got {len(points)}'
    )
    return [
        {
            'type': landmark_type,
            'landmark_index': index,
            'x': points[index].x,
            'y': points[index].y * y_scale,
            'z': points[index].z,
        }
        for index in range(count)
    ]

In [25]:
# Run the landmark extraction for every collected id, one video at a time.
for clip_id in video_ids:
    transform_video(clip_id)

 0

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


 718