In [1]:
import json
import os
import shutil

import numpy as np
import pandas as pd

# Load Data

In [3]:
main_path = "wlasl-processed/"
wlasl_df = pd.read_json(main_path + "WLASL_v0.3.json")

In [4]:
wlasl_df.head()

Unnamed: 0,gloss,instances
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra..."
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."


In [5]:
wlasl_df.shape

(2000, 2)

# Extract Available Video List from The Dataset

In [6]:
def get_videos_ids(json_list):
    """
    check if the video id is available in the dataset
    and return the viedos ids of the current instance
    
    Args:
        json_list: Instance of video metadata.
        
    Returns:
        List of video ids.
    """
    video_ids = []
    for ins in json_list:
        video_id = ins['video_id']
        if os.path.exists(f'{main_path}videos/{video_id}.mp4'):
            video_ids.append(video_id)
    return video_ids

In [7]:
with open(main_path+'WLASL_v0.3.json', 'r') as data_file:
    json_data = data_file.read()

instance_json = json.loads(json_data)

In [8]:
get_videos_ids(instance_json[0]['instances'])

['69241', '07069', '07068', '07070', '07099', '07074']

In [9]:
len(get_videos_ids(instance_json[0]['instances']))

6

In [10]:
wlasl_df["video_ids"] = wlasl_df["instances"].apply(get_videos_ids)

In [19]:
features_df = pd.DataFrame(columns=['gloss', 'video_id'])
for row in wlasl_df.iterrows():
    ids = get_videos_ids(row[1][1])
    word = [row[1][0]] * len(ids)
    df = pd.DataFrame(list(zip(word, ids)), columns=features_df.columns)
    features_df = pd.concat([features_df,df])

  ids = get_videos_ids(row[1][1])
  word = [row[1][0]] * len(ids)


In [20]:
features_df

Unnamed: 0,gloss,video_id
0,book,69241
1,book,07069
2,book,07068
3,book,07070
4,book,07099
...,...,...
2,wheelchair,63047
3,wheelchair,63050
0,whistle,63186
1,whistle,63188


# Move Videos to Sub-directory named labels

In [21]:
def move_videos_to_subdir(dataframe):
    for label in dataframe["gloss"].unique():
        dst_path = f'videos/{label}'
        os.makedirs(dst_path, exist_ok=True)
        
        for video in dataframe.loc[dataframe["gloss"] == label]["video_id"]:
            src = f'{main_path}videos/{video}.mp4'
            dst = dst_path + f'/{video}.mp4'
            shutil.copyfile(src, dst)


move_videos_to_subdir(features_df)

In [13]:
os.listdir('videos/about/')

['00414.mp4',
 '00426.mp4',
 '00421.mp4',
 '00416.mp4',
 '65003.mp4',
 '65002.mp4',
 '00415.mp4']