In [14]:
from torch.utils.data import Dataset
import cv2
import numpy as np
import torch
import os
import json
import pandas as pd 
from pytube import YouTube

In [45]:
def open_json_file(fname):
    with open(fname, 'r') as f:
        return json.load(f)

In [46]:
df_train = pd.read_json('datasets/MS-ASL/MSASL_train.json')
df_val = pd.read_json('datasets/MS-ASL/MSASL_val.json')
df_test = pd.read_json('datasets/MS-ASL/MSASL_test.json')


train_hmap = open_json_file('datasets/MS-ASL/hmap_vids.json')
# val_hmap = open_json_file('datasets/MS-ASL/hmap_vids_val.json')
# test_hmap = open_json_file('datasets/MS-ASL/hmap_vids_test.json')

In [47]:
top_10_signs = train_hmap.keys()

df_train = df_train[df_train.clean_text.isin(top_10_signs)]
df_val = df_val[df_val.clean_text.isin(top_10_signs)]
df_val = df_test[df_test.clean_text.isin(top_10_signs)]

In [48]:
df_train['vid'] = df_train.url.apply(lambda x: YouTube(x).video_id)
df_val['vid'] = df_val.url.apply(lambda x: YouTube(x).video_id)
df_test['vid'] = df_test.url.apply(lambda x: YouTube(x).video_id)

### merge_path expects a df column with the format 'sign+video_id'
merge_paths = lambda x: os.path.join('datasets','MS-ASL','trimmed_videos', x.split('+')[0], x.split('+')[1] + '.mp4')



df_train['fpath'] = df_train.clean_text + '+' + df_train.vid 
df_val['fpath']   = df_val.clean_text   + '+' + df_val.vid 
df_test['fpath']  = df_test.clean_text  + '+' + df_test.vid 

df_train['fpath'] = df_train.fpath.apply(merge_paths)
df_val['fpath']   = df_val.fpath.apply(merge_paths)
df_test['fpath']  = df_test.fpath.apply(merge_paths)

df_train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_val['vid'] = df_val.url.apply(lambda x: YouTube(x).video_id)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_val['fpath']   = df_val.clean_text   + '+' + df_val.vid
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_val['fpath']   = df_val.fpath.apply(merge_paths)


Unnamed: 0,org_text,clean_text,start_time,signer_id,signer,start,end,file,label,height,fps,end_time,url,text,box,width,review,vid,fpath
15,like,like,0.0,269,53,0,52,SignSchool really like,6,360,29.97,1.735,www.youtube.com/watch?v=7y5Ye-2-ZBs,like,"[0.040461480617523006, 0.335311889648437, 0.998772382736206, 0.886972963809967]",640,,7y5Ye-2-ZBs,datasets\MS-ASL\trimmed_videos\like\7y5Ye-2-ZBs.mp4
44,Want (mouth “WA“),want,385.765,6,-1,9594,9751,LASL - Ch 6 Vocab,8,358,24.87,392.077,https://www.youtube.com/watch?v=jQb9NL9_S6U,want,"[0.008866041898727, 0.11897420883178701, 0.843058705329895, 0.7124172449111931]",640,,jQb9NL9_S6U,datasets\MS-ASL\trimmed_videos\want\jQb9NL9_S6U.mp4
56,teacher,teacher,13.995,144,-1,419,464,teacher - ASL sign for teacher,2,360,29.94,15.498,https://www.youtube.com/watch?v=_HOx2QkkTsg,teacher,"[0.045998364686965006, 0.037116646766662, 1.0, 0.9313695430755611]",480,1.0,_HOx2QkkTsg,datasets\MS-ASL\trimmed_videos\teacher\_HOx2QkkTsg.mp4
57,teacher,teacher,15.498,144,-1,464,510,teacher - ASL sign for teacher,2,360,29.94,17.034,https://www.youtube.com/watch?v=_HOx2QkkTsg,teacher,"[0.045998364686965006, 0.037116646766662, 1.0, 0.9313695430755611]",480,1.0,_HOx2QkkTsg,datasets\MS-ASL\trimmed_videos\teacher\_HOx2QkkTsg.mp4
72,EAT,eat,12.913,8,20,387,452,Basic ASL Vocabulary for Babies,3,360,29.97,15.082,https://www.youtube.com/watch?v=htsdwxJ-fTo,eat,"[0.09897658228874201, 0.294420778751373, 1.0, 0.7674044370651241]",640,,htsdwxJ-fTo,datasets\MS-ASL\trimmed_videos\eat\htsdwxJ-fTo.mp4


In [52]:
class MSASLVideoDataset(Dataset):
    def __init__(self, video_paths, labels, num_frames=32, img_size=224, transforms=None):
        self.video_paths = video_paths
        self.labels = labels
        self.num_frames = num_frames
        self.img_size = img_size
        self.transforms = transforms

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        label = self.labels[idx]

        frames = self.load_video(video_path)
        if self.transforms:
            frames = self.transforms(frames)

        return frames, label

    def load_video(self, path):
        frames = read_frames(path, self.img_size)
        # Uniformly sample self.num_frames frames
        total_frames = frames.shape[0]
        if total_frames >= self.num_frames:
            idxs = np.linspace(0, total_frames-1, self.num_frames).astype(int)
            frames = frames[idxs]
        else:
            # pad by repeating last frame
            pad_len = self.num_frames - total_frames
            pad_frames = np.repeat(frames[-1:], pad_len, axis=0)
            frames = np.concatenate((frames, pad_frames), axis=0)

        print(frames.shape)
        frames = frames.transpose(0, 3, 1, 2)  # (Frames, Channels, Height, Width)
        frames = torch.from_numpy(frames).float() / 255.0  # normalize 0-1
        return frames
    
def read_frames(path, img_size):
    # path = os.path.join(path)
    cap = cv2.VideoCapture(path)
    frames = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (img_size, img_size))
        frames.append(frame)

    cap.release()

    frames = np.array(frames)
    return frames


In [50]:
from fastai.data.core import DataLoaders

train_ds = MSASLVideoDataset(df_train.fpath.to_list(), df_train.clean_text.to_list(), num_frames=32, img_size=224)
valid_ds = MSASLVideoDataset(df_val.fpath.to_list(), df_val.clean_text.to_list(), num_frames=32, img_size=224)
# dls = DataLoaders.from_dsets(train_ds, valid_ds, bs=8, shuffle=True, num_workers=4)

In [43]:
t = os.path.join('datasets','MS-ASL','trimmed_videos','fish','1E8k8gI_xYk_119.mp4')
t = 'datasets\MS-ASL\trimmed_videos\fish\1E8k8gI_xYk_119.mp4'
read_frames(t, 224)

array([], dtype=float64)

In [55]:
read_frames(train_ds.video_paths[0], 224)

array([], dtype=float64)

In [62]:
os.path.exists(train_ds.video_paths[0])

False