In [1]:
import glob
import json
import os
import pickle
import shlex

from detector import Detector
from parser import parse

In [2]:
def read_json(path='output.json'):
    """
    Check for valid JSON format and read content
    path: path to JSON file
    Returns the parsed JSON content
    Raises AssertionError('Invalid JSON') when the content cannot be parsed
    """
    # context manager guarantees the handle is closed even if reading fails
    with open(path) as file:
        # newlines are flattened to spaces before parsing
        line = file.read().replace('\n', ' ')
    try:
        return json.loads(line)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass. Raise explicitly
        # (instead of `assert False`) so the check is not stripped under
        # `python -O`; AssertionError is kept for existing callers.
        raise AssertionError('Invalid JSON') from err

def get_vid_ext(vid_id, video_dir):
    """
    Look up which container extension a stored video uses
    vid_id: video id
    video_dir: directory path to video files
    Returns '.mp4', '.mkv' or '.webm' (checked in that order), or None
    when no file with one of those extensions exists
    """
    base_path = os.path.join(video_dir, vid_id)
    # first matching extension wins
    for ext in ('.mp4', '.mkv', '.webm'):
        if os.path.exists(base_path + ext):
            return ext
    return None
    

In [3]:
def download_video(vid_id, video_dir):
    """
    Download video
    vid_id: video id
    video_dir: directory path to video files
    Runs `youtube-dl -o <video_dir>/<vid_id> <url>` through the shell; the
    exit status is not checked, so callers must verify the file exists.
    """
    vid_url = 'www.youtube.com/watch?v=' + vid_id
    vid_prefix = os.path.join(video_dir, vid_id)
    # quote both arguments: the output path may contain spaces and the URL
    # contains '?', which the shell would otherwise treat as a glob character
    command = ' '.join(('youtube-dl', '-o', shlex.quote(vid_prefix), shlex.quote(vid_url)))
    os.system(command)


def sample_frames(vid_id, video_dir, frame_dir, fps=5):
    """
    Sample video into frames at fixed fps
    vid_id: video id
    video_dir: directory path to video files
    frame_dir: directory path to video frames
    fps: fps for frame extraction
    Frames are written as <frame_dir>/<vid_id>/%06d.jpg by ffmpeg; the exit
    status of ffmpeg is not checked.
    """
    # makedirs also creates frame_dir itself when it does not exist yet
    os.makedirs(os.path.join(frame_dir, vid_id), exist_ok=True)

    vid_ext = get_vid_ext(vid_id, video_dir)
    if vid_ext is None:
        # without this guard ffmpeg would be pointed at '<vid_id>None'
        print('[INFO] No video file found for {}'.format(vid_id))
        return

    video_path = '{}/{}{}'.format(video_dir, vid_id, vid_ext)
    frame_pattern = '{}/{}/%06d.jpg'.format(frame_dir, vid_id)
    # quote every interpolated value so paths with spaces or shell
    # metacharacters reach ffmpeg intact
    ff_command = 'ffmpeg -i {} -y -an -qscale 0 -vf fps={} {}'.format(
        shlex.quote(video_path), shlex.quote(str(fps)), shlex.quote(frame_pattern))
    os.system(ff_command)


def remove_video(vid_id, video_dir):
    """
    Delete video
    vid_id: video id
    video_dir: directory path to video files
    Does nothing when no video file with a known extension exists.
    """
    vid_ext = get_vid_ext(vid_id, video_dir)
    if vid_ext is None:
        # nothing to delete; previously this raised TypeError on str + None
        return
    os.remove(os.path.join(video_dir, vid_id) + vid_ext)


def select_frames(actions, vid_id, num_frames_per_step, fps=5):
    """
    Returns representative frames for actions
    actions: list of action annotations from YCII annotations; each item
             provides action['segment'] == [start, end] (used as seconds)
    vid_id: video id (unused, kept for interface compatibility)
    num_frames_per_step: number of frames per action step
    fps: rate at which the frames on disk were sampled; must match the fps
         given to sample_frames
    Returns required_frames: set containing names of representative frames
    """
    required_frames = set()
    for action in actions:
        action_start, action_end = action['segment'][0], action['segment'][1]
        # num_frames_per_step inner frames split the segment into
        # num_frames_per_step + 1 equal intervals
        action_delta = (action_end - action_start) / (num_frames_per_step + 1)
        for i in range(num_frames_per_step):
            frame_time = action_start + action_delta * (i + 1)    # in seconds
            # seconds -> frame index uses the sampling rate. The previous
            # factor (num_frames_per_step + 1) only equalled the 5 fps
            # sampling rate for the default num_frames_per_step=4.
            # NOTE(review): ffmpeg's %06d numbering may start at 1 - confirm
            # whether an offset of +1 is needed here.
            frame_id = int(frame_time * fps)
            required_frames.add('{}.jpg'.format(str(frame_id).zfill(6)))
    return required_frames


def remove_frames(vid_id, frame_dir, required_frames):
    """
    Delete every frame of a video that is not listed as required
    vid_id: video id
    frame_dir: directory path to video frames
    required_frames: set containing names of representative frames
    """
    video_frame_dir = os.path.join(frame_dir, vid_id)
    # nothing to prune when the video has no frame directory
    if not os.path.isdir(video_frame_dir):
        return
    for frame_name in os.listdir(video_frame_dir):
        if frame_name in required_frames:
            continue
        os.remove(os.path.join(video_frame_dir, frame_name))


def get_actions(actions):
    """
    Collect the sentence of every action annotation
    actions: list of action annotations from YCII annotations
    Returns actions_text: list with each annotation's 'sentence', in order
    """
    return [annotation['sentence'] for annotation in actions]


def pickle_data(data, pickles_dir, vid_id, fname):
    """
    Pickle data into bytestreams
    data: data to be pickled
    pickles_dir: directory path to pickled data
    vid_id: video id
    fname: name of pickled file
    Writes <pickles_dir>/<vid_id>/<fname>.pickle, overwriting any existing file.
    """
    target_dir = os.path.join(pickles_dir, vid_id)
    # makedirs also creates pickles_dir itself when it does not exist yet
    os.makedirs(target_dir, exist_ok=True)
    # context manager closes the file even if pickle.dump raises
    with open(os.path.join(target_dir, fname + '.pickle'), 'wb') as pickle_out:
        pickle.dump(data, pickle_out)


def depickle_data(pickles_dir, vid_id, fname):
    """
    Depickle data from bytestreams
    pickles_dir: directory path to pickled data
    vid_id: video id
    fname: name of pickled file
    Returns the unpickled object, or [] when the pickle file does not exist
    """
    pickle_path = os.path.join(pickles_dir, vid_id, fname + '.pickle')
    if not os.path.exists(pickle_path):
        return []
    # context manager closes the handle (it was previously left open)
    with open(pickle_path, 'rb') as pickle_in:
        # WARNING: pickle.load can execute arbitrary code; only load files
        # produced by this project
        return pickle.load(pickle_in)



In [15]:
def prepare_dataset(dataset_root='/h/mkhan/ece496-capstone/datasets', num_frames_per_step=4):
    """
    Download and prepare dataset files
    dataset_root: directory path to dataset base
    num_frames_per_step: number of frames per action step

    For every '<recipe type>/<video id>' entry in
    vid_list/vid_list_ycii_val_short.txt: download the video, sample frames,
    delete the video, keep only the representative frames, run the detector
    on them, and pickle the candidates and action sentences under
    ycii_pickles/<video id>/. Entries whose video could not be downloaded are
    written to vid_list/missing_videos.txt.
    """

    annotations = read_json(os.path.join(dataset_root, 'annotations', 'ycii_annotations_trainval.json'))['database']

    videos_root = os.path.join(dataset_root, 'ycii_videos')
    frames_root = os.path.join(dataset_root, 'ycii_frames')
    pickles_root = os.path.join(dataset_root, 'ycii_pickles')
    for root in (videos_root, frames_root, pickles_root):
        os.makedirs(root, exist_ok=True)

    missing_vid_list = []

    detector = Detector()

    with open(os.path.join(dataset_root, 'vid_list', 'vid_list_ycii_val_short.txt')) as f:
        lines = f.readlines()

    for line in lines:
        entry = line.strip()
        if not entry:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        rcp_type, vid_id = entry.split('/')
        print('[INFO] Processing video {}'.format(vid_id))

        # download the video
        download_video(vid_id, videos_root)

        # check if the video is available
        if get_vid_ext(vid_id, videos_root) is None:
            missing_vid_list.append(entry)
            print('[INFO] Cannot download video {}'.format(vid_id))
            continue
        print('[INFO] Downloaded video {}'.format(vid_id))

        # sample frames at fixed fps
        sample_frames(vid_id, videos_root, frames_root, fps=5)
        print('[INFO] Sampled frames for video {}'.format(vid_id))

        # remove sampled video file (optional)
        remove_video(vid_id, videos_root)
        print('[INFO] Removed video {}'.format(vid_id))

        # select representative frames for actions
        actions = annotations[vid_id]['annotations']
        selected_frames = select_frames(actions, vid_id, num_frames_per_step)
        print('[INFO] Selected frames for video {}'.format(vid_id))

        # remove unused frames
        remove_frames(vid_id, frames_root, selected_frames)
        print('[INFO] Removed unused frames for video {}'.format(vid_id))

        # get candidates for images
        frames = sorted(glob.glob(os.path.join(frames_root, vid_id, '*.*')))
        candidates = [detector.inference(frame, max_detections=5) for frame in frames]
        print('[INFO] Extracted candidates for video {}'.format(vid_id))

        # save pickled files for candidates
        pickle_data(candidates, pickles_root, vid_id, 'candidates')
        print('[INFO] Saved candidates for video {}'.format(vid_id))

        # get annotations list
        actions_list = get_actions(actions)
        print('[INFO] Extracted actions for video {}'.format(vid_id))

        # save pickled files for annotations list
        pickle_data(actions_list, pickles_root, vid_id, 'actions')
        print('[INFO] Saved actions for video {}'.format(vid_id))

    # write the missing videos to file, one entry per line
    with open(os.path.join(dataset_root, 'vid_list', 'missing_videos.txt'), 'w') as missing_vid:
        for entry in missing_vid_list:
            missing_vid.write(entry + '\n')

    # sanitize and remove the intermediate files
    # NOTE(review): this deletes every file matching '*.f*' anywhere under
    # dataset_root - confirm the pattern cannot match wanted files.
    # os.system("find {} -name '*.part*' -delete".format(dataset_root))
    os.system("find {} -name '*.f*' -delete".format(shlex.quote(dataset_root)))

In [13]:
def load_dataset(dataset_root='/h/mkhan/ece496-capstone/datasets'):
    """
    Read back the pickled candidates and actions of every listed video
    dataset_root: directory path to dataset base
    Returns (all_candidates, all_actions)
    """
    pickles_root = os.path.join(dataset_root, 'ycii_pickles')
    vid_list_path = os.path.join(dataset_root, 'vid_list', 'vid_list_ycii_val_short.txt')

    all_candidates, all_actions = [], []
    with open(vid_list_path) as vid_list:
        for entry in vid_list:
            # each entry looks like '<recipe type>/<video id>'
            rcp_type, vid_id = entry.rstrip('\n').split('/')
            print('[INFO] Loading data for video {}'.format(vid_id))

            # load candidates data
            vid_candidates = depickle_data(pickles_root, vid_id, 'candidates')
            if not vid_candidates:
                print('[INFO] Cannot load candidates for video {}'.format(vid_id))
            else:
                # NOTE(review): candidates are flattened across videos (extend)
                # while actions below stay grouped per video (append) -
                # confirm this asymmetry is intended.
                all_candidates.extend(vid_candidates)
                print('[INFO] Loaded candidates for video {}'.format(vid_id))

            # load actions data
            vid_actions = depickle_data(pickles_root, vid_id, 'actions')
            if not vid_actions:
                print('[INFO] Cannot load actions for video {}'.format(vid_id))
            else:
                all_actions.append(vid_actions)
                print('[INFO] Loaded actions for video {}'.format(vid_id))

    return all_candidates, all_actions

In [16]:
# USAGE: Run this just once to prepare and save data on disk.
# Downloads each listed video, samples and prunes its frames, and pickles the
# detector candidates and action sentences under dataset_root.
# NOTE(review): dataset_root is a hard-coded absolute path - adjust it for
# your machine.
prepare_dataset(dataset_root='/h/mkhan/ece496-capstone/datasets', num_frames_per_step=1)

loading configuration file cache
loading weights file https://cdn.huggingface.co/unc-nlp/frcnn-vg-finetuned/pytorch_model.bin from cache at /h/mkhan/.cache/torch/transformers/57f6df6abe353be2773f2700159c65615babf39ab5b48114d2b49267672ae10f.77b59256a4cf8343ae0f923246a81489fc8d82f98d082edc2d2037c977c0d9d0
All model checkpoint weights were used when initializing GeneralizedRCNN.

All the weights of GeneralizedRCNN were initialized from the model checkpoint at unc-nlp/frcnn-vg-finetuned.
If your task is similar to the task the model of the checkpoint was trained on, you can already use GeneralizedRCNN for predictions without further training.
[INFO] Processing video fn9anlEL4FI
[INFO] Extracted actions for video fn9anlEL4FI
[INFO] Saved candidates for video fn9anlEL4FI


In [17]:
# USAGE: Run this to load all candidate and actions data from disk.
# It only reads the pickle files under dataset_root/ycii_pickles.
# TODO: Need to pass all_actions through parser.parse and tokenize text data
all_candidates, all_actions = load_dataset(dataset_root='/h/mkhan/ece496-capstone/datasets')

[INFO] Loading data for video fn9anlEL4FI
[INFO] Loaded candidates for video fn9anlEL4FI
[INFO] Loaded actions for video fn9anlEL4FI


In [None]:
# NOTE: Do not execute the cells below; they are rough scratch code for experimentation.

In [None]:
# Scratch: load the 'database' section of the YCII train/val annotation file.
annotations = read_json(os.path.join('/h/mkhan/ece496-capstone/datasets', 'annotations', 'ycii_annotations_trainval.json'))['database']

In [None]:
# Scratch: action annotations of a single example video.
actions = annotations['fn9anlEL4FI']['annotations']

In [None]:
# Scratch: dump the raw annotation list to inspect its structure.
print(actions)

In [None]:
# Scratch: print the id, duration and annotated segments of the first three
# validation videos, followed by the end time of each video's last segment.
i = 0
for item in annotations:
    if annotations[item]['subset'] != 'validation':
        continue
    print(item)
    print(annotations[item]['duration'])
    segments = annotations[item]['annotations']
    # reset per video so a video without segments prints None instead of a
    # stale value from the previous video (or raising NameError on the first)
    end = None
    for segment in segments:
        print(segment)
        end = segment['segment'][1]

    print(end)

    i += 1
    if i == 3:
        break