In [None]:
""" 
Video player of the EgoAdapt samples (2.1s fragments) in the user video streams.
Per sample the action label and meta-data is displayed.

Before running this experiment, make sure you have run the Stream meta-data collector (src/continual_ego4d/processing/run_summarize_user_streams.py) and set the resulting paths in this notebook.
"""
import json
import os.path as osp
import pickle

import cv2
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display, Image, clear_output
from tqdm import tqdm

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Allow pandas to print up to 1000 rows before truncating the output.
pd.options.display.max_rows = 1000

In [None]:
"""
CONFIG: Add your config params here
"""
TRAIN_MODE = 'train'  # One of: 'train', 'test', 'pretrain'

# Fail fast on a typo. Without this check an unknown mode falls through to the
# JSON branch below and dies with an unrelated NameError.
VALID_TRAIN_MODES = ('train', 'test', 'pretrain')
if TRAIN_MODE not in VALID_TRAIN_MODES:
    raise ValueError(f"TRAIN_MODE must be one of {VALID_TRAIN_MODES}, got {TRAIN_MODE!r}")

if TRAIN_MODE == 'train': # Path obtained by Stream meta-data collector (src/continual_ego4d/processing/run_summarize_user_streams.py)
    train_usersummary_file_nooverlap_include_videpaths = "/your/path/to/logs/2022-09-06_18-43-43_UIDc4605fd4-8f70-4fcd-bee2-e16b5dd22820/dataset_entries_train_ego4d_LTA_train_usersplit_10users.ckpt"
    FILE_TO_ANALYZE = train_usersummary_file_nooverlap_include_videpaths

elif TRAIN_MODE == 'test': # Path obtained by Stream meta-data collector (src/continual_ego4d/processing/run_summarize_user_streams.py)
    test_usersummary_file_nooverlap_include_videpaths="/your/path/to/logs/2022-10-07_04-33-34_UIDd679068a-dc6e-40ff-b146-70ffe0671a97/dataset_entries_test_ego4d_LTA_test_usersplit_40users.ckpt"
    FILE_TO_ANALYZE = test_usersummary_file_nooverlap_include_videpaths

elif TRAIN_MODE == 'pretrain': # Path to direct JSON from pretraining
    pretrain_unsegmented_json = '/your/path/to/2022-09-08_17-17-16_ego4d_LTA_usersplit/ego4d_LTA_pretrain_incl_nanusers_usersplit_148users.json'
    FILE_TO_ANALYZE = pretrain_unsegmented_json
    video_parent_path ="../data/Ego4D/v1/clips" # Parent dir of videos

# Settings shared by all modes.
# None: play each entry from its start key to its end key.
# A number: play that many seconds from the start key instead.
CUSTOM_VIDEO_PLAY_LENGTH_SEC = None
scenarios_key = 'parent_video_scenarios'

# Train/test are segmented in 2s clips for online learning
if TRAIN_MODE in ['train','test']: # Pickle
    # SECURITY: pickle.load can execute arbitrary code. Only load checkpoint
    # files that you generated yourself with the stream meta-data collector.
    with open(FILE_TO_ANALYZE, 'rb') as f:
        ds = pickle.load(f)

    clip_start_key = 'clip_start_sec'
    clip_end_key = 'clip_end_sec'
    user_key = 'user_id'

    def video_path_fetch_fn(entry):
        """Return the video file path stored directly on the entry."""
        return entry['video_path']

# Pretrain uses original ego4d action-annotations
else: # JSON
    with open(FILE_TO_ANALYZE, 'r') as f:
        ds = json.load(f)['users']

    clip_start_key = 'action_clip_start_sec'
    clip_end_key = 'action_clip_end_sec'
    user_key = 'fb_participant_id'

    def video_path_fetch_fn(entry):
        """Build the video file path from the entry's clip uid and the clip parent dir."""
        return osp.join(video_parent_path,f'{entry["clip_uid"]}.mp4')


In [None]:
"""
List all users available
"""
# Collect the user ids (the keys of the loaded dataset) and display them.
all_users = [user for user in ds.keys()]
all_users

In [None]:
""" Select a user. """
SELECTED_USER = '16'

"""Single entry looks like:"""
# Display the first entry of the selected user's sequence as a schema example.
selected_user_entries = ds[SELECTED_USER]
selected_user_entries[0]

In [None]:
""" Run the video player """

def draw_label(img, text, pos, bg_color):
    """Draw `text` on `img` on top of a filled background rectangle.

    The image is modified in place.

    Args:
        img: Image array to draw on (must be a decoded image, not an encoded buffer).
        text: String to render.
        pos: (x, y) of the bottom-left corner of the text.
        bg_color: Colour tuple of the filled rectangle behind the text. The text
            colour is fixed to (255, 0, 0), so pick a different background
            colour to keep the label readable.
    """
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.8
    color = (255, 0, 0)
    text_thickness = 1  # Stroke width used both to measure and to draw the text
    margin = 2

    # Measure with the same thickness the text is drawn with. cv2.FILLED (-1)
    # is a fill flag for shapes, not a valid stroke width for text metrics.
    (text_width, text_height), _baseline = cv2.getTextSize(text, font_face, scale, text_thickness)

    end_x = pos[0] + text_width + margin
    end_y = pos[1] - text_height - margin

    cv2.rectangle(img, pos, (end_x, end_y), bg_color, cv2.FILLED)
    cv2.putText(img, text, pos, font_face, scale, color, text_thickness, cv2.LINE_AA)


# Interactive player for the selected user's stream.
# Commands (typed in the input box, followed by Enter):
#   n        play the next entry
#   p        play the previous entry
#   r        replay the current entry
#   <int>    jump to the entry with that index
#   q        quit
# Interrupting the kernel while a clip is playing also stops the player.
user_sequence = ds[SELECTED_USER]
if len(user_sequence) == 0:
    raise ValueError(f"User {SELECTED_USER!r} has no entries to play")

pbar = tqdm(total=len(user_sequence))

initial = True
entry_idx = 0
cmd = None
display_handle = None  # Set once a clip is shown; cleared again on exit


while cmd != 'q':

    if initial:
        cmd = 'r'  # Replay first instance
        initial = False
    else:
        cmd = input().strip()

    # Resolve the command into a requested entry index without touching
    # entry_idx yet, so an invalid request leaves the current position intact.
    try:
        requested_idx = int(cmd)
        status_msg = f"Playing from annotation number entry idx: {requested_idx}"
    except ValueError:
        if cmd == 'n':
            requested_idx = entry_idx + 1
            status_msg = f'Playing next video:{requested_idx}'
        elif cmd == 'p':
            requested_idx = entry_idx - 1
            status_msg = f'Playing previous video:{requested_idx}'
        elif cmd == 'r':
            requested_idx = entry_idx
            status_msg = f'Replaying video {requested_idx}'
        elif cmd == 'q':
            print('Quiting video watcher')
            break
        else:
            # A typo should not kill the session; ask again.
            print(f"cmd not recognized: {cmd!r} (use n, p, r, q or an entry index)")
            continue

    # Reject indices outside the sequence. Negative values would otherwise
    # silently wrap around to the end and too-large ones raise IndexError.
    if not 0 <= requested_idx < len(user_sequence):
        print(f"Entry idx {requested_idx} is out of range [0, {len(user_sequence) - 1}]")
        continue
    entry_idx = requested_idx
    print(status_msg)

    # Overwrite previous output of video and label
    pbar.reset()
    clear_output(wait=True)

    entry = user_sequence[entry_idx]
    video_path = video_path_fetch_fn(entry)
    clip_start_msec = entry[clip_start_key] * 1000
    if CUSTOM_VIDEO_PLAY_LENGTH_SEC is None:
        clip_end_msec = entry[clip_end_key] * 1000
    else:
        clip_end_msec = clip_start_msec + CUSTOM_VIDEO_PLAY_LENGTH_SEC * 1000
    print(f"Fetching video: {video_path}")

    info_str = "(entry {}) USER {}: {:.1f}s-{:.1f}s, action_idx={}".format(
        entry_idx,
        entry[user_key],
        entry[clip_start_key],
        clip_end_msec/1000,
        entry['action_idx'],
    )
    print(info_str)
    print("LABEL\t{}-{}".format(entry['verb'],entry['noun'],))
    print("SCENARIO\t{}".format(entry[scenarios_key], ))
    print(f"\nuser{entry[user_key]}_t{entry_idx}_{entry['verb']}_{entry['noun']}")
    pbar.update(entry_idx)
    pbar.refresh()

    # Video
    # See API to set video start/end times: https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html#gaeb8dd9c89c10a5c63c139bf7c4f5704d
    label_text = f"{entry['verb']}-{entry['noun']}"
    video = cv2.VideoCapture(video_path)
    display_handle = display(None, display_id=True)

    try:
        if not video.isOpened():
            print(f"Could not open video: {video_path}")
            continue  # The finally clause still releases the capture

        video.set(cv2.CAP_PROP_POS_MSEC, clip_start_msec)
        while video.get(cv2.CAP_PROP_POS_MSEC) <= clip_end_msec:
            frame_ok, frame = video.read()
            if not frame_ok:
                break  # End of stream or decode failure: no frame to show

            # Draw on the decoded frame; drawing after imencode would write
            # into the compressed JPEG bytes instead of the picture.
            draw_label(frame, label_text, (50, 50), (255, 255, 255))

            encode_ok, jpeg_buffer = cv2.imencode('.jpeg', frame)
            if not encode_ok:
                continue
            display_handle.update(Image(data=jpeg_buffer.tobytes()))
    except KeyboardInterrupt:
        break  # Leaves the outer command loop: stops the whole player
    finally:
        video.release()

if display_handle is not None:
    display_handle.update(None)
pbar.close()
