# Ego4D Annotation Verification Notebook
## Hello! 👋 
Thanks for helping us catch inconsistencies in Ego4D data! This notebook will let you browse annotations on video frames and label them. It's also a great way to sample our dataset.

## Prerequisites
1. Use the [Ego4D CLI](https://ego4d-data.org/docs/start-here/) to download the annotations and full_scale datasets (you don't need to download any videos)
1. Run the downloader with the manifest-only option: `python3 -m ego4d.cli.cli --output_directory ~/ego4d_data/ --manifest --yes`
2. Install the packages this notebook needs from `requirements.txt`. ipywidgets has some installation nuances: if the widgets at the bottom don't show, follow its [install instructions](https://ipywidgets.readthedocs.io/en/latest/user_install.html).
3. Ensure your AWS CLI is authenticated. Follow the [CLI Readme](https://github.com/facebookresearch/Ego4d/blob/main/ego4d/cli/README.md) if it's not.

## Process
1. Restart the kernel and run all cells in this notebook.
1. Scroll to the bottom, input a userid, and begin!
1. For each video, mark its annotations 'correct' **or** 'incorrect'. If incorrect, explain why in the notes.
1. If you hit errors loading a clip or something doesn't make sense, skip the frame or video.
1. Download the csv and send it to a member of the Meta Ego4D team on Slack!

# Setup

In [1]:
import os
# Set your options here
# Sampled Videos will be downloaded to <version>/full_scale/ if they aren't already there

# Root directory that was handed to the Ego4D CLI; every other path in this notebook is built from it.
CLI_OUTPUT_DIR = "/Users/<userid>/ego4d" # Replace with the full path to the --output_directory you pass to the cli
# Name of the dataset version sub-directory inside CLI_OUTPUT_DIR.
VERSION = "v1"
MANIFEST_PATH = os.path.join(CLI_OUTPUT_DIR, VERSION, 'manifest.csv') # Use this if manifest is at <version>/manifest.csv
# MANIFEST_PATH = os.path.join(CLI_OUTPUT_DIR, VERSION, 'full_scale' ,'manifest.csv') # Use this if manifest is at <version>/full_scale/

# Fail fast with a readable hint when the manifest is missing, instead of erroring later when it is read.
assert os.path.exists(MANIFEST_PATH), f"Manifest doesn't exist at {MANIFEST_PATH}. Is the CLI_OUTPUT_DIR right? Do you satisfy the pre-requisites?"

In [2]:
# Setup
import sys
# Put the current working directory at the front of sys.path (only once) so that
# helper modules stored next to the notebook, such as nb_video_utils, can be imported.
if os.path.abspath(".") not in sys.path: # Allow us to use util files in the same dir
    sys.path.insert(0, os.path.abspath("."))

import av
import base64
import boto3
import cv2
import hashlib
import json
import math
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import progressbar
import pandas as pd
import random
import uuid
import warnings

from botocore.exceptions import NoCredentialsError
from enum import Enum, auto
from IPython.display import HTML, display
from ipywidgets import HBox, Layout
from ipywidgets.widgets import VBox
import ipywidgets as widgets
from iopath.common.file_io import PathManager
from nb_video_utils import _get_frames
from typing import Callable

# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Use the JavaScript/HTML representation for matplotlib animations.
plt.rcParams["animation.html"] = "jshtml"

# Shared iopath PathManager; the annotation JSON files are opened through it.
pathmgr = PathManager()

# NOTE: this silences every Python warning for the rest of the session,
# including deprecation warnings raised by the libraries used here.
warnings.filterwarnings('ignore')

def vid_df_des(df):
    """Describe a video DataFrame as "#<row count> <total hours>h".

    The total is the sum of the ``duration_sec`` column converted to hours
    and shown with one decimal place.
    """
    video_count = len(df)
    total_hours = df.duration_sec.sum()/60/60
    return f"#{video_count} {total_hours:.1f}h"
def vid_des(videos):
    """Describe a sized collection of videos as "#<count> <total hours>h".

    Each element must expose a ``duration_sec`` attribute.
    """
    total_seconds = 0
    for video in videos:
        total_seconds += video.duration_sec
    return f"#{len(videos)} {total_seconds/60/60:.1f}h"
def deserialize_str_list(list_: str):
    """Turn the string form of a list of strings back into a real list.

    Example: ``"['Cooking', 'Cleaning']"`` -> ``['Cooking', 'Cleaning']``.

    Args:
        list_: Text such as ``str(["a", "b"])``. Anything that is not a
            string (for instance a missing CSV cell) is treated as empty.

    Returns:
        A list of strings; ``[]`` for an empty or missing list.
    """
    import ast  # local import: only this helper needs it

    if not isinstance(list_, str):
        return []

    # Preferred path: the text is a valid Python literal, so quoting and
    # escaping (e.g. "it's") are handled correctly by the parser.
    try:
        parsed = ast.literal_eval(list_)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]

    # Fallback for text that is not a valid literal: strip the surrounding
    # brackets and split on the "', '" separator, as the original helper did.
    inner = list_.strip()
    if inner.startswith("["):
        inner = inner[1:]
    if inner.endswith("]"):
        inner = inner[:-1]
    if not inner.strip():
        return []
    return [item.strip("'") for item in inner.split("', '")]
def to_1D(series):
    """Flatten an iterable of lists into one flat pandas Series."""
    flattened = []
    for sub_list in series:
        flattened.extend(sub_list)
    return pd.Series(flattened)

# Labeling form definition. Each entry is one titled section; every key inside
# an 'options' dict becomes a toggle button whose tag (and therefore its field
# name in the logged decisions) is that key. 'btn_label' is the button text and
# 'btn_color' the color the button takes once selected.
config = [
    {
        'section': 'Annotations Assessment (select one):',
        'options': [{
            'annotations_correct': {'btn_label': 'Correct', 'btn_color':'#47a84b'},
            'annotations_incorrect': {'btn_label':'Incorrect', 'btn_color':'#fb8e26'},
        }]
    }
]
# Color of an option button while it is not selected.
option_base_color = "#b7c2ce"
# Number of frames labeled on one video before a new video is picked.
label_frames_per_video = 10
# S3 location of the FHO annotation file that this notebook downloads.
fho_s3_path = "s3://ego4d-consortium-sharing/annotations/fho_220208.json"

# Load Video Data

In [3]:
# S3 Utils

def split_s3_path(s3_path):
    """Split an S3 URI into its ``(bucket, key)`` parts.

    Only a leading ``s3://`` scheme is removed, so a key that happens to
    contain the text ``s3://`` is returned unchanged.

    Args:
        s3_path: e.g. ``"s3://my-bucket/some/key.json"``; the scheme is optional.

    Returns:
        Tuple ``(bucket, key)``. ``key`` is ``""`` when the path names only a bucket.
    """
    scheme = "s3://"
    path = s3_path[len(scheme):] if s3_path.startswith(scheme) else s3_path
    # Everything up to the first "/" is the bucket, the remainder is the key.
    bucket, _, key = path.partition("/")
    return bucket, key
    

def download_from_s3(s3_path, local_path):
    """Download one S3 object to ``local_path`` while showing a progress bar.

    Args:
        s3_path: Source URI, e.g. ``"s3://bucket/key"``.
        local_path: Destination file path. Its parent directory is created
            when it does not exist yet.

    Returns:
        True when the download finished; False when a FileNotFoundError was
        raised or AWS credentials are not available (a message is printed in
        both cases). Other exceptions propagate to the caller.
    """
    print(f"Downloading {s3_path} -> {local_path}")
    bucket, key = split_s3_path(s3_path)

    # Make sure the destination directory exists; boto3 is not expected to create it.
    target_dir = os.path.dirname(local_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        # Client creation and the size lookup live inside the try block so that
        # missing credentials are reported through the handler below instead of
        # escaping as an uncaught exception.
        s3 = boto3.client('s3')
        size = s3.head_object(Bucket=bucket, Key=key)['ContentLength']
        progress = progressbar.progressbar.ProgressBar(maxval=size)
        progress.start()

        def on_bytes_transferred(chunk):
            # Presumably `chunk` is the byte count of the latest transfer step,
            # so the bar advances by that amount - confirm against boto3 docs.
            progress.update(progress.currval + chunk)

        s3.download_file(bucket, key, local_path, Callback=on_bytes_transferred)
        progress.finish()
        print("Download Successful")
        return True
    except FileNotFoundError:
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False

In [4]:
# Load video metadata
# Read the CLI manifest CSV into a DataFrame.
videos_df = pd.read_csv(MANIFEST_PATH)
# The 'scenarios' column is stored as text; convert each cell back into a list of strings.
videos_df['scenarios'] = videos_df['scenarios'].apply(deserialize_str_list)
def get_video(video_uid):
    """Return the first manifest row whose ``video_uid`` column equals ``video_uid``.

    Args:
        video_uid: Value to look up in the ``video_uid`` column of ``videos_df``.

    Returns:
        The matching row as a pandas Series.

    Raises:
        IndexError: If no row in ``videos_df`` has that ``video_uid``.
    """
    # Look up the requested uid (previously a fixed uid was used regardless of the argument).
    matches = videos_df.loc[videos_df['video_uid'] == video_uid]
    return matches.iloc[0]
# Print a one-line summary (row count and total hours) of the manifest.
print(f"R1 Videos: {vid_df_des(videos_df)}")

# Load FHO
# The FHO file is fetched from S3 on every run of this cell; the boolean result of
# download_from_s3 is not checked, so a failed download only surfaces when the file is opened.
fho_local_path = os.path.join(CLI_OUTPUT_DIR, VERSION, 'annotations', 'annotation_verification_fho.json')
download_from_s3(fho_s3_path, fho_local_path)
with pathmgr.open(fho_local_path, "r") as f:
    fho_annotations = json.load(f)
    # Keys of 'video_data' are used as the list of FHO-annotated video uids.
    fho_ann_video_uids = list(fho_annotations['video_data'].keys())
# video uid -> FHO annotation for that video
fho_video_dict = fho_annotations['video_data']
print(f"FHO: {len(fho_ann_video_uids)} videos - top level: {fho_annotations.keys()}")

# Load VQ
# Read from the annotations directory under CLI_OUTPUT_DIR/VERSION (must already exist locally).
with pathmgr.open(os.path.join(CLI_OUTPUT_DIR, VERSION, 'annotations', 'vq_train.json'), "r") as f:
    vq_annotations = json.load(f)
    vq_ann_video_uids = [x["video_uid"] for x in vq_annotations["videos"]]
# video uid -> that video's "clips" entry
vq_video_dict = {x["video_uid"]: x["clips"] for x in vq_annotations["videos"]}
print(f"VQ: {len(vq_ann_video_uids)} videos - top level: {vq_annotations.keys()}")

# Load AV
# Same layout handling as VQ: a "videos" list whose items carry "video_uid" and "clips".
with pathmgr.open(os.path.join(CLI_OUTPUT_DIR, VERSION, 'annotations', 'av_train.json'), "r") as f:
    av_annotations = json.load(f)
    av_ann_video_uids = [x["video_uid"] for x in av_annotations["videos"]]
# video uid -> that video's "clips" entry
av_video_dict = {x["video_uid"]: x["clips"] for x in av_annotations["videos"]}
print(f"AV: {len(av_ann_video_uids)} videos - top level: {av_annotations.keys()}")

class AnnotationType(Enum):
    """Annotation families that this notebook can sample and display."""

    FHO = 1
    VQ = 2
    AV = 3

# Registry used by the labeling UI: for every annotation type, the per-video
# annotation lookup ('annotations') and the list of annotated video uids
# ('video_uids') that new videos are sampled from.
ANNOTATIONS = {
    annotation_type: {
        'annotations': video_dict,
        'video_uids': video_uids,
    }
    for annotation_type, video_dict, video_uids in (
        (AnnotationType.FHO, fho_video_dict, fho_ann_video_uids),
        (AnnotationType.VQ, vq_video_dict, vq_ann_video_uids),
        (AnnotationType.AV, av_video_dict, av_ann_video_uids),
    )
}

# Alias of the manifest DataFrame under a constant-style name.
MANIFEST_DF = videos_df

R1 Videos: #9650 3877.9h
Downloading s3://ego4d-consortium-sharing/annotations/fho_220202.json -> /Users/eugenebyrne/ego4d/v1/annotations/annotation_verification_fho.json


100% |########################################################################|
An exception occurred in telemetry logging.Disabling telemetry to prevent further exceptions.
Traceback (most recent call last):
  File "/Users/eugenebyrne/opt/anaconda3/envs/py38/lib/python3.8/site-packages/iopath/common/file_io.py", line 946, in __log_tmetry_keys
    handler.log_event()
  File "/Users/eugenebyrne/opt/anaconda3/envs/py38/lib/python3.8/site-packages/iopath/common/event_logger.py", line 97, in log_event
    del self._evt
AttributeError: _evt


Download Successful
VQ: 1326 videos - top level: dict_keys(['version', 'date', 'description', 'manifest', 'videos'])
AV: 153 videos - top level: dict_keys(['date', 'version', 'description', 'videos'])


# Widget Utils

In [5]:
# Pulled from https://stackoverflow.com/questions/61708701/how-to-download-a-file-using-ipywidget-button
class DownloadButton(widgets.Button):
    """Download button with dynamic content

    The content is generated using a callback when the button is clicked.

    Args:
        filename: File name offered to the browser for the download.
        contents: Zero-argument callable returning the text to download; it
            is called again on every click.
        **kwargs: Passed through to ``widgets.Button``.
    """

    def __init__(self, filename: str, contents: Callable[[], str], **kwargs):
        super().__init__(**kwargs)
        self.filename = filename
        self.contents = contents
        self.on_click(self.__on_click)

    def __on_click(self, b):
        """Build the payload and display an auto-clicked, base64 ``data:`` link."""
        from html import escape  # local import: only needed to escape the file name

        contents: bytes = self.contents().encode('utf-8')
        payload = base64.b64encode(contents).decode()
        digest = hashlib.md5(contents).hexdigest()  # bypass browser cache
        anchor_id = f'dl_{digest}'
        # The file name ends up inside an HTML attribute, so quotes and angle
        # brackets must be escaped to keep the markup well-formed.
        safe_filename = escape(self.filename, quote=True)

        display(HTML(f"""
<html>
<body>
<a id="{anchor_id}" download="{safe_filename}" href="data:text/csv;base64,{payload}">
</a>

<script>
(function download() {{
document.getElementById('{anchor_id}').click();
}})()
</script>

</body>
</html>
"""))

# Visualization Utils

In [6]:
# in: video_path, frame_number, boxes: [{ object_type, bbox: {x, y, width, height} }]}, draw_labels
# out: path to image of bboxes rendered onto the video frame
def render_frame_with_bboxes(video_path, frame_number, boxes, draw_labels = True):
    """Draw bounding boxes on one video frame and save the result as a JPEG.

    Args:
        video_path: Path of the video, opened with ``av.open``.
        frame_number: Frame to extract (passed to ``_get_frames``).
        boxes: Iterable of ``{'object_type': label, 'bbox': {'x', 'y', 'width', 'height'}}``.
        draw_labels: When true, a filled tag with the label text is drawn for
            every box that has a truthy label.

    Returns:
        Path of the written JPEG inside the system temporary directory.

    Raises:
        AssertionError: If ``_get_frames`` does not yield exactly one frame.
    """
    import tempfile  # local import: used only to locate a portable temp directory

    # Color tuples are given to cv2 together with a frame decoded as "bgr24".
    colormap = { # Custom colors for FHO annotations
        'object_of_change': (0, 255, 255),
        'left_hand': (0, 0, 255),
        'right_hand': (0, 255, 0)
    }
    default_color = (255, 255, 0)
    rect_thickness = 5
    rect_line_type = cv2.LINE_4
    font_color = (0, 0, 0)
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 1
    font_thickness = 1

    with av.open(video_path) as input_video:
        frames = list(_get_frames([frame_number], input_video, include_audio=False, audio_buffer_frames=0))
        assert len(frames) == 1
        img = frames[0].to_ndarray(format="bgr24")
        for box in boxes:
            label, bbox = box['object_type'], box['bbox']
            rect_color = colormap.get(label, default_color) if label else default_color
            # cv2 needs integer pixel coordinates.
            x, y, width, height = map(int, (bbox['x'], bbox['y'], bbox['width'], bbox['height']))
            cv2.rectangle(img, pt1=(x, y), pt2=(x + width, y + height), color=rect_color, thickness=rect_thickness, lineType=rect_line_type)
            if label and draw_labels:
                (text_width, text_height), _ = cv2.getTextSize(label, font_face, font_scale, font_thickness)
                # Filled rectangle above the box's top-left corner acts as the label background.
                cv2.rectangle(img, pt1=(x - rect_thickness//2, y - rect_thickness//2), pt2=(x + text_width + 10 + rect_thickness, y - text_height - 10 - rect_thickness), color=rect_color, thickness=-1)
                cv2.putText(img, text=label, org=(x + 10, y - 10), fontFace=font_face, fontScale=font_scale, color=font_color, thickness=font_thickness, lineType=cv2.LINE_AA)

    # Use the platform's temp directory instead of a hard-coded "/tmp/"; the
    # uuid keeps repeated renders of the same frame from overwriting each other.
    path = os.path.join(tempfile.gettempdir(), f"{frame_number}_{str(uuid.uuid1())}.jpg")
    cv2.imwrite(path, img)
    return path

# in: video_path, frames: [{ frame_number, frame_type, boxes: [{ object_type, bbox: {x, y, width, height} }] }]
# out: void; as a side-effect, renders frames from the video with matplotlib
def plot_frames_with_bboxes(video_path, frames, max_cols = 3):
    """Show several annotated frames of one video in a matplotlib grid.

    Args:
        video_path: Path of the video the frames are read from.
        frames: List of ``{'frame_number', 'frame_type', 'boxes'}`` dicts.
        max_cols: Maximum number of grid columns.

    Returns:
        None. Nothing is drawn when ``frames`` is empty.
    """
    if not frames:
        return

    cols = max(1, min(max_cols, len(frames)))
    rows = math.ceil(len(frames) / cols)
    # squeeze=False keeps `axes` two-dimensional even for a single row or a
    # single frame, so axes[row, col] is valid for every grid shape.
    _, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(10*cols, 7 * rows), squeeze=False)
    for ax in axes.ravel():
        ax.set_axis_off() # Hide axes (also hides the unused cells of the last row)
    for idx, frame_data in enumerate(frames):
        row, col = divmod(idx, cols)
        frame_path = render_frame_with_bboxes(video_path, frame_data['frame_number'], frame_data['boxes'])
        axes[row, col].title.set_text(frame_data['frame_type'])
        axes[row, col].imshow(mpimg.imread(frame_path, format='jpeg'))
    plt.subplots_adjust(wspace=.05, hspace=.05)
    plt.show()

In [7]:
# AV Data Utils

# Aggregate av tracking bounding boxes in an indexable dictionary
def get_av_frame_dict(av_video_annotation):
    """Group every tracked box of an AV annotation by its video frame number.

    Walks ``persons`` -> ``tracking_paths`` -> ``track`` and collects one entry
    per distinct ``video_frame``.

    Returns:
        Dict mapping frame number to
        ``{'frame_number', 'frame_label', 'frame_type', 'boxes'}``, where each
        box is ``{'object_type': <track_id>, 'bbox': {'x', 'y', 'width', 'height'}}``.
    """
    frames_by_number = {}
    for person in av_video_annotation['persons']:
        for path in person['tracking_paths']:
            for point in path['track']:
                frame_no = point['video_frame']
                # First box seen on this frame: create the frame record.
                if frame_no not in frames_by_number:
                    caption = f"Frame: {frame_no}"
                    frames_by_number[frame_no] = {
                        "frame_number": frame_no,
                        "frame_label": caption,
                        "frame_type": caption,
                        "boxes": [],
                    }
                tracked_box = {
                    "object_type": path['track_id'],
                    "bbox": {
                        "x": point['x'],
                        "y": point['y'],
                        "width": point['width'],
                        "height": point['height'],
                    },
                }
                frames_by_number[frame_no]['boxes'].append(tracked_box)
    return frames_by_number

    
# Get ordered list of frames
def get_av_frames_with_bboxes(av_video_annotation):
    """Return the frame records of ``get_av_frame_dict`` sorted by frame number."""
    def by_frame_number(frame):
        return frame['frame_number']

    frames = list(get_av_frame_dict(av_video_annotation).values())
    frames.sort(key=by_frame_number)
    return frames

In [8]:
# returns a dict with frame id info
def render_fho(video_path, fho_video_annotations):
    """Show one randomly chosen FHO-annotated frame of a video.

    An interval is picked at random among the intervals that have at least one
    usable action (not invalid, not rejected, with frames); then one of its
    usable actions and one of that action's frames are picked at random.

    Args:
        video_path: Path of the video the frame is read from.
        fho_video_annotations: FHO annotation of the video; must contain
            ``'annotated_intervals'``, each with ``'narrated_actions'``.

    Returns:
        ``{'frame_number': <number of the displayed frame>}``.

    Raises:
        ValueError: If no interval of the video has a usable action.
    """
    def usable_actions(interval):
        return [
            action for action in interval['narrated_actions']
            if not (action['is_invalid_annotation'] or action['is_rejected']) and action['frames']
        ]

    # Only sample among intervals that can yield a frame; sampling an interval
    # first and filtering afterwards fails whenever an unusable interval is drawn.
    candidates = [actions for actions in map(usable_actions, fho_video_annotations['annotated_intervals']) if actions]
    if not candidates:
        raise ValueError("No valid FHO action with frames is available for this video")

    action = random.choice(random.choice(candidates))
    sample_frame = random.choice(action['frames'])

    frame_path = render_frame_with_bboxes(video_path, sample_frame['frame_number'], sample_frame['boxes'])
    plt.rcParams['figure.figsize'] = [15, 10]
    plt.imshow(mpimg.imread(frame_path, format='jpeg'))
    plt.title(f"Frame: {sample_frame['frame_number']}")
    plt.show()

    #     TODO: add more identifying info
    return {
        'frame_number': sample_frame['frame_number']
    }

# returns a dict with frame id info
def render_vq(video_path, vq_video_annotations):
    """Show the visual crop of one randomly chosen VQ query set.

    Picks a random element of ``vq_video_annotations``, a random entry of its
    ``'annotations'``, and a random query set from that entry's ``'query_sets'``.

    Args:
        video_path: Path of the video the frame is read from.
        vq_video_annotations: List of VQ entries for the video.

    Returns:
        ``{'frame_number': <video_frame_number of the displayed crop>}``.

    Raises:
        ValueError: If any of the sampled collections is empty.
        KeyError: If the chosen query set lacks ``'visual_crop'`` or
            ``'object_title'``.
    """
    annotations = random.sample(vq_video_annotations, 1)[0]['annotations']
    query_sets = random.sample(annotations, 1)[0]['query_sets']
    # random.sample needs a sequence: passing dict views/sets is rejected on
    # Python 3.11+, so materialize the query sets as a list first.
    sample_query_set = random.sample(list(query_sets.values()), 1)[0]
    sample_frame = sample_query_set['visual_crop']
    box = {
        'object_type': sample_query_set['object_title'],
        'bbox': {
            'x': sample_frame['x'],
            'y': sample_frame['y'],
            'width': sample_frame['width'],
            'height': sample_frame['height'],
        }
    }

    frame_path = render_frame_with_bboxes(video_path, sample_frame['video_frame_number'], [box])
    plt.rcParams['figure.figsize'] = [15, 10]
    plt.imshow(mpimg.imread(frame_path, format='jpeg'))
    # Title added so VQ frames are identified the same way as FHO and AV frames.
    plt.title(f"Frame: {sample_frame['video_frame_number']}")
    plt.show()

    #     TODO: add more identifying info
    return {
        'frame_number': sample_frame['video_frame_number']
    }

# returns a dict with frame id info
def render_av(video_path, av_video_annotations):
    """Show one randomly chosen frame with its AV tracking boxes.

    One element of ``av_video_annotations`` is sampled, its tracked frames are
    collected with ``get_av_frames_with_bboxes``, and one of them is displayed.

    Returns:
        ``{'frame_number': <number of the displayed frame>}``.
    """
    [picked_annotation] = random.sample(av_video_annotations, 1)
    [sample_frame] = random.sample(get_av_frames_with_bboxes(picked_annotation), 1)
    frame_number = sample_frame['frame_number']

    rendered_path = render_frame_with_bboxes(video_path, frame_number, sample_frame['boxes'])
    plt.rcParams['figure.figsize'] = [15, 10]
    image = mpimg.imread(rendered_path, format='jpeg')
    plt.imshow(image)
    plt.title(f"Frame: {frame_number}")
    plt.show()

    #     TODO: add more identifying info
    return {'frame_number': frame_number}

def render_frame_with_annotations(video_path, video_annotations, annotation_type):
    """Dispatch to the renderer for ``annotation_type`` and return its result.

    Raises:
        KeyError: If ``annotation_type`` has no registered renderer.
    """
    renderers = {
        AnnotationType.FHO: render_fho,
        AnnotationType.VQ: render_vq,
        AnnotationType.AV: render_av,
    }
    render = renderers[annotation_type]
    return render(video_path, video_annotations)

In [9]:
# Shapes of the three dicts that the functions below pass around:
#
# labeling_state = {
#     'video_uid': None,
#     'video_frames_labeled': 0,
#     'annotation_type': AnnotationType.FHO,
#     'decisions_df': pd.DataFrame([]),
#     'render_identification_info': {},
# }

# ui_elements = {
#     'userid_input': None,
#     'buttons': [],
#     'textareas': [],
#     'textboxes': [],
#     'dropdowns': [],
# }

# outputs = {
#     'downloads': None,
#     'frames': None,
#     'logs': None,
# }
    
    
def get_local_video_path(video_uid):
    """Return the local path used for a video: ``<CLI_OUTPUT_DIR>/<VERSION>/full_scale/<video_uid>``.

    NOTE(review): no file extension is appended - confirm this matches how the
    Ego4D CLI names files in ``full_scale``.
    """
    full_scale_dir = os.path.join(CLI_OUTPUT_DIR, VERSION, 'full_scale')
    return os.path.join(full_scale_dir, video_uid)
    
    
def load_new_video(outputs, labeling_state, annotation_type):
    """Pick a random annotated video of ``annotation_type`` and make it current.

    The video is downloaded from its manifest ``canonical_s3_location`` when it
    is not present locally. Progress messages are printed into
    ``outputs['downloads']``. ``labeling_state`` is updated in place with the
    new ``video_uid``, a zeroed ``video_frames_labeled`` counter and the
    ``annotation_type``.
    """
    with outputs['downloads']:
        [video_uid] = random.sample(ANNOTATIONS[annotation_type]['video_uids'], 1)
        video_path = get_local_video_path(video_uid)
        print(f"Picked new video: {video_uid} for annotation type: {annotation_type.name}")
        already_local = os.path.exists(video_path)
        if not already_local:
            manifest_row = MANIFEST_DF[MANIFEST_DF.video_uid == video_uid].iloc[0]
            download_from_s3(manifest_row.canonical_s3_location, video_path)
        labeling_state.update(
            video_uid=video_uid,
            video_frames_labeled=0,
            annotation_type=annotation_type,
        )


# TODO: Refactor ui disable/enable
def load_next_frame(ui_elements, outputs, labeling_state):
    """Render the next frame to label, switching to a new video when needed.

    Steps: clear all output widgets and disable the controls, pick a new video
    if none is current or ``label_frames_per_video`` frames were already
    labeled on it (the annotation type stays the same), render a random
    annotated frame into ``outputs['frames']``, store what was rendered in
    ``labeling_state['render_identification_info']``, and re-enable the
    controls.

    Args:
        ui_elements: Dict with the ``'buttons'``, ``'textareas'``,
            ``'textboxes'`` and ``'dropdowns'`` widget lists.
        outputs: Dict of Output widgets (``'downloads'``, ``'frames'``, ...).
        labeling_state: Mutable labeling state; updated in place.
    """
    controls = [*ui_elements['buttons'], *ui_elements['textareas'], *ui_elements['textboxes'], *ui_elements['dropdowns']]

    # Clear Outputs and Disable UI
    for out in outputs.values():
        out.clear_output()
    for elem in controls:
        elem.disabled = not hasattr(elem, 'enabled_during_refresh')

    try:
        if not labeling_state['video_uid'] or labeling_state['video_frames_labeled'] >= label_frames_per_video:
            load_new_video(outputs, labeling_state, labeling_state['annotation_type'])
        else:
            with outputs['downloads']:
                print(f"Video {labeling_state['video_uid']}: {labeling_state['annotation_type'].name}")

        video_annotations = ANNOTATIONS[labeling_state['annotation_type']]['annotations'].get(labeling_state['video_uid'])
        video_path = get_local_video_path(labeling_state['video_uid'])

        # Pre-set so the state below can still be built if rendering yields nothing.
        frame_id_info = {}
        with outputs['frames']:
            frame_id_info = render_frame_with_annotations(video_path, video_annotations, labeling_state['annotation_type'])

        labeling_state['render_identification_info'] = {
            'video_uid': labeling_state['video_uid'],
            'annotation_type': labeling_state['annotation_type'].name,
            **frame_id_info
        }
    finally:
        # Enable UI - also when loading failed, so the user can still skip the
        # frame or video instead of being left with every control disabled.
        for elem in controls:
            elem.disabled = hasattr(elem, 'disabled_during_labeling')
    

def get_selections_ui(ui_elements, labeling_state):
    """Build the row holding the user-id text box and the annotation-type dropdown.

    The dropdown starts on ``labeling_state['annotation_type']`` when that is
    set. Choosing another type stores it in ``labeling_state`` and clears
    ``video_uid`` so the next load picks a new video.

    Side effects: the widgets are registered in ``ui_elements['dropdowns']``,
    ``ui_elements['textboxes']`` and ``ui_elements['userid_input']``.

    Returns:
        An ``HBox`` containing both widgets.
    """
    userid_textbox = widgets.Text(value='', placeholder='Enter your userid here', description='User Id:')
    # An ordered list of (label, value) pairs keeps the options in enum
    # definition order; a set would make the order (and the default) arbitrary.
    annotation_type_dropdown = widgets.Dropdown(
        options=list(AnnotationType.__members__.items()),
        description="Pick Type:"
    )
    # Show the type the labeling state actually starts with. This happens
    # before the observer is attached, so it does not reset the state.
    initial_type = labeling_state.get('annotation_type')
    if isinstance(initial_type, AnnotationType):
        annotation_type_dropdown.value = initial_type

    def on_type_change(change):
        if change['type'] == 'change' and change['name'] == 'value':
            labeling_state['annotation_type'] = change['new']
            labeling_state['video_uid'] = None

    annotation_type_dropdown.observe(on_type_change)
    annotation_type_dropdown.enabled_at_start = True

    # Marker attributes read elsewhere via hasattr(): 'tag' names the logged
    # field, 'fix_value' keeps the text after a submit, the others steer when
    # the widget is enabled.
    userid_textbox.tag = 'user_id'
    userid_textbox.enabled_at_start = True
    userid_textbox.disabled_during_labeling = True
    userid_textbox.fix_value = True

    ui_elements['dropdowns'] += [annotation_type_dropdown]
    ui_elements['textboxes'] += [userid_textbox]
    ui_elements['userid_input'] = userid_textbox

    row = HBox()
    row.children = [userid_textbox, annotation_type_dropdown]
    return row
    
    
    
def get_notes_textarea_ui(ui_elements):
    """Create the free-text notes area, register it in ``ui_elements['textareas']`` and return it.

    The widget is tagged ``"notes"``, which is the field name its value is
    logged under.
    """
    notes_textarea = widgets.Textarea(
        value="",
        placeholder="Notice anything strange, unique, or interesting? See an incorrect annotation? Add details here!",
        description="Notes:",
        disabled=False,
        layout=Layout(width="95%", height="100px"),
    )
    notes_textarea.tag = "notes"
    ui_elements['textareas'].append(notes_textarea)
    return notes_textarea


def get_options_ui(ui_elements):
    """Build the toggle-button grid described by the module-level ``config``.

    Every section becomes a label followed by a row of buttons. Clicking a
    button flips its ``selected`` flag and switches its color between
    ``option_base_color`` and the option's ``btn_color``. All buttons are also
    appended to ``ui_elements['buttons']``.

    Returns:
        A ``VBox`` with the section labels and button rows.
    """
    btn_layout = Layout(width="100%", height="100px")
    btn_style = {'font_weight': 'bold'}

    def on_option_clicked(b):
        b.selected = not b.selected
        b.style.button_color = b.selected_color if b.selected else option_base_color

    def make_option_button(decision_string, opt):
        button = widgets.Button(description=opt['btn_label'], layout=btn_layout, style=btn_style, button_style='success')
        button.style.button_color = option_base_color
        # Plain attributes read later when decisions are logged and reset.
        button.reset_color = option_base_color
        button.tag = decision_string
        button.selected = False
        button.selected_color = opt['btn_color']
        button.on_click(on_option_clicked)
        return button

    ui_sections = []
    for section in config:
        ui_sections.append(widgets.Label(value=section['section']))
        section_buttons = [
            make_option_button(decision_string, opt)
            for btn_config in section['options']
            for decision_string, opt in btn_config.items()
        ]
        btn_row = HBox()
        btn_row.children = section_buttons
        ui_sections.append(btn_row)
        ui_elements['buttons'].extend(section_buttons)

    grid = VBox(layout=Layout(width="95%"))
    grid.children = ui_sections
    return grid


def get_utility_ui(ui_elements, outputs, labeling_state):
    """Build the row of utility buttons and wire up their click handlers.

    Buttons: "Start Labeling", "Label Frame", "Download CSV", "Skip Frame" and
    "Skip Video". All of them are appended to ``ui_elements['buttons']``.

    Args:
        ui_elements: Dict of registered widgets; read by the handlers.
        outputs: Dict of Output widgets; ``outputs['logs']`` receives messages.
        labeling_state: Mutable labeling state; ``'decisions_df'`` collects one
            row per labeled frame.

    Returns:
        An ``HBox`` containing the five buttons.
    """
    begin_btn = widgets.Button(description="Start Labeling")
    submit_btn = widgets.Button(description="Label Frame")
    # The CSV text is produced at click time so it always reflects the current decisions.
    download_btn = DownloadButton(filename='ego4d_annotation_labels.csv', contents=lambda: labeling_state['decisions_df'].to_csv(), description='Download CSV')
    skip_frame_btn = widgets.Button(description="Skip Frame")
    skip_video_btn = widgets.Button(description="Skip Video")

    utility_btns = [begin_btn, submit_btn, download_btn, skip_frame_btn, skip_video_btn]

    def on_begin_btn_clicked(b):
        if ui_elements['userid_input'].value == '':
            with outputs['logs']:
                print("Please enter your userid first")
        else:
            # Hide the start button and lock the user id for the session.
            b.layout.display = 'none'
            ui_elements['userid_input'].disabled = True
            load_next_frame(ui_elements, outputs, labeling_state)

    def on_submit_btn_clicked(b):
        tagged_btns = [btn for btn in ui_elements['buttons'] if hasattr(btn, 'tag')]
        tagged_textboxes = [textbox for textbox in ui_elements['textboxes'] if hasattr(textbox, 'tag')]
        tagged_textareas = [textarea for textarea in ui_elements['textareas'] if hasattr(textarea, 'tag')]
        info_to_log = {
            **{btn.tag: btn.selected for btn in tagged_btns},
            **{textbox.tag: textbox.value for textbox in tagged_textboxes},
            **{textarea.tag: textarea.value for textarea in tagged_textareas},
            **(labeling_state['render_identification_info'] or {})
        }
        # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
        # frame adds the same row.
        labeling_state['decisions_df'] = pd.concat(
            [labeling_state['decisions_df'], pd.DataFrame([info_to_log])],
            ignore_index=True,
        )
        labeling_state['video_frames_labeled'] += 1

        # Reset button style and text input contents
        for btn in ui_elements['buttons']:
            btn.selected = False
            if hasattr(btn, 'reset_color'):
                btn.style.button_color = btn.reset_color
        for textinput in [*ui_elements['textareas'], *ui_elements['textboxes']]:
            # Inputs flagged with 'fix_value' (the user id) keep their text.
            textinput.value = textinput.value if hasattr(textinput, 'fix_value') else ''

        load_next_frame(ui_elements, outputs, labeling_state)

    def on_skip_frame_btn_clicked(b):
        load_next_frame(ui_elements, outputs, labeling_state)

    def on_skip_video_btn_clicked(b):
        # Clearing the uid makes load_next_frame pick a new video.
        labeling_state['video_uid'] = None
        load_next_frame(ui_elements, outputs, labeling_state)

    begin_btn.on_click(on_begin_btn_clicked)
    submit_btn.on_click(on_submit_btn_clicked)
    skip_frame_btn.on_click(on_skip_frame_btn_clicked)
    skip_video_btn.on_click(on_skip_video_btn_clicked)

    # Only the start button is usable before labeling begins. No utility
    # button carries 'enabled_during_refresh', so all are disabled while a
    # frame is loading.
    begin_btn.enabled_at_start = True

    ui_elements['buttons'] += utility_btns

    row = HBox()
    row.children = utility_btns

    return row

In [None]:
# Mutable state shared by all UI callbacks.
labeling_state = {
    'video_uid': None, # no video yet; the first load picks one
    'video_frames_labeled': 0,
#     'annotation_type': random.sample(list(AnnotationType), 1)[0],
    'annotation_type': AnnotationType.VQ, # annotation type labeling starts with
    'decisions_df': pd.DataFrame([]), # grows by one row per labeled frame
    'render_identification_info': None # filled in each time a frame is rendered
}

# Registry of interactive widgets; the get_*_ui builders below append to these lists.
ui_elements = {
    'userid_input': None,
    'buttons': [],
    'textareas': [],
    'textboxes': [],
    'dropdowns': [],
}

# Output areas that the callbacks print and render into.
outputs = {
    'downloads': widgets.Output(layout=Layout(height="auto")),
    'frames': widgets.Output(layout=Layout(height="auto")),
    'logs': widgets.Output(layout=Layout(height="auto"))
}

# Widgets in display order (top to bottom). Building this list also fills ui_elements.
ui = [
    outputs['downloads'], # Render video download logging info
    get_selections_ui(ui_elements, labeling_state), # Render main selections row
    outputs['logs'], # Render logs
    outputs['frames'], # Render video frames
    get_options_ui(ui_elements), # Render options
    get_notes_textarea_ui(ui_elements), # Render notes textarea
    get_utility_ui(ui_elements, outputs, labeling_state) # Render utility row
]

# Initially disable UI
# Only widgets carrying the 'enabled_at_start' marker attribute stay enabled.
for elem in [*ui_elements['buttons'], *ui_elements['textareas'], *ui_elements['textboxes'], *ui_elements['dropdowns']]:
    elem.disabled = not hasattr(elem, 'enabled_at_start')

display(*ui)