In [None]:
%matplotlib inline
import cv2
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Default figure size for every plot in this notebook, given as (width, height).
# Presumably this tall so the vertically stacked per-camera frames plotted later stay large — TODO confirm.
rcParams['figure.figsize'] = 36,128

import os
import random
import json
import pandas as pd
from tqdm.auto import tqdm
from collections import defaultdict

In [None]:
RELEASE_DIR = "/checkpoint/miguelmartin/egoexo_data/dev/"  # NOTE: changeme

# Locations of the metadata files that live at the top level of RELEASE_DIR.
egoexo_paths = {
    "takes": os.path.join(RELEASE_DIR, "takes.json"),
    "captures": os.path.join(RELEASE_DIR, "captures.json"),
    "physical_setting": os.path.join(RELEASE_DIR, "physical_setting.json"),
    "participants": os.path.join(RELEASE_DIR, "participants.json"),
    "visual_objects": os.path.join(RELEASE_DIR, "visual_objects.json"),
}

# Each metadata file is loaded twice under the same key:
#   egoexo_pd[name] -> pandas DataFrame (used for the aggregate statistics)
#   egoexo[name]    -> raw parsed JSON  (used for nested per-take access)
# `with` guarantees every file handle is closed once it has been read.
egoexo_pd = {}
egoexo = {}
for name, json_path in egoexo_paths.items():
    with open(json_path) as json_file:
        egoexo_pd[name] = pd.read_json(json_file)
    with open(json_path) as json_file:
        egoexo[name] = json.load(json_file)

In [None]:
takes_df = egoexo_pd["takes"]
captures_df = egoexo_pd["captures"]

# Trajectory counts are computed from the raw JSON because each take nests
# its capture record under take["capture"].
num_traj_takes = sum(take["capture"]["has_trajectory"] for take in egoexo["takes"])
# A take counts as valid when `validated > 0`, the same criterion used for
# `num_validated_takes` below, so the two numbers are comparable.
num_valid_traj_takes = sum(
    take["capture"]["has_trajectory"] and (take["validated"] > 0)
    for take in egoexo["takes"]
)
num_traj_captures = sum(capture["has_trajectory"] for capture in egoexo["captures"])
num_valid_traj_captures = sum(
    capture["has_trajectory"] and (capture["timesync_validated"] > 0)
    for capture in egoexo["captures"]
)

validated_takes_df = takes_df[takes_df.validated > 0]
num_validated_takes = (takes_df.validated > 0).sum()
num_validated_captures = (captures_df.timesync_validated > 0).sum()

# Sanity check: the filtered frame and the boolean count must agree.
assert len(validated_takes_df) == num_validated_takes

num_takes = len(takes_df)
num_captures = len(captures_df)

print("Summary Stats\n-------------")
print(f"Number of Validated Captures: {num_validated_captures} / {num_captures}")
print(
    f"Number of Captures w/ Trajectory (&validated): {num_traj_captures} (valid={num_valid_traj_captures}) / {num_captures} (valid={num_validated_captures})"
)
print(f"Number of Validated Takes: {num_validated_takes} / {num_takes}")
print(
    f"Number of Takes w/ Trajectory (&validated): {num_traj_takes} (valid={num_valid_traj_takes}) / {num_takes} (valid={num_validated_takes})"
)
print(
    f"Number of Narrated Takes: {takes_df.is_narrated.sum()} / {num_validated_takes} (total takes={num_takes})"
)
print(
    f"Take Hours Validated (All): {(validated_takes_df.duration_sec.sum() / 3600):.4f}hrs ({(takes_df.duration_sec.sum() / 3600):.4f}hrs)"
)
print(f"Mean Take Duration: {(takes_df.duration_sec.mean() / 60):.4f}mins")
print()
print("Take Scenarios\n-------------")
# Number of takes (non-null take_uid) per task name.
for task_name, n_task_takes in takes_df.groupby("task_name")["take_uid"].count().items():
    print(f"{task_name}: {n_task_takes}")

print()
print()

# Tally takes per physical setting, keeping first-seen order of the settings.
# Iterating the column directly avoids building a Series per row (iterrows).
ps_counts = defaultdict(int)
for ps_uid in takes_df["physical_setting_uid"]:
    ps_counts[ps_uid] += 1

print(f"Number of unique physical settings for takes: {len(ps_counts)}")
print()
print("Number of takes per physical setting\n----------------------")
for ps_uid, n_ps_takes in ps_counts.items():
    print(f"{ps_uid}: {n_ps_takes}")

print()

In [None]:
# Count the distinct physical settings referenced by the captures table
# (missing values are not counted as a setting).
num_physical = egoexo_pd["captures"]["physical_setting_uid"].nunique()
print(f"Number of all possible physical settings: {num_physical}")

# Read Video Data

The videos we recommend reading are the synchronized videos trimmed for each take. You 
can access the locations of these with `egoexo["takes"]`, which corresponds to 
the `takes.json` file in the `RELEASE_DIR`.

The synchronized videos are frame-aligned between all cameras, meaning you
do not need to refer to metadata in order to obtain a synchronized frame. Just read 
the same frame index for each associated video of a take.

There are additionally collage videos, which can be used for visualization purposes. These are the videos used to annotate keysteps and narrations.

Usage of both is demonstrated below.

In [None]:
# Pick a random take. randrange excludes the upper bound, so the index is
# always valid; randint(0, len(...)) could return len(...) and raise IndexError.
take_idx = random.randrange(len(egoexo["takes"]))
take = egoexo["takes"][take_idx]

In [None]:
from ego4d.research.readers import PyAvReader
from ego4d.research.readers import TorchAudioStreamReader

# Reader class used later in the notebook to open every video of the selected take.
VideoReader = PyAvReader
# Alternative backend (uncomment to switch):
# VideoReader = TorchAudioStreamReader  # see: https://pytorch.org/audio/stable/hw_acceleration_tutorial.html
# Forwarded to the reader constructor as its `gpu_idx` argument.
gpu_idx = -1 # use >= 0 to use a CUDA GPU (only for TorchAudioStreamReader)

In [None]:
# Create one reader per (camera name, stream id) pair of the selected take.
videos = {}
for cam_name, streams in take["frame_aligned_videos"].items():
    for stream_id, stream_info in streams.items():
        path = stream_info["relative_path"]
        # Take videos live under <RELEASE_DIR>/takes/<take root_dir>/<relative_path>.
        local_path = os.path.join(RELEASE_DIR, "takes", take["root_dir"], path)
        print(path, local_path)
        videos[(cam_name, stream_id)] = VideoReader(
            local_path,
            resize=None,
            mean=None,
            frame_window_size=1,
            stride=1,
            gpu_idx=gpu_idx,
        )

# Report the length of every opened video.
for video_key, video_reader in videos.items():
    print(f"{video_key}: {len(video_reader)}")

# Use the shortest video so that any index below n_frames is valid for every
# reader, instead of relying on whichever video the loop happened to visit last.
# Raises ValueError if the take has no videos at all.
n_frames = min(len(video_reader) for video_reader in videos.values())

In [None]:
def get_frames(frames_by_key):
    """Read the requested frames from the already-opened video readers.

    Args:
        frames_by_key: mapping from a key of the module-level `videos` dict
            to an iterable of frame indices to read from that reader.

    Returns:
        dict with the same keys, each mapped to the list of values obtained
        by indexing the corresponding reader with every requested index, in
        the order the indices were given.

    Raises:
        KeyError: if a key is not present in `videos`.
    """
    frames_out = {}
    # One progress-bar step per requested video.
    for cam_key, indices in tqdm(frames_by_key.items()):
        cam_reader = videos[cam_key]
        frames_out[cam_key] = [cam_reader[i] for i in indices]
    return frames_out

# Visualize

In [None]:
# Pick a random frame. randrange excludes the upper bound, so the index is
# always < n_frames; randint(0, n_frames) could return n_frames itself.
frame_idx = random.randrange(n_frames)

In [None]:
# Request the same frame index from the Aria RGB stream and from stream "0"
# of cam01..cam04; the videos are frame-aligned, so one index fits all.
camera_keys = [("aria01", "rgb")] + [(f"cam{n:02d}", "0") for n in range(1, 5)]
frames = get_frames({camera_key: [frame_idx] for camera_key in camera_keys})

In [None]:
# One subplot row per requested camera.
N = len(frames)
# squeeze=False always returns a 2-D array of Axes, so indexing also works
# when only a single camera was requested (N == 1).
f, ax = plt.subplots(N, 1, squeeze=False)
ax = ax[:, 0]  # single column -> 1-D array of Axes

for idx, (key, key_frames) in enumerate(frames.items()):
    # Only one frame was requested per camera; drop singleton dims and move
    # it to a NumPy array on the CPU for imshow.
    img = key_frames[0].squeeze().cpu().numpy()
    ax[idx].imshow(img)
    ax[idx].set_title(f"camera: {key}")

In [None]:
# Read the same frame index from the take-level collage video.
collage_reader = videos[("collage", "0")]
collage_frame = collage_reader[frame_idx]

# First element of the reader output, singleton dims dropped, as a NumPy array.
img = collage_frame[0].squeeze().cpu().numpy()

# Single-axes figure showing the collage frame.
f, ax = plt.subplots()
ax.imshow(img)
_ = ax.set_title("Take-Level Collage")

# Other Data

In [None]:
# Number of top-level entries in the data parsed from visual_objects.json.
len(egoexo["visual_objects"])

In [None]:
# Display the raw parsed content of visual_objects.json.
# NOTE(review): this prints the whole structure; output may be large.
egoexo["visual_objects"]

In [None]:
# Display the raw parsed content of participants.json.
# NOTE(review): this prints the whole structure; review the output before sharing in case it contains personal data.
egoexo["participants"]