# Introduction

This notebook is a tutorial to help you better understand the Project Aria glasses and the Machine Perception Services (MPS) data linked to an EgoExo scene.


In [None]:
import os

# Replace with your cli's download directory for Ego-Exo4D.
# `~` is expanded explicitly because os.path.exists() does not perform
# shell-style home-directory expansion: an unexpanded '~/...' path would
# always be reported as missing.
ego_exo_root = os.path.expanduser('~/egoexo/')
take_name = 'cmu_bike01_5'

# Folder of the selected take: <ego_exo_root>/takes/<take_name>
ego_exo_project_path = os.path.join(ego_exo_root, 'takes', take_name)
print(f'EgoExo Sequence: {ego_exo_project_path}')

if not os.path.exists(ego_exo_project_path):
    print("Please do update your path to a valid EgoExo sequence folder.")

# Visualization

This tutorial uses Rerun to display temporal and interactive data.
The cell below defines helper functions that are used throughout the tutorial.

In [None]:
##
# Visualization utils
# Utility function to log to rerun:
#  - AriaGlasses outline
#  - Camera Calibration
#  - Camera pose 
#  - Images
#  - Point cloud
##

import rerun as rr
import numpy as np

from projectaria_tools.core.calibration import CameraCalibration, DeviceCalibration
from projectaria_tools.core.sophus import SE3

def log_aria_glasses(
    device_calibration: DeviceCalibration,
    label: str,
    use_cad_calibration: bool = True
) -> None:
    """Log the Project Aria glasses outline to Rerun as a timeless 3D line strip.

    Args:
        device_calibration: Calibration passed to `AriaGlassesOutline` to build the outline points.
        label: Rerun entity path under which the outline is logged.
        use_cad_calibration: Forwarded unchanged to `AriaGlassesOutline`.

    Note:
        `AriaGlassesOutline` is imported from `projectaria_tools.utils.rerun_helpers`
        in a later cell; that cell must have been executed before this function is called.
    """
    outline_points = AriaGlassesOutline(device_calibration, use_cad_calibration)
    # The outline is a single strip, hence the one-element list
    outline_strip = rr.LineStrips3D([outline_points])
    rr.log(label, outline_strip, timeless=True)

def log_calibration(
    camera_calibration: CameraCalibration,
    label: str
) -> None:
    """Log a camera calibration to Rerun as a timeless pinhole model.

    Only the image size and the first focal length value of the calibration
    are forwarded to `rr.Pinhole`.

    Args:
        camera_calibration: Calibration providing `get_image_size()` and `get_focal_lengths()`.
        label: Rerun entity path under which the pinhole is logged.
    """
    image_size = camera_calibration.get_image_size()
    focal_length = float(camera_calibration.get_focal_lengths()[0])
    pinhole = rr.Pinhole(
        resolution=[image_size[0], image_size[1]],
        focal_length=focal_length,
    )
    rr.log(label, pinhole, timeless=True)

def log_pose(
    pose: SE3,
    label: str,
    timeless = False
) -> None:
    """Log an SE3 pose to Rerun as a 3D transform.

    Args:
        pose: Pose converted with `ToTransform3D(pose, False)`.
        label: Rerun entity path under which the transform is logged.
        timeless: Forwarded to `rr.log`; when False the transform is logged
            at the currently set Rerun time.

    Note:
        `ToTransform3D` is imported from `projectaria_tools.utils.rerun_helpers`
        in a later cell; that cell must have been executed before this function is called.
    """
    transform = ToTransform3D(pose, False)
    rr.log(label, transform, timeless=timeless)

def log_image(
    image_array: np.ndarray,
    label: str,
    timeless: bool = False
) -> None:
    """Log an image to Rerun under `label`.

    A `rr.DisconnectedSpace` is logged on the same entity first (at the
    currently set Rerun time), followed by the image itself.

    Args:
        image_array: Pixel data handed to `rr.Image`.
        label: Rerun entity path under which the image is logged.
        timeless: Forwarded to `rr.log` for the image only.
    """
    rr.log(label, rr.DisconnectedSpace())
    rr.log(label, rr.Image(image_array), timeless=timeless)

def log_point_cloud(
    point_positions: np.ndarray,
    label: str,
    timeless: bool = True) -> None:
    """Log a 3D point cloud to Rerun with a fixed radius (0.001) and a light grey color.

    Args:
        point_positions: Point positions handed to `rr.Points3D`.
        label: Rerun entity path under which the points are logged.
        timeless: Forwarded to `rr.log`; defaults to True.
    """
    points = rr.Points3D(point_positions, radii=0.001, colors=[200, 200, 200])
    rr.log(label, points, timeless=timeless)

## Getting to know the Project Aria Glasses and its sensor suite
[Project Aria](https://facebookresearch.github.io/projectaria_tools/docs/tech_spec/hardware_spec) is a glasses form factor device to capture multimodal data from an ego-centric perspective.

Take Home Message:
- You learned about:
  - what and where are the various sensors on the Glasses
  - That `DeviceCalibration` is the interface to use to retrieve:
    - Intrinsics for Image Stream data (i.e Camera data) - `CameraCalibration`
    - Extrinsics are defined for all sensors - `SE3`

In [None]:
from projectaria_tools.core import data_provider
from projectaria_tools.utils.rerun_helpers import AriaGlassesOutline, ToTransform3D

###
# We are using here the projectaria_tools API for:
# - retrieving the DeviceCalibration and the pose of each sensor
# - plotting those poses onto the Aria glasses outline
###

##
# Retrieve device calibration and plot sensors locations, orientations
vrs_file_path = os.path.join(ego_exo_project_path, 'aria01.vrs')
print(vrs_file_path)
assert os.path.exists(vrs_file_path), "We are not finding the required vrs file"

vrs_data_provider = data_provider.create_vrs_data_provider(vrs_file_path)
if not vrs_data_provider:
    # Raise instead of calling exit(1): the failure is then reported in the cell
    # output, and the following cells (which all need vrs_data_provider) are not
    # run against a missing provider.
    raise RuntimeError("Couldn't create data vrs_data_provider from vrs file")

device_calibration = vrs_data_provider.get_device_calibration()

# Init rerun api
rr.init("Aria Glasses")
rec = rr.memory_recording()

# Aria coordinate system sets X down, Z in front, Y Left
rr.log("device", rr.ViewCoordinates.RIGHT_HAND_X_DOWN, timeless=True)

# Cameras: the pose comes from the per-camera calibration, and the intrinsics are logged on the same entity
cam_labels = device_calibration.get_camera_labels()
print(f"Log {len(cam_labels)} Cameras")
for cam in cam_labels:
    camera_calibration = device_calibration.get_camera_calib(cam)
    T_device_sensor = camera_calibration.get_transform_device_camera()
    log_pose(T_device_sensor, f"device/camera/{cam}")
    log_calibration(camera_calibration, f"device/camera/{cam}")

mic_labels = device_calibration.get_microphone_labels()
print(f"Log {len(mic_labels)} Microphones")
for mic in mic_labels:  # Note: Only defined in CAD calibration
    T_device_sensor = device_calibration.get_transform_device_sensor(mic, True)
    log_pose(T_device_sensor, f"device/mic/{mic}")

imu_labels = device_calibration.get_imu_labels()
print(f"Log {len(imu_labels)} IMUs")
for imu in imu_labels:
    T_device_sensor = device_calibration.get_transform_device_sensor(imu, True)
    log_pose(T_device_sensor, f"device/imu/{imu}")

magnetometer_labels = device_calibration.get_magnetometer_labels()
print(f"Log {len(magnetometer_labels)} Magnetometer")
for magnetometer in magnetometer_labels:  # Note: Only defined in CAD calibration
    T_device_sensor = device_calibration.get_transform_device_sensor(magnetometer, True)
    log_pose(T_device_sensor, f"device/{magnetometer}")

barometer_labels = device_calibration.get_barometer_labels()
print(f"Log {len(barometer_labels)} Barometer")
for barometer in barometer_labels:  # Note: Only defined in CAD calibration
    T_device_sensor = device_calibration.get_transform_device_sensor(barometer, True)
    log_pose(T_device_sensor, f"device/{barometer}")


# Plot CPF (Central Pupil Frame coordinate system)
T_device_CPF = device_calibration.get_transform_device_cpf()
log_pose(T_device_CPF, "device/CPF_CentralPupilFrame")

# Plot Project Aria Glasses outline (as lines)
log_aria_glasses(device_calibration, "device/glasses_outline")

# Showing the rerun window
rec


## What is VRS?
Aria is providing VRS files.
[VRS](https://facebookresearch.github.io/vrs/) is the file format used to store the Project Aria Glasses multimodal data.

We are demonstrating here how to retrieve Image data from a VRS file:
- Using the `VrsDataProvider` -> allowing random access
- Using the `DeliveryQueue` -> allowing sequential access (as data would come from the glasses if streaming)

Take home message:
- You learned that:
  - VRS data is stored in streams, each identified by a unique `StreamId`
  - The convenience class `VrsDataProvider` lets you list and retrieve all VRS data and calibration data
  - The `DeliveryQueue` delivery mechanism lets you retrieve data as if it were streaming from the device

- You now know how to:
  - retrieve data for an arbitrary timestamp
    - using `vrs_data_provider.get_X_data_by_time_ns` to retrieve Image or IMU data
      - `get_image_data_by_time_ns` for Image data
      - `get_imu_data_by_time_ns` for IMU data
    - using the `TimeDomain` option to query `DEVICE_TIME` data (the timestamp at which the data was captured)
  - retrieve the configuration of a given stream (e.g. the image size of an Image stream)

In [None]:
# projectaria_tools is providing an easy to use API allowing you to get data for a given StreamId and a given TimeDomain

from projectaria_tools.core.stream_id import StreamId
from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions
from tqdm import tqdm

###
# We are demonstrating here how to retrieve the time span (first/last timestamp) of a given stream
# - and how to retrieve 10 frames evenly spread along this time span
###

# Stream ids of the RGB and the two SLAM cameras, and their matching labels.
# These names (and time_domain, option, start_time, end_time below) are reused by later cells.
rgb_stream_id = StreamId("214-1")
slam_left_stream_id = StreamId("1201-1")
slam_right_stream_id = StreamId("1201-2")
rgb_stream_label = vrs_data_provider.get_label_from_stream_id(rgb_stream_id)
slam_left_stream_label = vrs_data_provider.get_label_from_stream_id(slam_left_stream_id)
slam_right_stream_label = vrs_data_provider.get_label_from_stream_id(slam_right_stream_id)

# Init rerun api
rr.init("Aria Data Provider - Retrieve Image Stream data")
rec = rr.memory_recording()

# Configure option for data retrieval
time_domain = TimeDomain.DEVICE_TIME  # query data based on device time
option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time

# Retrieve Start and End time for the given Sensor Stream Id
start_time = vrs_data_provider.get_first_time_ns(rgb_stream_id, time_domain)
end_time = vrs_data_provider.get_last_time_ns(rgb_stream_id, time_domain)

# FYI, you can retrieve the Image configuration using the following
image_config = vrs_data_provider.get_image_configuration(rgb_stream_id)
width = image_config.image_width
height = image_config.image_height
print(f"StreamId {rgb_stream_id}, StreamLabel {rgb_stream_label}, ImageSize: {width, height}")

# np.linspace returns floats, hence the int(sample) conversion when querying below
sample_count = 10
sample_timestamps = np.linspace(start_time, end_time, sample_count)
for sample in tqdm(sample_timestamps):

    # Retrieve the RGB image
    image_tuple_rgb = vrs_data_provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)
    # The second element of the tuple carries the record metadata, including the capture timestamp
    timestamp = image_tuple_rgb[1].capture_timestamp_ns
    
    # Log timestamp as:
    # - device_time (so you can see the effective time between two frames)
    # - timestamp (so you can see the real VRS timestamp as INT value in the Rerun Timeline dropdown)
    # The RGB capture timestamp is used for all three images logged in this iteration.
    rr.set_time_nanos("device_time", timestamp)
    rr.set_time_sequence("timestamp", timestamp)

    log_image(image_tuple_rgb[0].to_numpy_array(), f"vrs/{rgb_stream_label}")

    # Retrieving the SLAM images (closest to the same query time as the RGB image)
    image_tuple_slam_left = vrs_data_provider.get_image_data_by_time_ns(slam_left_stream_id, int(sample), time_domain, option)
    log_image(image_tuple_slam_left[0].to_numpy_array(), f"vrs/{slam_left_stream_label}")

    image_tuple_slam_right = vrs_data_provider.get_image_data_by_time_ns(slam_right_stream_id, int(sample), time_domain, option)
    log_image(image_tuple_slam_right[0].to_numpy_array(), f"vrs/{slam_right_stream_label}")

# Showing the rerun window
rec

# Note:
# In the Timeline dropdown you will find both "timestamp" (raw VRS timestamp) and "device_time"

In [None]:
##
# IMU data
# Plotting the 'accelerometer readout (m/sec2)' and the 'gyroscope readout (rad/sec)' of the left IMU
##

# Init rerun api
rr.init("Aria Data Provider - Retrieve IMU data")
rec = rr.memory_recording()

stream_id = vrs_data_provider.get_stream_id_from_label("imu-left")
# Only the first tenth of the IMU records is logged
imu_sample_count = int(vrs_data_provider.get_num_data(stream_id) / 10)
for index in range(0, imu_sample_count):

    imu_data = vrs_data_provider.get_imu_data_by_index(stream_id, index)
    timestamp = imu_data.capture_timestamp_ns
    # Log timestamp as:
    # - device_time (so you can see the effective time between two samples)
    # - timestamp (so you can see the real VRS timestamp as INT value in the Rerun Timeline dropdown)
    rr.set_time_nanos("device_time", timestamp)
    rr.set_time_sequence("timestamp", timestamp)

    # One scalar entity per axis: accelerometer first, then gyroscope
    for axis_index, axis_name in enumerate("xyz"):
        rr.log(f"imu-left/accel_msec2/{axis_name}", rr.Scalar(imu_data.accel_msec2[axis_index]))

    for axis_index, axis_name in enumerate("xyz"):
        rr.log(f"imu-left/gyro_radsec/{axis_name}", rr.Scalar(imu_data.gyro_radsec[axis_index]))


# Showing the rerun window
rec

# Using VRS Data Provider and its delivery queue

Take Home Message:
- You can use `vrs_data_provider.get_X_data_by_time_ns` to retrieve a data chunk at an arbitrary timestamp
- You can also use a delivery queue to retrieve data as if it were streaming from the device


In [None]:

from projectaria_tools.core.sensor_data import SensorData, SensorDataType

# Init rerun api
rr.init("Aria Data Provider - Delivery Queue")
rec = rr.memory_recording()

# Configure the Delivery Queue for data replay of the RGB images, EyeTracking images and left IMU data
eye_tracking_stream_id = StreamId("211-1")
imu_left_stream_id = StreamId("1202-2")

# Start from the default options, turn every stream off, then enable only the three streams of interest
deliver_option = vrs_data_provider.get_default_deliver_queued_options()
deliver_option.deactivate_stream_all()
for stream_id in [rgb_stream_id, eye_tracking_stream_id, imu_left_stream_id]:
    deliver_option.activate_stream(stream_id)
    # The same subsample rate (60) is applied to each activated stream
    deliver_option.set_subsample_rate(stream_id, 60)

# Downsampling the image for faster preview
down_sampling_factor = 6

# Iterate over the data and LOG data as we see fit
for data in vrs_data_provider.deliver_queued_sensor_data(deliver_option):
    device_time_ns = data.get_time_ns(TimeDomain.DEVICE_TIME)
    rr.set_time_nanos("device_time", device_time_ns)
    rr.set_time_sequence("timestamp", device_time_ns)

    if data.sensor_data_type() == SensorDataType.IMAGE:
        img = data.image_data_and_record()[0].to_numpy_array()
        # Keep one pixel out of down_sampling_factor along both image axes
        img = img[::down_sampling_factor, ::down_sampling_factor]
        stream_label = vrs_data_provider.get_label_from_stream_id(data.stream_id())
        log_image(img, f"vrs/{stream_label}")
    elif data.sensor_data_type() == SensorDataType.IMU:
        # Only the left IMU stream is activated above, hence the fixed "imu-left" entity paths
        imu_data = data.imu_data()
        
        rr.log("vrs/imu-left/accel_msec2/x", rr.Scalar(imu_data.accel_msec2[0]))
        rr.log("vrs/imu-left/accel_msec2/y", rr.Scalar(imu_data.accel_msec2[1]))
        rr.log("vrs/imu-left/accel_msec2/z", rr.Scalar(imu_data.accel_msec2[2]))
    
        rr.log("vrs/imu-left/gyro_radsec/x", rr.Scalar(imu_data.gyro_radsec[0]))
        rr.log("vrs/imu-left/gyro_radsec/y", rr.Scalar(imu_data.gyro_radsec[1]))
        rr.log("vrs/imu-left/gyro_radsec/z", rr.Scalar(imu_data.gyro_radsec[2]))

# Showing the rerun window
rec

# NOTE:
# Pressing the RELOAD icon next to the "BluePrint" keyword on the left will show the images side by side instead of overlapping!

# Do notice on the timeline that the RGB image frequency is HIGHER than that of the EyeTracking image stream


# Using (MPS) Machine Perception Services Artifacts

Each EgoExo scene is coming with metadata such as:
- Ego Device trajectory (Aria)
- Exo Device camera poses (GoPro)
- Point Cloud (3d point cloud + observations)
- EyeGaze (where the user is looking at)

We are going to learn how to use:

- First -> the static assets
- Then -> the dynamic assets

# Static Assets

Take Home Message:
- You learned how to load and display static assets, such as:
  - How to configure and use `MpsDataProvider`
  - Point cloud -> `MpsDataProvider.get_semidense_point_cloud`
  - Ego device trajectory -> `mps.read_closed_loop_trajectory`
  - Exo devices static GoPros camera -> `mps.read_static_camera_calibrations`


- You learned that point cloud data can be noisy and that you can filter out the noisy measurements using user-defined thresholds with `mps.utils.filter_points_from_confidence`

In [None]:
# static assets

import math
from projectaria_tools.core import mps # IO for Aria MPS assets
from projectaria_tools.core.calibration import CameraCalibration, KANNALA_BRANDT_K3 # Aria/GoPro Camera Calibration

from projectaria_tools.core.mps.utils import ( # Aria MPS utilities
    filter_points_from_confidence,
    filter_points_from_count,
)

# Init rerun api
rr.init("MPS - Static Assets")
rec = rr.memory_recording()
rr.log("world", rr.ViewCoordinates.RIGHT_HAND_Z_UP, timeless=True)

## Configure the MpsDataProvider (interface used to retrieve Trajectory data)
mps_data_paths_provider = mps.MpsDataPathsProvider(ego_exo_project_path)
mps_data_paths = mps_data_paths_provider.get_data_paths()
mps_data_provider = mps.MpsDataProvider(mps_data_paths)

#
## Loading the Ego device trajectory (displayed here as one static line strip)
#
print(f"Loading Device Trajectory: {mps_data_paths.slam.closed_loop_trajectory}")
trajectory_data = mps.read_closed_loop_trajectory(mps_data_paths.slam.closed_loop_trajectory)
# Keep the device translation of every 80th trajectory sample to lighten the display
device_trajectory = [it.transform_world_device.translation()[0] for it in trajectory_data][0::80]

rr.log("world/device_trajectory", rr.LineStrips3D(device_trajectory, radii=0.006, colors=[120, 250, 120]), timeless=True)

#
## Loading and filtering the point cloud
#
print("Loading and filtering point cloud ... be patient ...")
point_cloud = mps_data_provider.get_semidense_point_cloud()
# Filter the point cloud by inv depth and depth and load
threshold_invdep = 5e-4
threshold_dep = 5e-4
filtered_point_cloud = filter_points_from_confidence(point_cloud, threshold_invdep, threshold_dep)
# Downsampling the data for web viewing (500_000 is the count passed to filter_points_from_count)
downsampled_points_cloud = filter_points_from_count(filtered_point_cloud, 500_000)
# Retrieve point positions
points_position = np.stack([it.position_world for it in downsampled_points_cloud])

log_point_cloud(points_position, "world/point_cloud")

#
## Loading Exo Static camera calibration data
#
static_calibrations = mps.read_static_camera_calibrations(os.path.join(ego_exo_project_path,"trajectory","gopro_calibs.csv"))

for static_calibration in static_calibrations:
    T_world_device = static_calibration.transform_world_cam
    # Wrap the GoPro intrinsics in a CameraCalibration so that log_calibration can be reused
    camera_intrinsics = CameraCalibration(static_calibration.camera_uid,
                            KANNALA_BRANDT_K3,
                            static_calibration.intrinsics,
                            static_calibration.transform_world_cam,
                            static_calibration.width,
                            static_calibration.height,
                            None,
                            math.pi,
                            "")

    # Intrinsics and pose are logged on the same entity, named after the camera uid
    log_calibration(camera_intrinsics, f"world/{static_calibration.camera_uid}")
    log_pose(T_world_device, f"world/{static_calibration.camera_uid}")

# Showing the rerun window
rec

# Dynamic Assets

Take Home Message:
- You learned how to load and display dynamic assets, such as:
  - Ego device pose at a given timestamp T -> `mps_data_provider.get_closed_loop_pose(timestamp)`
  - Ego Eye Gaze ray at a given timestamp T -> `mps_data_provider.get_personalized_eyegaze(timestamp)`

- You know how to use relative poses to display where the RGB and SLAM cameras are, on top of the Device pose at time T

In [None]:
# Tutorial on how to use Trajectory data

from projectaria_tools.core import mps
from projectaria_tools.core.mps.utils import get_gaze_vector_reprojection

###
# Trajectory is in Device coordinates
# 1. To move to a camera, you have to apply the relative transform from Device_to_Camera_of_your_choice
###

###
# Eye Gaze data is independent of the Device pose
# 1. To display the eye gaze ray, you are applying the right relative transform Device_to_CPF
###

from projectaria_tools.core import mps

# Init rerun api
rr.init("MPS - Trajectory data")
rec = rr.memory_recording()
rr.log("world", rr.ViewCoordinates.RIGHT_HAND_Z_UP, timeless=True)

## Configure the MpsDataProvider (interface used to retrieve Trajectory data)
mps_data_paths_provider = mps.MpsDataPathsProvider(ego_exo_project_path)
mps_data_paths = mps_data_paths_provider.get_data_paths()
mps_data_provider = mps.MpsDataProvider(mps_data_paths)

assert mps_data_provider.has_personalized_eyegaze(), "The sequence does not have Eye Gaze data"
assert mps_data_provider.has_closed_loop_poses(), "The sequence does not have Trajectory data"

# Collect all the trajectory points (device translation of every 80th sample)
print(mps_data_paths.slam.closed_loop_trajectory)
trajectory_data = mps.read_closed_loop_trajectory(mps_data_paths.slam.closed_loop_trajectory)
device_trajectory = [it.transform_world_device.translation()[0] for it in trajectory_data][0::80]

rr.log("world/device_trajectory", rr.LineStrips3D(device_trajectory, radii=0.001), timeless=True)

# Log Glasses & calibration linked to the image we want to show
log_aria_glasses(device_calibration, "world/device/glasses_outline")
rgb_camera_calibration = device_calibration.get_camera_calib(rgb_stream_label)
slam_left_camera_calibration = device_calibration.get_camera_calib(slam_left_stream_label)
slam_right_camera_calibration = device_calibration.get_camera_calib(slam_right_stream_label)
log_calibration(rgb_camera_calibration, f"world/device/{rgb_stream_label}")
log_calibration(slam_left_camera_calibration, f"world/device/{slam_left_stream_label}")
log_calibration(slam_right_camera_calibration, f"world/device/{slam_right_stream_label}")

sample_count = 40
sample_timestamps = np.linspace(start_time, end_time, sample_count)
for sample in tqdm(sample_timestamps):
    image_tuple = vrs_data_provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)
    timestamp = image_tuple[1].capture_timestamp_ns
    rr.set_time_nanos("device_time", timestamp)
    rr.set_time_sequence("timestamp", timestamp)

    ##
    # Retrieve the camera pose at a given timestamp
    # 1. Log the Device pose
    # 2. Use the extrinsics camera_calibration to apply the relative pose to go from Device to Camera_X
    ##
    pose_info = mps_data_provider.get_closed_loop_pose(timestamp)
    if pose_info:
        T_world_device = pose_info.transform_world_device
        T_device_camera = rgb_camera_calibration.get_transform_device_camera()

        # Log image
        log_image(image_tuple[0].to_numpy_array(), f"world/device/{rgb_stream_label}")

        # 1. Log the Device pose
        log_pose(
            T_world_device,
            "world/device",
        )

        # 2. Use the extrinsics camera_calibration to apply the relative pose to go from Device to Camera_X
        # Show the RGB camera
        # Note: Rerun will apply T_world_device @ T_device_camera and display T_world_camera
        log_pose(
            T_device_camera,
            f"world/device/{rgb_stream_label}",
        )

        # Show the SLAM cameras, each one under its own entity path.
        # (The right SLAM pose was previously logged under the left SLAM label,
        # overwriting the left pose and leaving the right camera without a pose.)
        log_pose(
            slam_left_camera_calibration.get_transform_device_camera(),
            f"world/device/{slam_left_stream_label}",
        )
        log_pose(
            slam_right_camera_calibration.get_transform_device_camera(),
            f"world/device/{slam_right_stream_label}",
        )

    ##
    # Eye Gaze data
    # 1. Retrieve the eye_gaze data vector for a given timestamp
    # 2. Compute the corresponding 3D vector and retrieve its depth
    # 3. Reproject the eyegaze vector at Depth X on a given image (using Calibration data)
    ##

    # 1. Retrieve the eye_gaze data vector for a given timestamp
    eye_gaze = mps_data_provider.get_personalized_eyegaze(timestamp)
    if eye_gaze is None:
        # No gaze sample available for this timestamp: nothing to display
        continue

    # 2. Compute the corresponding 3D vector and retrieve its depth
    # Here is how to retrieve the depth of the EyeGaze vector
    # depth_m = eye_gaze.depth or 1.0
    # But here for display we are using a short fixed proxy depth (0.1 m), so you can better see things in context of each other
    depth_m = 0.1
    gaze_vector_in_cpf = mps.get_eyegaze_point_at_depth(eye_gaze.yaw, eye_gaze.pitch, depth_m)
    gaze_vector_in_cpf = np.nan_to_num(gaze_vector_in_cpf)
    # Move the gaze ray from the CPF to the Device coordinate system and log it as a 3D arrow.
    # rr.Arrows3D expects `vectors` relative to `origins`, so the transformed gaze point
    # is converted into an offset from the transformed CPF origin.
    gaze_origin_in_device = np.asarray(T_device_CPF @ [0, 0, 0]).reshape(-1)
    gaze_point_in_device = np.asarray(T_device_CPF @ gaze_vector_in_cpf).reshape(-1)
    rr.log(
        "world/device/eye-gaze",
        rr.Arrows3D(
            origins=[gaze_origin_in_device],
            vectors=[gaze_point_in_device - gaze_origin_in_device],
            colors=[[255, 0, 255]],
        ),
    )

    # 3. Reproject the eyegaze vector at Depth X on a given image (using Calibration data)
    # Compute eye_gaze vector at depth_m reprojection in the image
    # (here the gaze depth is used, falling back to 1.0 when it is missing or zero)
    depth_m = eye_gaze.depth or 1.0
    gaze_projection = get_gaze_vector_reprojection(
        eye_gaze,
        rgb_stream_label,
        device_calibration,
        rgb_camera_calibration,
        depth_m,
    )
    if gaze_projection is not None:
        rr.log(
            f"world/device/{rgb_stream_label}/eye-gaze_projection",
            rr.Points2D(gaze_projection, radii=30, colors=[0,255,0]),
        )

# Showing the rerun window
rec

# Reprojecting Ego data in Exo cameras



In [None]:

# Create black images representing GoPro images

import math
import torchvision # to read video
from projectaria_tools.core.calibration import CameraCalibration, KANNALA_BRANDT_K3 # Aria/GoPro Camera Calibration
from projectaria_tools.core import mps

## Configure the MpsDataProvider (interface used to retrieve Trajectory data)
mps_data_paths_provider = mps.MpsDataPathsProvider(ego_exo_project_path)
mps_data_paths = mps_data_paths_provider.get_data_paths()
mps_data_provider = mps.MpsDataProvider(mps_data_paths)

# Init rerun api
rr.init("Ego_Exo - image reprojection")
rec = rr.memory_recording()

#
## Loading Exo Static camera calibration data
#
# Each entry of go_pro_proxy describes one localized GoPro: name, image, pose and camera model
go_pro_proxy = []
static_calibrations = mps.read_static_camera_calibrations(os.path.join(ego_exo_project_path,"trajectory","gopro_calibs.csv"))
for static_calibration in static_calibrations:
    # assert the GoPro was correctly localized
    if static_calibration.quality != 1.0:
        print(f"Camera: {static_calibration.camera_uid} was not localized, ignoring this camera.")
        continue
    proxy = {}
    proxy["name"] = static_calibration.camera_uid
    # Black placeholder image, replaced below by a frame from the GoPro video
    proxy["image"] = np.zeros((static_calibration.height, static_calibration.width))
    proxy["pose"] = static_calibration.transform_world_cam
    proxy["camera"] = CameraCalibration(
                            static_calibration.camera_uid,
                            KANNALA_BRANDT_K3,
                            static_calibration.intrinsics,
                            static_calibration.transform_world_cam,
                            static_calibration.width,
                            static_calibration.height,
                            None,
                            math.pi,
                            "")

    # Replace proxy image with an image from the gopro video
    video_path = os.path.join( ego_exo_project_path ,"frame_aligned_videos", static_calibration.camera_uid + ".mp4")
    reader = torchvision.io.VideoReader(video_path, "video")
    # Grab a frame at the middle of the video
    reader_metadata = reader.get_metadata()
    reader.seek(reader_metadata['video']['duration'][0] / 2)
    frame = next(reader)
    proxy["image"] = frame['data'][0].numpy()

    # Log the image
    log_image(proxy["image"], f"image/{proxy['name']}", timeless= True)

    go_pro_proxy.append(proxy)


# One (possibly empty) list of projected 2D points per localized GoPro.
# Pre-filling the keys guarantees that the plotting loop below never hits a
# missing key when the device is never visible from a given camera.
per_go_pro_reprojection = {go_pro['name']: [] for go_pro in go_pro_proxy}

# Sample the camera trajectory and reproject it on the GoPro images
sample_count = 120
sample_timestamps = np.linspace(start_time, end_time, sample_count)
for sample in tqdm(sample_timestamps):
    image_tuple = vrs_data_provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)
    timestamp = image_tuple[1].capture_timestamp_ns
    rr.set_time_nanos("device_time", timestamp)
    rr.set_time_sequence("timestamp", timestamp)

    ##
    # Retrieve the device pose at a given timestamp
    # and project the device position in each GoPro image
    ##
    pose_info = mps_data_provider.get_closed_loop_pose(timestamp)
    if pose_info:
        T_world_device = pose_info.transform_world_device
        for go_pro in go_pro_proxy:
            # Move the device position from the world frame to the GoPro camera frame
            point_in_go_pro_world = go_pro["pose"].inverse() @ T_world_device.translation()[0]
            device_projection = go_pro["camera"].project(point_in_go_pro_world)
            # Projections reported as None are skipped
            if device_projection is not None:
                per_go_pro_reprojection[go_pro['name']].append(device_projection)


# Plot projected device positions as line strip
for go_pro in go_pro_proxy:
    points = per_go_pro_reprojection[go_pro['name']]
    if not points:
        print(f"Camera: {go_pro['name']} never sees the device, nothing to plot.")
        continue
    rr.log(
        f"image/{go_pro['name']}/ego_device_translation_projection",
        rr.LineStrips2D(
            points,
            radii=[20],
        ),
    )

# Showing the rerun window
rec

# Image undistortion

Take Home Message:
- You learned about how to:
  - create a pinhole camera model to get an undistorted version of a given VRS image 

In [None]:

from typing import List

from projectaria_tools.core import calibration
from projectaria_tools.core.calibration import (
    CameraCalibration,
    distort_by_calibration,
)

from PIL import Image 
from IPython.display import display # to display images

def get_image_stream_ids() -> List[StreamId]:
    """
    Return the stream ids of `vrs_data_provider` whose label starts with
    "camera-" and does not contain "et".
    """
    image_stream_ids = []
    for candidate in vrs_data_provider.get_all_streams():
        label = vrs_data_provider.get_label_from_stream_id(candidate)
        if label.startswith("camera-") and "et" not in label:
            image_stream_ids.append(candidate)
    return image_stream_ids
    
def get_camera_calibration(
        stream_id: StreamId
    ) -> CameraCalibration:
    """
    Return the camera calibration of `stream_id`, looked up by the stream's
    label in the device calibration of `vrs_data_provider`.
    """
    label = vrs_data_provider.get_label_from_stream_id(stream_id)
    return vrs_data_provider.get_device_calibration().get_camera_calib(label)


# Init rerun api
# NOTE(review): the rest of this cell shows its result with PIL / IPython `display`
# and logs nothing to Rerun, so this recording looks unused here - confirm before removing.
rr.init("Aria - Image undistortion")
rec = rr.memory_recording()

def pil_grid(images, max_horiz=np.iinfo(int).max):
    """Paste images into a single white-background grid, filled row by row.

    Each column is as wide as its widest image and each row as tall as its
    tallest image. The last row may be only partially filled.

    Args:
        images: Sequence of PIL images (anything exposing `.size` as (width, height)).
        max_horiz: Maximum number of images per row; defaults to a single row.

    Returns:
        A new RGB `PIL.Image` containing all the input images.

    Raises:
        ValueError: If `images` is empty or `max_horiz` is smaller than 1.
    """
    n_images = len(images)
    if n_images == 0:
        raise ValueError("pil_grid requires at least one image")
    if max_horiz < 1:
        raise ValueError("max_horiz must be at least 1")

    n_horiz = min(n_images, max_horiz)
    # Ceil division: a partially filled last row still needs its own entry
    # (floor division raised IndexError when n_images was not a multiple of n_horiz)
    n_vert = -(-n_images // n_horiz)

    h_sizes, v_sizes = [0] * n_horiz, [0] * n_vert
    for i, im in enumerate(images):
        h, v = i % n_horiz, i // n_horiz
        h_sizes[h] = max(h_sizes[h], im.size[0])
        v_sizes[v] = max(v_sizes[v], im.size[1])

    # Cumulative sums give the top-left offset of every column / row,
    # and their last entry the total grid size (converted to plain ints)
    h_offsets = np.cumsum([0] + h_sizes).tolist()
    v_offsets = np.cumsum([0] + v_sizes).tolist()

    im_grid = Image.new('RGB', (h_offsets[-1], v_offsets[-1]), color='white')
    for i, im in enumerate(images):
        im_grid.paste(im, (h_offsets[i % n_horiz], v_offsets[i // n_horiz]))
    return im_grid

# Original / undistorted image pairs, in display order
images = []
sample_count = 2
sample_timestamps = np.linspace(start_time, end_time, sample_count)
for time_sample in tqdm(sample_timestamps):

    for stream_id in get_image_stream_ids():

        # Fetch the image closest to the sampled time for this stream
        image_tuple = vrs_data_provider.get_image_data_by_time_ns(stream_id, int(time_sample), time_domain, option)
        source_image = image_tuple[0].to_numpy_array()

        #
        # Camera undistortion
        #  - A pinhole camera is created as the target calibration
        #  - The undistortion then resamples the camera rays of the original calibration to build the expected image
        #

        # Calibration attached to the image stream (source calibration)
        camera_calibration = get_camera_calibration(stream_id)

        # Target calibration: pinhole camera with the same image size and the first focal length value
        image_size = camera_calibration.get_image_size()
        focal_lengths = camera_calibration.get_focal_lengths()
        pinhole_calib = calibration.get_linear_camera_calibration(
            image_size[0], image_size[1], focal_lengths[0]
        )

        # Compute the actual undistorted image (pixel sampling by using ray projection/reprojection)
        undistorted_image = distort_by_calibration(source_image, pinhole_calib, camera_calibration)

        # Resize for easy previewing: stream "214-1" is reduced by 6, the other streams by 2
        resampling_factor = 6 if stream_id == StreamId("214-1") else 2
        preview_size = (image_size / resampling_factor).astype(int)
        original_image_display = Image.fromarray(source_image).resize(preview_size)
        undistorted_image_display = Image.fromarray(undistorted_image).resize(preview_size)

        # Keep the pair side by side (Original - Undistorted)
        images.extend([original_image_display, undistorted_image_display])

concat = pil_grid(images, 6) # RGB, SLAM Left, SLAM Right
display(concat)

## Bonus - Using Eye Gaze data to project on various image stream

Take Home Message:
- You learned about:
  - Using the `MpsDataProvider` to retrieve if an EyeGaze file is available and to retrieve EyeGaze data at a given timestamp
  - That EyeGaze data is represented as a 3D ray with depth (showing the point of user focus)
  - That EyeGaze ray is starting from CPF (Central Pupil Frame)
  - How to use this EyeGaze data to reproject in any Aria Image Stream (RGB, SLAMs)
 
Note that you don't need to use any 3D device pose, since EyeGaze is independent of the device pose.

In [None]:
# Tutorial on how to use EyeGaze data
# This first part of the cell prepares everything the sampling loop below needs:
# the Rerun recording, the MPS data provider and the per-camera calibrations.

from projectaria_tools.core import mps
from projectaria_tools.core.mps.utils import get_gaze_vector_reprojection

# Init rerun api
rr.init("Eye Gaze - CPF - Image Reprojection")
# Keep a handle on the in-memory recording: it is displayed at the very end of this cell
rec = rr.memory_recording()
# Aria coordinate system sets X down, Z in front, Y Left
rr.log("device", rr.ViewCoordinates.RIGHT_HAND_X_DOWN, timeless=True)

## Configure the MpsDataProvider (interface used to retrieve EyeGaze data)
mps_data_paths_provider = mps.MpsDataPathsProvider(ego_exo_project_path)
mps_data_paths = mps_data_paths_provider.get_data_paths()
mps_data_provider = mps.MpsDataProvider(mps_data_paths)

# Stop here if the sequence has no personalized Eye Gaze data: the rest of the cell relies on it
assert mps_data_provider.has_personalized_eyegaze(), "The sequence does not have Eye Gaze data"

# Plot the Aria Glasses in 3D to give context on where the Eye Gaze ray originates.

## Plot CPF (Central Pupil Frame coordinate system)
T_device_CPF = device_calibration.get_transform_device_cpf()
log_pose(T_device_CPF, "device/CPF_CentralPupilFrame", timeless=True)
## Plot Project Aria Glasses outline (as lines)
log_aria_glasses(device_calibration, "device/glasses_outline")

# Retrieve the RGB and SLAM camera calibrations
# (required for projecting the Eye Gaze 3D point onto each image plane)
rgb_camera_calibration = device_calibration.get_camera_calib(rgb_stream_label)
slam_left_camera_calibration = device_calibration.get_camera_calib(slam_left_stream_label)
slam_right_camera_calibration = device_calibration.get_camera_calib(slam_right_stream_label)

# Sample a few timestamps evenly spread over the recording and, for each of them:
#  - log the RGB and SLAM images,
#  - log the Eye Gaze ray as a 3D arrow starting at the CPF,
#  - log the Eye Gaze reprojection on each image stream.
sample_count = 20
sample_timestamps = np.linspace(start_time, end_time, sample_count)

# Depth (in meters) used ONLY to draw the 3D arrow: a short 10cm proxy keeps the arrow
# readable next to the glasses outline. The image reprojection uses the real gaze depth.
display_depth_m = 0.1

# Explicit stream label -> calibration pairing used for the reprojection
# (avoids comparing string labels by identity inside the loop)
reprojection_stream_labels = [
    rgb_stream_label,
    slam_left_stream_label,
    slam_right_stream_label,
]
reprojection_calibrations = [
    rgb_camera_calibration,
    slam_left_camera_calibration,
    slam_right_camera_calibration,
]

for sample in tqdm(sample_timestamps):

    # Retrieving the RGB image
    image_tuple_rgb = vrs_data_provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)
    # The capture timestamp of the RGB frame is the time reference for everything logged in this iteration
    timestamp = image_tuple_rgb[1].capture_timestamp_ns

    # Log timestamp as:
    # - device_time (so you can see the effective time between two frames)
    # - timestamp (so you can see the real VRS timestamp as INT value in the Rerun Timeline dropdown)
    rr.set_time_nanos("device_time", timestamp)
    rr.set_time_sequence("timestamp", timestamp)

    log_image(image_tuple_rgb[0].to_numpy_array(), f"device/{rgb_stream_label}")

    # Retrieving the SLAM images
    image_tuple_slam_left = vrs_data_provider.get_image_data_by_time_ns(slam_left_stream_id, int(sample), time_domain, option)
    log_image(image_tuple_slam_left[0].to_numpy_array(), f"device/{slam_left_stream_label}")

    image_tuple_slam_right = vrs_data_provider.get_image_data_by_time_ns(slam_right_stream_id, int(sample), time_domain, option)
    log_image(image_tuple_slam_right[0].to_numpy_array(), f"device/{slam_right_stream_label}")

    ##
    # Eye Gaze data
    # 1. Retrieve the eye_gaze data vector for a given timestamp
    # 2. Compute the corresponding 3D vector and retrieve its depth
    # 3. Reproject the eyegaze vector at Depth X on a given image (using Calibration data)
    ##

    # 1. Retrieve the eye_gaze data vector for a given timestamp
    eye_gaze = mps_data_provider.get_personalized_eyegaze(timestamp)

    # 2. Compute the corresponding 3D point (expressed in CPF) at the display depth
    gaze_vector_in_cpf = mps.get_eyegaze_point_at_depth(
        eye_gaze.yaw, eye_gaze.pitch, display_depth_m
    )
    # Replace NaN values so that the arrow can always be logged
    gaze_vector_in_cpf = np.nan_to_num(gaze_vector_in_cpf)

    # Move the ray from the CPF to the device coordinate system for visualization.
    # Arrows3D draws from `origin` to `origin + vector`, so the vector must be the
    # direction relative to the origin and not the transformed end point itself.
    gaze_origin_in_device = T_device_CPF @ [0, 0, 0]
    gaze_point_in_device = T_device_CPF @ gaze_vector_in_cpf
    rr.log(
        "device/eye-gaze",
        rr.Arrows3D(
            origins=[gaze_origin_in_device],
            vectors=[gaze_point_in_device - gaze_origin_in_device],
            colors=[[255, 0, 255]],
        ),
    )

    # 3. Reproject the eyegaze vector on each image (using Calibration data)
    # Use the real Eye Gaze depth here, falling back to 1m when it is not available
    depth_m = eye_gaze.depth or 1.0

    for stream_label, camera_calibration in zip(
        reprojection_stream_labels, reprojection_calibrations
    ):
        gaze_projection = get_gaze_vector_reprojection(
            eye_gaze,
            stream_label,
            device_calibration,
            camera_calibration,
            depth_m,
        )
        # No projection is returned when the gaze point cannot be projected in this image
        if gaze_projection is not None:
            rr.log(
                f"device/{stream_label}/eye-gaze_projection",
                rr.Points2D(gaze_projection, radii=20),
            )


# Showing the rerun window
rec