# ATEK Demo 1: ATEK data preprocessing + model inference

This demo walks through preparing an Aria data sequence with annotations ([AriaDigitalTwin (ADT)](https://www.projectaria.com/datasets/adt/)) for use with the 3D object detection model CubeRCNN, running model inference on the preprocessed data, and evaluating the model performance.

In [1]:
import faulthandler

import logging
import os
from logging import StreamHandler
import numpy as np
from typing import Dict, List, Optional
import torch
import sys
import subprocess
from tqdm import tqdm

from atek.data_preprocess.genera_atek_preprocessor_factory import (
    create_general_atek_preprocessor_from_conf,
)
from atek.viz.atek_visualizer import NativeAtekSampleVisualizer
from atek.data_preprocess.general_atek_preprocessor import GeneralAtekPreprocessor
from atek.data_loaders.atek_wds_dataloader import (
    create_native_atek_dataloader
)
from atek.data_loaders.cubercnn_model_adaptor import (
    cubercnn_collation_fn,
    create_atek_dataloader_as_cubercnn
)
from atek.data_preprocess.atek_data_sample import (
    create_atek_data_sample_from_flatten_dict,
)
from cubercnn.config import get_cfg_defaults
from cubercnn.modeling.backbone import build_dla_from_vision_fpn_backbone  # noqa
from cubercnn.modeling.meta_arch import build_model  # noqa
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from omegaconf import OmegaConf

# Dump Python tracebacks if the interpreter crashes hard (e.g. inside a native extension)
faulthandler.enable()

# Send INFO-and-above log records to stdout so that they appear in the notebook output
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[StreamHandler(sys.stdout)],
)

# Root logger, used by the helper functions and cells below
logger = logging.getLogger()

# Prettier colors
COLOR_RED = [231, 111, 81]
COLOR_GREEN = [42, 157, 143]

# -------------------- Helper functions --------------------#
def print_data_sample_dict_content(data_sample, if_pretty: bool = False):
    """
    Log a one-line summary for every entry of a data sample dict.

    Each line reports the key and the value's type, followed by the shape for
    a `torch.Tensor`, the length for a `list`, or the value itself for a `str`.
    Other value types get the type only.

    If `if_pretty` is set, a key that contains "#" is shown without its prefix,
    i.e. only the part after the first "#" is printed.
    """
    logger.info("Printing the content in a ATEK data sample dict: ")
    for raw_key, value in data_sample.items():
        shown_key = raw_key.split("#", 1)[1] if if_pretty and "#" in raw_key else raw_key

        # Type-specific tail of the message; stays empty for unhandled types
        if isinstance(value, torch.Tensor):
            detail = f"with shape of : {value.shape}"
        elif isinstance(value, list):
            detail = f"with len of : {len(value)}"
        elif isinstance(value, str):
            detail = f"value is {value}"
        else:
            detail = ""

        logger.info(f"\t {shown_key}: is a {type(value)}, " + detail)

def create_inference_model(config_file, ckpt_dir, use_cpu_only=False):
    """
    Build the model used by the inference pipeline, together with its config.

    `config_file` is merged on top of the default configuration, `ckpt_dir` is
    handed to `DetectionCheckpointer` as its `save_dir`, and `use_cpu_only`
    overrides `MODEL.DEVICE` with "cpu".

    Returns a `(model_config, model)` tuple: the frozen config and the model
    switched to eval mode.
    """
    # Start from the default configuration
    model_config = get_cfg()
    get_cfg_defaults(model_config)

    # Extra data-related keys, registered before the config file is merged.
    # NOTE(review): presumably required so that `merge_from_file` accepts these keys - confirm.
    extra_top_level_defaults = {
        "MAX_TRAINING_ATTEMPTS": 3,
        "TRAIN_LIST": "",
        "TEST_LIST": "",
        "TRAIN_WDS_DIR": "",
        "TEST_WDS_DIR": "",
        "ID_MAP_JSON": "",
        "OBJ_PROP_JSON": "",
        "CATEGORY_JSON": "",
    }
    for key_name, default_value in extra_top_level_defaults.items():
        setattr(model_config, key_name, default_value)
    model_config.DATASETS.OBJECT_DETECTION_MODE = ""
    model_config.SOLVER.VAL_MAX_ITER = 0
    model_config.SOLVER.MAX_EPOCH = 0

    # Apply the user config, optionally force CPU, then lock the config
    model_config.merge_from_file(config_file)
    if use_cpu_only:
        model_config.MODEL.DEVICE = "cpu"
    model_config.freeze()

    model = build_model(model_config, priors=None)

    # Load weights; the value returned by resume_or_load is not needed here
    checkpointer = DetectionCheckpointer(model, save_dir=ckpt_dir)
    checkpointer.resume_or_load(model_config.MODEL.WEIGHTS, resume=True)
    model.eval()

    return model_config, model

def run_command_and_display_output(command):
    """
    Run `command` as a subprocess, echo its output live, and return its exit code.

    `command` is passed to `subprocess.Popen` unchanged. stderr is merged into
    stdout, and every output line is printed with surrounding whitespace
    stripped as soon as it is read.

    Returns the return code of the finished process.
    """
    # The context manager closes the stdout pipe and reaps the child even if
    # printing raises, so no file descriptor is leaked.
    with subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    ) as process:
        # Iterating the pipe yields lines as they arrive and stops at EOF
        for line in process.stdout:
            print(line.strip())
        # Block until the child exits instead of polling in a busy loop
        return process.wait()


## Set up data and code paths

In [2]:
# Download the example ADT sequence to the local path `~/Documents/projectaria_tools_adt_data` by following this guide:
# https://facebookresearch.github.io/projectaria_tools/docs/open_datasets/aria_digital_twin_dataset/dataset_download

# Set up local data paths (everything lives under `data_dir`; WDS shards are written to `output_wds_path`)
data_dir = os.path.join(os.path.expanduser("~"), "Documents", "projectaria_tools_adt_data")
sequence_name = "Apartment_release_golden_skeleton_seq100_10s_sample_M1292"
example_adt_data_dir = os.path.join(data_dir, sequence_name)
output_wds_path = os.path.join(data_dir, "wds_output")

# Set up ATEK paths
# NOTE(review): `atek_preprocess_config_path` is an absolute path on the original author's machine;
# point it to your own copy of the preprocessing config before running.
atek_src_path = os.path.join(os.path.expanduser("~"), "atek_on_fbsource")
atek_preprocess_config_path = "/home/louy/Calibration_data_link/Atek/2024_08_05_DryRun/adt_cubercnn_preprocess_config.yaml"
category_mapping_file = os.path.join(atek_src_path, "data", "adt_prototype_to_atek.csv")
preprocess_conf = OmegaConf.load(atek_preprocess_config_path)

# Set up trained model weight path
# NOTE(review): also machine-specific. This directory is later expected to contain `config.yaml`
# and is used as the checkpoint directory when the model is created.
model_ckpt_path = "/home/louy/Calibration_data_link/Atek/pre_trained_models/2024_08_28_AdtCubercnnWeights"

# Step 1: ATEK data preprocessing
In this example, we demonstrate how to preprocess Aria data sequences for ML training. 


### Set up and run ATEK data preprocessor
Common data required for 3D object detection models: 
* camera image.
* bounding box annotations (3D, and optionally 2D).
* camera models + pose info.

Typical preprocessing needed: 
* Data synchronization. 
* Image rotation.
* Undistort to linear camera.

In [3]:
# Create the ATEK preprocessor from the loaded config. It will automatically choose which type of sample to build.
atek_preprocessor = create_general_atek_preprocessor_from_conf(
    # [required]
    conf=preprocess_conf,  
    raw_data_folder = example_adt_data_dir,   
    sequence_name = sequence_name, 
    # [optional]
    output_wds_folder=output_wds_path, 
    output_viz_file=os.path.join(example_adt_data_dir, "atek_preprocess_viz.rrd"),
    category_mapping_file=category_mapping_file,
)

# Loop over all samples, and write valid ones to local tar files; the visualization is saved as well.
# As the last expression of the cell, the call's return value is echoed by the notebook
# (50 in the recorded run, matching the logged number of valid samples).
atek_preprocessor.process_all_samples(write_to_wds_flag=True, viz_flag=True)

2024-09-08 14:28:46,958 - INFO - Located ATEK data paths: {'video_vrs_file': '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/video.vrs', 'mps_closedloop_traj_file': '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/aria_trajectory.csv', 'mps_semidense_points_file': '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/mps/slam/semidense_points.csv.gz', 'mps_semidense_observations_file': '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/mps/slam/semidense_observations.csv.gz', 'mps_online_calib_file': '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/mps/slam/online_calibration.jsonl', 'depth_vrs_file': '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/

[38;2;000;000;255m[ProgressLogger][INFO]: 2024-09-08 14:28:46: Opening /home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/video.vrs...[0m
[0m[38;2;000;128;000m[MultiRecordFileReader][DEBUG]: Opened file '/home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/video.vrs' and assigned to reader #0[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 211-1/camera-et activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 214-1/camera-rgb activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 247-1/baro0 activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: Timecode stream found: 285-2[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1201-1/camera-slam-left activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1201-2/camera-slam-right activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 120

Loaded #closed loop trajectory poses records: 300
# writing /home/louy/Documents/projectaria_tools_adt_data/wds_output/shards-0000.tar 0 0.0 GB 0
# writing /home/louy/Documents/projectaria_tools_adt_data/wds_output/shards-0001.tar 32 0.0 GB 32
2024-09-08 14:29:04,664 - INFO - Saving visualization to /home/louy/Documents/projectaria_tools_adt_data/Apartment_release_golden_skeleton_seq100_10s_sample_M1292/atek_preprocess_viz.rrd
2024-09-08 14:29:04,666 - INFO - ATEK has processed 50 valid samples in total.


50

## Inspecting content in the preprocessed file

In [4]:
# List the shard files that the preprocessor wrote
listing_command = ["ls", output_wds_path]
return_code = run_command_and_display_output(command=listing_command)

shards-0000.tar
shards-0001.tar


In [5]:
# Show the table of contents of the first shard
inspect_command = ["tar", "tvf", os.path.join(output_wds_path, "shards-0000.tar")]
return_code = run_command_and_display_output(command=inspect_command)

-r--r--r-- bigdata/bigdata 2588 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-rgb+box_ranges.pth
-r--r--r-- bigdata/bigdata 1884 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-rgb+category_ids.pth
-r--r--r-- bigdata/bigdata 1884 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-rgb+instance_ids.pth
-r--r--r-- bigdata/bigdata 1500 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-rgb+visibility_ratios.pth
-r--r--r-- bigdata/bigdata 2268 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-slam-left+box_ranges.pth
-r--r--r-- bigdata/bigdata 1692 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-slam-left+category_ids.pth
-r--r--r-- bigdata/bigdata 1692 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-slam-left+instance_ids.pth
-r--r--r-- bigdata/bigdata 1436 2024-09-08 14:28 _AtekDataSample_000000.gt_data#obb2_gt+camera-slam-left+visibility_ratios.pth
-r--r--r-- bigdata/bigdata 2460 2024-09-08

# Step 2: Run object detection inference using a pre-trained CubeRCNN model
In this example, we demonstrate how to run model inference with preprocessed ATEK data. 

### Create a PyTorch DataLoader from ATEK WDS files

In [6]:
# Create an ATEK dataloader in the native ATEK format from the two shards written above.
tar_file_urls = [
    os.path.join(output_wds_path, f"shards-{shard_index:04d}.tar")
    for shard_index in range(2)
]

atek_dataloader = create_native_atek_dataloader(
    urls=tar_file_urls,
    batch_size=None,
    num_workers=1,
)

# Peek at the first sample only, to show which keys a native ATEK sample carries
first_atek_sample = next(iter(atek_dataloader))
logger.info(f"Loading WDS into ATEK natvie format, each sample contains the following keys: {first_atek_sample.keys()}")



2024-09-08 14:29:05,267 - INFO - Loading WDS into ATEK natvie format, each sample contains the following keys: dict_keys(['__key__', '__url__', '__local_path__', 'gt_data', 'mfcd#camera-rgb+camera_label', 'mfcd#camera-rgb+camera_model_name', 'mfcd#camera-rgb+capture_timestamps_ns', 'mfcd#camera-rgb+exposure_durations_s', 'mfcd#camera-rgb+frame_ids', 'mfcd#camera-rgb+gains', 'mfcd#camera-rgb+origin_camera_label', 'mfcd#camera-rgb+projection_params', 'mfcd#camera-rgb+t_device_camera', 'mfcd#camera-slam-left+camera_label', 'mfcd#camera-slam-left+camera_model_name', 'mfcd#camera-slam-left+capture_timestamps_ns', 'mfcd#camera-slam-left+exposure_durations_s', 'mfcd#camera-slam-left+frame_ids', 'mfcd#camera-slam-left+gains', 'mfcd#camera-slam-left+origin_camera_label', 'mfcd#camera-slam-left+projection_params', 'mfcd#camera-slam-left+t_device_camera', 'mfcd#camera-slam-right+camera_label', 'mfcd#camera-slam-right+camera_model_name', 'mfcd#camera-slam-right+capture_timestamps_ns', 'mfcd#camera

### Create PyTorch DataLoader, converted to CubeRCNN format
The `create_atek_dataloader_as_cubercnn` API is a thin wrapper on top of a `CubeRCNN` data converter class. 

In [7]:
# Create a dataloader over the same shards, with samples converted to CubeRCNN format
cubercnn_dataloader = create_atek_dataloader_as_cubercnn(urls = tar_file_urls, batch_size = 6, num_workers = 1)
# Peek at the first batch; it is indexed with [0] below, i.e. a batch is a sequence of per-sample dicts
first_cubercnn_sample = next(iter(cubercnn_dataloader)) 
logger.info(f"Loading WDS into CubeRCNN format, each sample contains the following keys: {first_cubercnn_sample[0].keys()}")

2024-09-08 14:29:05,958 - INFO - Loading WDS into CubeRCNN format, each sample contains the following keys: dict_keys(['image', 'K', 'height', 'width', 'K_matrix', 'timestamp_ns', 'frame_id', 'sequence_name', 'T_world_camera', 'instances', 'Ts_world_object', 'object_dimensions', 'category'])


## Run model inference over the dataset

In [8]:
from tqdm import tqdm

# Load the config of the pre-trained CubeRCNN model
model_config_file = os.path.join(model_ckpt_path, "config.yaml")
conf = OmegaConf.load(model_config_file)

# Set up config and model. Fall back to CPU when no CUDA device is available, so that
# the demo also runs on machines without a GPU; with CUDA present, the device chosen
# by the model config is kept.
model_config, model = create_inference_model(
    model_config_file,
    model_ckpt_path,
    use_cpu_only=not torch.cuda.is_available(),
)

# Cache inference results for visualization and evaluation in the later cells
input_output_data_pairs = []

# Loop over the created PyTorch DataLoader; gradients are not needed for inference
with torch.no_grad():
    for cubercnn_input_data in tqdm(
        cubercnn_dataloader,
        desc="Inference progress: ",
    ):
        cubercnn_model_output = model(cubercnn_input_data)

        # Keep each input batch next to its output so they can be zipped per sample later
        input_output_data_pairs.append((cubercnn_input_data, cubercnn_model_output))

logger.info("Inference completed.")

2024-09-08 14:29:06,572 - INFO - [DetectionCheckpointer] Loading from /home/louy/Calibration_data_link/Atek/pre_trained_models/2024_08_28_AdtCubercnnWeights/model_final.pth ...
2024-09-08 14:29:06,573 - INFO - [Checkpointer] Loading from /home/louy/Calibration_data_link/Atek/pre_trained_models/2024_08_28_AdtCubercnnWeights/model_final.pth ...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  return F.conv2d(input, weight, bias, self.stride,
Inference progress: : 9it [00:05,  1.57it/s]

2024-09-08 14:29:12,601 - INFO - Inference completed.





### Visualize inference results

In [9]:
from atek.viz.cubercnn_visualizer import CubercnnVisualizer

# Visualize cached inference results
logger.info("Visualizing inference results.")
viz_conf = preprocess_conf.visualizer
cubercnn_visualizer = CubercnnVisualizer(viz_prefix="inference_visualizer", conf=viz_conf)

for input_data_as_list, output_data_as_list in input_output_data_pairs:
    # Each cached pair holds one batch; walk its samples in lockstep
    for single_cubercnn_input, single_cubercnn_output in zip(input_data_as_list, output_data_as_list):
        timestamp_ns = single_cubercnn_input["timestamp_ns"]

        # The model output carries no camera pose; copy it over from the input (needed for visualization)
        single_cubercnn_output["T_world_camera"] = single_cubercnn_input["T_world_camera"]

        # Plot RGB image
        cubercnn_visualizer.plot_cubercnn_img(
            single_cubercnn_input["image"],
            timestamp_ns=timestamp_ns,
        )

        # Plot GT (green) and prediction (red)
        cubercnn_visualizer.plot_cubercnn_dict(
            cubercnn_dict=single_cubercnn_input,
            timestamp_ns=timestamp_ns,
            plot_color=cubercnn_visualizer.COLOR_GREEN,
            suffix="_model_input",
        )
        cubercnn_visualizer.plot_cubercnn_dict(
            cubercnn_dict=single_cubercnn_output,
            timestamp_ns=timestamp_ns,
            plot_color=cubercnn_visualizer.COLOR_RED,
            suffix="_model_output",
        )

2024-09-08 14:29:12,726 - INFO - Visualizing inference results.


[2024-09-08T21:29:12Z INFO  re_sdk::spawn] A process is already listening at this address. Assuming it's a Rerun Viewer. addr=0.0.0.0:9876
[2024-09-08T21:29:12Z INFO  re_sdk_comms::server] New SDK client connected from: 127.0.0.1:45548


# Step 3: Evaluate model performance

### Write inference results into ATEK-format csv files

In [10]:
from atek.evaluation.static_object_detection.obb3_csv_io import AtekObb3CsvWriter

# One csv file for the ground truth (taken from the model inputs) and one for the predictions
gt_csv_path = os.path.join(data_dir, "gt_obbs.csv")
gt_writer = AtekObb3CsvWriter(output_filename=gt_csv_path)
prediction_csv_path = os.path.join(data_dir, "prediction_obbs.csv")
prediction_writer = AtekObb3CsvWriter(output_filename=prediction_csv_path)

for input_data_as_list, output_data_as_list in input_output_data_pairs:
    # Each cached pair holds one batch; walk its samples in lockstep
    for single_cubercnn_input, single_cubercnn_output in zip(input_data_as_list, output_data_as_list):
        # Copy the camera pose from the input onto the output before writing it
        single_cubercnn_output["T_world_camera"] = single_cubercnn_input["T_world_camera"]
        timestamp_ns = single_cubercnn_input["timestamp_ns"]

        gt_writer.write_from_cubercnn_dict(
            cubercnn_dict=single_cubercnn_input,
            timestamp_ns=timestamp_ns,
        )
        prediction_writer.write_from_cubercnn_dict(
            cubercnn_dict=single_cubercnn_output,
            timestamp_ns=timestamp_ns,
        )

2024-09-08 14:29:14,604 - INFO - starting writing obb3 to /home/louy/Documents/projectaria_tools_adt_data/gt_obbs.csv
2024-09-08 14:29:14,606 - INFO - starting writing obb3 to /home/louy/Documents/projectaria_tools_adt_data/prediction_obbs.csv


### Call ATEK's benchmarking script to evaluate the results

In [11]:
# Run ATEK's benchmarking script on the two csv files written above; metrics go to a json file
benchmarking_command = [
    "python3",
    os.path.join(atek_src_path, "tools", "benchmarking_static_object_detection.py"),
    "--pred-csv",
    os.path.join(data_dir, "prediction_obbs.csv"),
    "--gt-csv",
    os.path.join(data_dir, "gt_obbs.csv"),
    "--output-file",
    os.path.join(data_dir, "atek_metrics.json"),
]
return_code = run_command_and_display_output(command=benchmarking_command)

2024-09-08 14:29:17,220-INFO:Running file-level eval on /home/louy/Documents/projectaria_tools_adt_data/prediction_obbs.csv and /home/louy/Documents/projectaria_tools_adt_data/gt_obbs.csv
2024-09-08 14:29:17,220-INFO:starting loading evaluation obb3s from /home/louy/Documents/projectaria_tools_adt_data/prediction_obbs.csv
2024-09-08 14:29:17,840-INFO:starting loading evaluation obb3s from /home/louy/Documents/projectaria_tools_adt_data/gt_obbs.csv
2024-09-08 14:29:18,115-INFO:Computing 3D obb metric
2024-09-08 14:29:20,536-INFO:DONE Computing 3D obb metric in 2.421750783920288 seconds
2024-09-08 14:29:20,537-INFO:Object Detection Model Performance Summary
mAP (Average across IoU thresholds, defined by MeanAveragePrecision3D class, default is [0.05, 0.10, 0.15, ..., 0.5]): 0.5787
mAP (IoU=0.25): 0.6403
mAP (IoU=0.50): 0.3044
===mAP across IoU thresholds [0.05, 0.10, 0.15, ..., 0.5]) per Class===
Refrigerator: 0.9957
Table: 0.8986
Display: 0.8953
Shelves: 0.7894
Bed: 0.7576
Vase: 0.7140
