Data Labelling:
==============
1) run **Code A** to get the recording.vrs in the SequenceID folder to obtain the .mp4 file (INPUT: Sequence_id) "with and without the gaze information"
3) run the GUI.py to make the relevant annotations
4) run **Code B** to extract the frames for each video (INPUT: frame_interval)

In [3]:
from dataset_api import Hot3dDataProvider
from data_loaders.loader_object_library import load_object_library
from data_loaders.mano_layer import MANOHandModel
from projectaria_tools.core.sophus import SE3
from projectaria_tools.utils.rerun_helpers import ToTransform3D
from data_loaders.headsets import Headset
from projectaria_tools.core.calibration import FISHEYE624
from projectaria_tools.core.stream_id import StreamId

import os
import cv2
from PIL import Image
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import subprocess

home = os.path.expanduser("~")
hot3d_dataset_path = home + "/hot3d/hot3d/dataset"
object_library_path = os.path.join(hot3d_dataset_path, "assets")
mano_hand_model_path = os.path.join("/home/ttyh/hot3d/hot3d/mano/mano_v1_2/models")

if not os.path.exists(object_library_path):
    print("Invalid input sequence or library path.")
    print("Please do update the path to VALID values for your system.")
    raise
object_library = load_object_library(object_library_folderpath=object_library_path)

mano_hand_model = None
if mano_hand_model_path is not None:
    mano_hand_model = MANOHandModel(mano_hand_model_path)

stream_id = StreamId("214-1")
camera_model = FISHEYE624



num_betas=10, shapedirs.shape=(778, 3, 10), self.SHAPE_SPACE_DIM=300
num_betas=10, shapedirs.shape=(778, 3, 10), self.SHAPE_SPACE_DIM=300


In [3]:
#Code A1 (Obtain '.mp4' for annotation through GUI including the gaze)

#DEFINE SEQUENCE ID HERE
sequence_id = "P0001_a68492d5"

home = os.path.expanduser("~")
hot3d_dataset_path = home + "/hot3d/hot3d/dataset"
sequence_path = os.path.join(hot3d_dataset_path, "Aria/", sequence_id)
hot3d_data_provider = Hot3dDataProvider(
    sequence_folder=sequence_path,
    object_library=object_library,
    mano_hand_model=mano_hand_model,
)
device_data_provider = hot3d_data_provider.device_data_provider
timestamps = device_data_provider.get_sequence_timestamps()

def mark_neighbours_as_red(rgb_array, coord, radius=5):
    height, width, _ = rgb_array.shape
    y, x = coord
    
    # Define the color red in RGB
    red = [255, 0, 0]
    
    # Loop through the neighboring pixels in the specified radius
    rgb_array[int(x)-radius:int(x)+radius, int(y)-radius:int(y)+radius] = red

    return rgb_array

gaze_shots = []
# Loop through the specified range of timestamps with a step of 5
for i in range(0, len(timestamps), 1):
    # Get the current timestamp
    timestamp_ns = timestamps[i]
    
    # Get image data for the current timestamp
    image_data = device_data_provider.get_image(timestamp_ns, stream_id)
    aria_eye_gaze_data = (
        device_data_provider.get_eye_gaze(timestamp_ns)
        if hot3d_data_provider.get_device_type() is Headset.Aria
        else None
    )
    
    # Reproject EyeGaze for raw images if eye gaze data is available
    if aria_eye_gaze_data is not None:
        eye_gaze_reprojection_data = (
            device_data_provider.get_eye_gaze_in_camera(
                stream_id, timestamp_ns, camera_model=camera_model
            )
        )

    modified_rgb_array = mark_neighbours_as_red(image_data, list(eye_gaze_reprojection_data), radius=10)

    #plt.imshow(modified_rgb_array)
    #plt.axis('off')
    #plt.show()

    gaze_shots.append(np.rot90(modified_rgb_array, 3))

height, width = gaze_shots[0].shape[:2]

folder_path = os.path.join("/home/uril/hot3d/hot3d/dataset/Aria/", sequence_id)
os.makedirs(folder_path, exist_ok=True)
print('folder_path:',folder_path)
video_name = f"{sequence_id}.mp4"
video_path = os.path.join(folder_path, video_name)
print(f"Video saved in {video_path}")

# Define the video writer# Define the video writer
fps = 30
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 files
out = cv2.VideoWriter(video_path, fourcc, fps, (width, height))


# Add each frame to the video
for image in gaze_shots:
    if len(image.shape) == 2:  # Grayscale image
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 3:  # RGB image
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    out.write(image)

out.release()

#Changing the file formatting
cmd = ['ffmpeg', '-i', video_path, '-vcodec', 'libx264', os.path.join(folder_path, f"{sequence_id}_new.mp4")]
subprocess.run(cmd)

print("USE THIS AS YOUR INPUT VIDEO DIRECTORY INTO THE GUI:" , os.path.join(folder_path, f"{sequence_id}_new.mp4"))

create ariadataprovider
THIS IS THE VRS FILEPATH: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/recording.vrs
THIS IS THE MPS FILEPATH: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps
MPS DATA PATHS YAYYY: 
MPS Data Paths
MPS SLAM Data Paths
--closedLoopTrajectory: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/slam/closed_loop_trajectory.csv
--openLoopTrajectory: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/slam/open_loop_trajectory.csv
--semidensePoints: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/slam/semidense_points.csv.gz
--semidenseObservations: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/slam/semidense_observations.csv.gz
--onlineCalibration: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/slam/online_calibration.jsonl
--summary: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/slam/summary.json
MPS Eyegaze Data Paths
--generalEyegaze: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/mps/eye_gaze/general_eye_gaz

[38;2;000;128;000m[MultiRecordFileReader][DEBUG]: Opened file '/home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/recording.vrs' and assigned to reader #0[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 214-1/camera-rgb activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: Timecode stream found: 285-2[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: Fail to activate streamId 286-1[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1201-1/camera-slam-left activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1201-2/camera-slam-right activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1202-1/imu-right activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1202-2/imu-left activated[0m
[0m

folder_path: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5
Video saved in /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/P0001_a68492d5.mp4


ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena

USE THIS AS YOUR INPUT VIDEO DIRECTORY INTO THE GUI: /home/uril/hot3d/hot3d/dataset/Aria/P0001_a68492d5/P0001_a68492d5_new.mp4


[out#0/mp4 @ 0x6436f612b6c0] video:125901kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.018446%
frame= 3738 fps=185 q=-1.0 Lsize=  125924kB time=00:02:04.50 bitrate=8285.7kbits/s speed=6.15x    
[libx264 @ 0x6436f6147640] frame I:22    Avg QP:22.17  size: 82360
[libx264 @ 0x6436f6147640] frame P:3441  Avg QP:24.22  size: 35591
[libx264 @ 0x6436f6147640] frame B:275   Avg QP:25.64  size: 16877
[libx264 @ 0x6436f6147640] consecutive B-frames: 86.1% 11.8%  1.2%  0.9%
[libx264 @ 0x6436f6147640] mb I  I16..4: 10.0% 88.0%  2.0%
[libx264 @ 0x6436f6147640] mb P  I16..4:  3.0% 19.8%  0.1%  P16..4: 56.1%  6.3%  3.4%  0.0%  0.0%    skip:11.3%
[libx264 @ 0x6436f6147640] mb B  I16..4:  1.6%  6.7%  0.0%  B16..8: 41.6%  2.3%  0.3%  direct: 4.6%  skip:43.1%  L0:69.1% L1:26.1% BI: 4.8%
[libx264 @ 0x6436f6147640] 8x8 transform intra:86.3% inter:92.0%
[libx264 @ 0x6436f6147640] coded y,uvDC,uvAC intra: 59.8% 89.6% 48.7% inter: 22.5% 53.3% 4.5%
[libx264 @ 0x6436f6147640]

In [8]:
#Code A2 (Obtain '.mp4' for annotation through GUI excluding the gaze)
#DEFINE SEQUENCE ID HERE
sequence_id = "P0003_c701bd11"

home = os.path.expanduser("~")
hot3d_dataset_path = home + "/hot3d/hot3d/dataset"
sequence_path = os.path.join(hot3d_dataset_path, "Aria/", sequence_id)
hot3d_data_provider = Hot3dDataProvider(
    sequence_folder=sequence_path,
    object_library=object_library,
    mano_hand_model=mano_hand_model,
)
device_data_provider = hot3d_data_provider.device_data_provider
timestamps = device_data_provider.get_sequence_timestamps()

gaze_shots = []
# Loop through the specified range of timestamps with a step of 5
for i in range(0, len(timestamps), 1):
    # Get the current timestamp
    timestamp_ns = timestamps[i]
    
    # Get image data for the current timestamp
    image_data = device_data_provider.get_image(timestamp_ns, stream_id)
    gaze_shots.append(np.rot90(image_data, 3))

height, width = gaze_shots[0].shape[:2]

folder_path = os.path.join("/home/ttyh/hot3d/hot3d/dataset/Aria/", sequence_id)
os.makedirs(folder_path, exist_ok=True)
print('folder_path:',folder_path)
video_name = f"{sequence_id}.mp4"
video_path = os.path.join(folder_path, video_name)
print(f"Video saved in {video_path}")

# Define the video writer# Define the video writer
fps = 30
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 files
out = cv2.VideoWriter(video_path, fourcc, fps, (width, height))


# Add each frame to the video
for image in gaze_shots:
    if len(image.shape) == 2:  # Grayscale image
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 3:  # RGB image
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    out.write(image)

out.release()

#Changing the file formatting
cmd = ['ffmpeg', '-i', video_path, '-vcodec', 'libx264', os.path.join(folder_path, f"{sequence_id}_no_gaze.mp4")]
subprocess.run(cmd)

print("USE THIS AS YOUR INPUT VIDEO DIRECTORY INTO THE GUI:" , os.path.join(folder_path, f"{sequence_id}_no_gaze.mp4"))

[38;2;000;128;000m[MultiRecordFileReader][DEBUG]: Opened file '/home/ttyh/hot3d/hot3d/dataset/Aria/P0003_c701bd11/recording.vrs' and assigned to reader #0[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 214-1/camera-rgb activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: Timecode stream found: 285-2[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: Fail to activate streamId 286-1[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1201-1/camera-slam-left activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1201-2/camera-slam-right activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1202-1/imu-right activated[0m
[0m[38;2;000;000;255m[VrsDataProvider][INFO]: streamId 1202-2/imu-left activated[0m
[0m

MPS Data Paths
MPS SLAM Data Paths
--closedLoopTrajectory: 
--openLoopTrajectory: 
--semidensePoints: 
--semidenseObservations: 
--onlineCalibration: 
--summary: 
MPS Eyegaze Data Paths
--generalEyegaze: /home/ttyh/hot3d/hot3d/dataset/Aria/P0003_c701bd11/mps/eye_gaze/general_eye_gaze.csv
--personalizedEyegaze: /home/ttyh/hot3d/hot3d/dataset/Aria/P0003_c701bd11/mps/eye_gaze/personalized_eye_gaze.csv
--summary: /home/ttyh/hot3d/hot3d/dataset/Aria/P0003_c701bd11/mps/eye_gaze/summary.json
MPS Hand Tracking Data Paths
--wristAndPalmPoses: 
--summary: 
folder_path: /home/ttyh/hot3d/hot3d/dataset/Aria/P0003_c701bd11
Video saved in /home/ttyh/hot3d/hot3d/dataset/Aria/P0003_c701bd11/P0003_c701bd11.mp4


FileNotFoundError: [Errno 2] No such file or directory: 'ffmpeg'

In [5]:
#Code B (Extract frames for each video)

import cv2
import os

#DEFINE THE FRAME INTERVAL
frame_interval = 5

def process_videos(video_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        
    video_files = [f for f in os.listdir(video_folder) if f.endswith('.mp4')]
    
    for video_file in video_files:
        video_path = os.path.join(video_folder, video_file)
        cap = cv2.VideoCapture(video_path)
        
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Video: {video_file}, Frames: {frame_count}")

        # Create a subfolder for each video file 
        video_name = os.path.splitext(video_file)[0] 
        video_output_folder = os.path.join(output_folder, video_name) 
        if not os.path.exists(video_output_folder): 
            os.makedirs(video_output_folder)
        
        frame_num = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if frame_num % frame_interval == 0:
                frame_name = f"{video_name}_frame_{frame_num}.jpg"
                frame_path = os.path.join(video_output_folder, frame_name)
                cv2.imwrite(frame_path, frame)
                
            frame_num += 1
        
        cap.release()
    print("Processing complete.")

# Set your video folder path and output folder path here
video_folder = '/home/ttyh/hot3d/hot3d/dataset/Labelled/Videos'
output_folder = '/home/ttyh/hot3d/hot3d/dataset/Labelled/Videos/new_frames'

process_videos(video_folder, output_folder)


Video: Pick up_P0001_a68492d5_no_gaze_1.mp4, Frames: 174
Video: Pick up_P0001_a68492d5_no_gaze_8.mp4, Frames: 105
Video: Pick up_P0001_a68492d5_no_gaze_6.mp4, Frames: 108
Video: Pick up_P0001_a68492d5_no_gaze_4.mp4, Frames: 107
Video: Place_P0001_a68492d5_no_gaze_2.mp4, Frames: 38
Video: Place_P0001_a68492d5_no_gaze_3.mp4, Frames: 319
Video: Place_P0001_a68492d5_no_gaze_5.mp4, Frames: 92
Video: Place_P0001_a68492d5_no_gaze_7.mp4, Frames: 94
Processing complete.


In [None]:
#Code C (Preparing data to be fed into VLM)
