In [1]:
%matplotlib widget

import numpy as np
import k3d
from ipywidgets import widgets, HBox, VBox
from ipycanvas import Canvas, hold_canvas
import pyzed.sl as sl
from PIL import Image
from tqdm import tqdm
from dataclasses import dataclass
from typing import List, Dict, Any
import cv2
import os

In [2]:
# Configuration
# Recorded ZED SVO file that every tracking configuration is replayed from.
SVO_FILE = "../../data/HD720_SN38781663_10-23-10.svo2"
# Upper bound, in seconds of recording, on how much of the SVO is processed
# (converted to a frame count with the recording's fps and capped at the SVO length).
MAX_SECONDS = 20
# Forwarded to sl.PositionalTrackingParameters.set_floor_as_origin.
SET_FLOOR_AS_ORIGIN = True

@dataclass
class TrackingConfig:
    """One body-tracking setup to run over the SVO and draw in the comparison views."""
    # Label used as the key of the results dict, the progress-bar text and the legends.
    name: str
    # Assigned to sl.BodyTrackingParameters.body_format and used to pick the bone list.
    body_format: Any
    # Assigned to sl.BodyTrackingParameters.enable_body_fitting.
    enable_body_fitting: bool
    # Assigned to sl.BodyTrackingRuntimeParameters.skeleton_smoothing.
    skeleton_smoothing: float
    # 0xRRGGBB integer: passed to k3d as-is and formatted as "#rrggbb" for the 2D canvas and legends.
    color: int

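# Alternative preset (disabled): BODY_34 with three skeleton_smoothing values.
# To use it, uncomment this block and comment out the active CONFIGS list below.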
# CONFIGS = [
#     TrackingConfig(
#         name="BODY_34 smoothing=0.0",
#         body_format=sl.BODY_FORMAT.BODY_34,
#         enable_body_fitting=True,
#         skeleton_smoothing=0.0,
#         color=0xff0000,  # Red
#     ),
#     TrackingConfig(
#         name="BODY_34 smoothing=0.5",
#         body_format=sl.BODY_FORMAT.BODY_34,
#         enable_body_fitting=True,
#         skeleton_smoothing=0.5,
#         color=0x00ff00,  # Green
#     ),
#     TrackingConfig(
#         name="BODY_34 smoothing=1.0",
#         body_format=sl.BODY_FORMAT.BODY_34,
#         enable_body_fitting=True,
#         skeleton_smoothing=1.0,
#         color=0x0000ff,  # Blue
#     ),
# ]

# Active comparison: identical BODY_18 settings that differ only in body fitting.
CONFIGS = [
    TrackingConfig(
        name=label,
        body_format=sl.BODY_FORMAT.BODY_18,
        enable_body_fitting=fitting,
        skeleton_smoothing=0.5,
        color=rgb,
    )
    for label, fitting, rgb in (
        ("BODY_18 fitting", True, 0xff0000),      # Red
        ("BODY_18 no fitting", False, 0x00ff00),  # Green
    )
]


# Echo the selection so the cell output records what was compared.
print(f"Comparing {len(CONFIGS)} configurations:")
for cfg in CONFIGS:
    print(f"  - {cfg.name} (color: #{cfg.color:06x})")

Comparing 2 configurations:
  - BODY_18 fitting (color: #ff0000)
  - BODY_18 no fitting (color: #00ff00)


In [3]:
def get_bones(body_format):
    """Return the bone list used to draw a skeleton of the given ZED body format.

    Args:
        body_format: ``sl.BODY_FORMAT.BODY_18`` or ``sl.BODY_FORMAT.BODY_34``.

    Returns:
        list[tuple[int, int]]: One ``(start, end)`` pair of keypoint indices per
        bone, indexing into the keypoint arrays of a body in that format.

    Raises:
        ValueError: If ``body_format`` is neither BODY_18 nor BODY_34. Falling
            back to the BODY_18 bones for another format would connect
            unrelated joints without any warning, so this fails loudly instead.
    """
    if body_format == sl.BODY_FORMAT.BODY_34:
        return [
            (0, 1), (1, 2), (2, 3), (3, 26),  # Spine to head
            (2, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (7, 10),  # Left arm
            (2, 11), (11, 12), (12, 13), (13, 14), (14, 15), (15, 16), (14, 17),  # Right arm
            (0, 18), (18, 19), (19, 20), (20, 21), (20, 32),  # Left leg
            (0, 22), (22, 23), (23, 24), (24, 25), (24, 33),  # Right leg
            (26, 27), (26, 28), (26, 30), (28, 29), (30, 31),  # Face
        ]
    if body_format == sl.BODY_FORMAT.BODY_18:
        return [
            (0, 1), (0, 14), (0, 15), (14, 16), (15, 17),  # Head
            (1, 2), (1, 5), (2, 8), (5, 11), (8, 11),  # Torso
            (2, 3), (3, 4),  # Right arm
            (5, 6), (6, 7),  # Left arm
            (8, 9), (9, 10),  # Right leg
            (11, 12), (12, 13),  # Left leg
        ]
    raise ValueError(
        f"No bone list defined for body format {body_format!r}; "
        "supported formats are BODY_18 and BODY_34"
    )

In [4]:
def collect_tracking_data(svo_file: str, config: TrackingConfig, max_seconds: int,
                          set_floor_as_origin: bool, save_frames: bool = False) -> tuple:
    """Replay an SVO file with one tracking configuration and record the skeletons.

    The SVO is opened with metre units, NEURAL depth and a right-handed Z-up
    coordinate system. Positional tracking (static camera) and body tracking
    (HUMAN_BODY_ACCURATE model) are enabled, then up to ``max_seconds`` worth of
    frames are grabbed and the tracked bodies of each frame are stored.

    Args:
        svo_file: Path of the SVO recording to replay.
        config: Body format, body fitting flag and skeleton smoothing to use;
            ``config.name`` labels the progress bar.
        max_seconds: Upper bound on the processed duration; converted to a frame
            count with the recording's fps and capped at the SVO frame count.
        set_floor_as_origin: Forwarded to the positional tracking parameters.
        save_frames: When True, the left image of every processed frame is
            written to ``tmp/{frame_idx:06d}.png`` (the directory is created).

    Returns:
        tuple: ``(frames_data, img_size)``. ``frames_data`` holds one dict per
        grabbed frame, mapping ``int(body.id)`` to
        ``{'keypoints_3d': ..., 'keypoints_2d': ...}`` (copies of
        ``body.keypoint`` / ``body.keypoint_2d``) for every body whose tracking
        state is OK. ``img_size`` is ``(width, height)`` of the saved images, or
        None when ``save_frames`` is False or no frame was grabbed.

    Raises:
        RuntimeError: If the SVO cannot be opened, positional or body tracking
            cannot be enabled, or a frame image cannot be written.
    """
    zed = sl.Camera()

    # Initialize
    init_params = sl.InitParameters()
    init_params.set_from_svo_file(svo_file)
    init_params.coordinate_units = sl.UNIT.METER
    init_params.depth_mode = sl.DEPTH_MODE.NEURAL
    init_params.coordinate_system = sl.COORDINATE_SYSTEM.RIGHT_HANDED_Z_UP

    err = zed.open(init_params)
    if err != sl.ERROR_CODE.SUCCESS:
        raise RuntimeError(f"Failed to open SVO file: {err}")

    img_size = None

    try:
        camera_info = zed.get_camera_information()
        fps = camera_info.camera_configuration.fps
        total_frames = zed.get_svo_number_of_frames()
        max_frames = min(int(max_seconds * fps), total_frames)

        # Enable positional tracking
        positional_tracking_params = sl.PositionalTrackingParameters()
        positional_tracking_params.set_as_static = True
        positional_tracking_params.set_floor_as_origin = set_floor_as_origin
        err = zed.enable_positional_tracking(positional_tracking_params)
        if err != sl.ERROR_CODE.SUCCESS:
            raise RuntimeError(f"Failed to enable positional tracking: {err}")

        # Enable body tracking
        body_param = sl.BodyTrackingParameters()
        body_param.enable_tracking = True
        body_param.detection_model = sl.BODY_TRACKING_MODEL.HUMAN_BODY_ACCURATE
        body_param.body_format = config.body_format
        body_param.enable_body_fitting = config.enable_body_fitting
        # Without this check a failed enable would only show up later as
        # frames that never contain any body.
        err = zed.enable_body_tracking(body_param)
        if err != sl.ERROR_CODE.SUCCESS:
            raise RuntimeError(f"Failed to enable body tracking: {err}")

        body_runtime_param = sl.BodyTrackingRuntimeParameters()
        body_runtime_param.detection_confidence_threshold = 40
        body_runtime_param.skeleton_smoothing = config.skeleton_smoothing

        # Collect data
        frames_data = []
        bodies = sl.Bodies()
        image = sl.Mat()

        if save_frames:
            os.makedirs("tmp", exist_ok=True)

        with tqdm(total=max_frames, desc=f"  {config.name}") as pbar:
            frame_idx = 0
            # Test the frame budget first so no extra frame is grabbed once it is used up.
            while frame_idx < max_frames and zed.grab() == sl.ERROR_CODE.SUCCESS:
                zed.retrieve_bodies(bodies, body_runtime_param)

                # Save video frames (the caller requests this for one config only)
                if save_frames:
                    zed.retrieve_image(image, sl.VIEW.LEFT, sl.MEM.CPU)
                    img_data = image.get_data()
                    if img_size is None:
                        # numpy shape is (rows, cols, ...) -> report (width, height)
                        img_size = (img_data.shape[1], img_data.shape[0])
                    frame_path = f"tmp/{frame_idx:06d}.png"
                    # cv2.imwrite signals failure through its return value only.
                    if not cv2.imwrite(frame_path, img_data):
                        raise RuntimeError(f"Failed to write frame image: {frame_path}")

                # Keep only bodies that are currently tracked; copy the arrays so
                # they do not alias the reused sl.Bodies buffer.
                frame_bodies = {}
                for body in bodies.body_list:
                    if body.tracking_state == sl.OBJECT_TRACKING_STATE.OK:
                        frame_bodies[int(body.id)] = {
                            'keypoints_3d': body.keypoint.copy(),
                            'keypoints_2d': body.keypoint_2d.copy(),
                        }

                frames_data.append(frame_bodies)
                frame_idx += 1
                pbar.update(1)

        return frames_data, img_size

    finally:
        # Always release the camera/SVO, including on the error paths above.
        zed.close()

In [5]:
# Run every configuration over the same recording and keep the results by name.
all_data = {}
img_size = None

print("Collecting tracking data for all configurations...\n")
for cfg_index, config in enumerate(CONFIGS):
    # All runs replay one SVO, so the video frames only need to be dumped once.
    frames, size = collect_tracking_data(
        SVO_FILE,
        config,
        MAX_SECONDS,
        SET_FLOOR_AS_ORIGIN,
        cfg_index == 0,
    )

    # Only the run that saved frames reports a size; keep it across later runs.
    img_size = img_size if size is None else size

    all_data[config.name] = {
        'config': config,
        'frames': frames,
        'bones': get_bones(config.body_format),
    }
    print()

print(f"\nCollected data for {len(all_data)} configurations")
for name, data in all_data.items():
    print(f"  {name}: {len(data['frames'])} frames")
if img_size:
    print(f"Image size: {img_size[0]}x{img_size[1]}")

Collecting tracking data for all configurations...

[2026-01-24 20:16:00 UTC][ZED][INFO] Logging level INFO
[2026-01-24 20:16:00 UTC][ZED][INFO] [Init]  Depth mode: NEURAL
[2026-01-24 20:16:00 UTC][ZED][INFO] [Init]  Serial Number: S/N 38781663


  BODY_18 fitting: 100%|████████████████████████████████████████| 1200/1200 [01:07<00:00, 17.75it/s]



[2026-01-24 20:17:09 UTC][ZED][INFO] Logging level INFO
[2026-01-24 20:17:09 UTC][ZED][INFO] [Init]  Depth mode: NEURAL
[2026-01-24 20:17:09 UTC][ZED][INFO] [Init]  Serial Number: S/N 38781663


  BODY_18 no fitting: 100%|█████████████████████████████████████| 1200/1200 [00:25<00:00, 47.19it/s]



Collected data for 2 configurations
  BODY_18 fitting: 1200 frames
  BODY_18 no fitting: 1200 frames
Image size: 1280x720





In [None]:
# 2D Video visualization with skeleton overlay comparing all configurations

# The canvas backing store is dpi_scale times larger than its on-page size;
# the CSS layout size below shrinks it back so graphics stay crisp on HiDPI screens.
dpi_scale = 2  # Set to 1 for normal displays, 2 for Retina/HiDPI
display_width, display_height = img_size[0], img_size[1]
canvas_width, canvas_height = display_width * dpi_scale, display_height * dpi_scale

canvas = Canvas(width=canvas_width, height=canvas_height)
canvas.layout.width = f"{display_width}px"
canvas.layout.height = f"{display_height}px"

def draw_frame_2d(n):
    """Redraw the canvas with video frame ``n`` and every configuration's skeleton.

    Reads the frame image from ``tmp/{n:06d}.png`` and, for each entry of
    ``all_data`` that has bodies in frame ``n``, overlays the 2D keypoints of the
    first body stored for that frame in the configuration's color. A legend
    listing every configuration is drawn in the top-left corner.

    Args:
        n: Zero-based frame index.

    Raises:
        FileNotFoundError: If the frame image does not exist. The image is
            loaded before any canvas command is issued, so a missing frame does
            not leave a half-issued batch or a dangling scale transform behind.
    """
    # Load first, and close the file promptly via the context manager.
    with Image.open(f"tmp/{n:06d}.png") as img:
        background = np.array(img)

    with hold_canvas(canvas):
        # Scale context for HiDPI
        canvas.scale(dpi_scale, dpi_scale)

        # NOTE(review): relies on the scale above also applying to
        # put_image_data - confirm against the installed ipycanvas version.
        canvas.put_image_data(background, 0, 0)

        # Draw skeletons for each configuration
        for data in all_data.values():
            frames = data['frames']
            if n >= len(frames) or not frames[n]:
                continue

            # Convert hex color to CSS string
            color = f"#{data['config'].color:06x}"

            # First body stored for this frame (dict insertion order).
            first_person = next(iter(frames[n].values()))
            # Plain Python floats serialize safely whatever the array dtype is.
            keypoints_2d = np.asarray(first_person['keypoints_2d'], dtype=float).tolist()
            num_points = len(keypoints_2d)

            # Draw bones
            canvas.stroke_style = color
            canvas.line_width = 3
            for start_idx, end_idx in data['bones']:
                if start_idx >= num_points or end_idx >= num_points:
                    continue
                x1, y1 = keypoints_2d[start_idx]
                x2, y2 = keypoints_2d[end_idx]
                # Skip invalid coordinates (non-positive or NaN fail this test)
                if x1 > 0 and y1 > 0 and x2 > 0 and y2 > 0:
                    canvas.begin_path()
                    canvas.move_to(x1, y1)
                    canvas.line_to(x2, y2)
                    canvas.stroke()

            # Draw joints
            canvas.fill_style = color
            for x, y in keypoints_2d:
                if x > 0 and y > 0:
                    canvas.fill_arc(x, y, 4, 0, 2 * np.pi)

        # Draw legend with background for readability
        legend_x = 15
        legend_y = 25
        legend_height = len(all_data) * 22 + 10

        # Semi-transparent background
        canvas.fill_style = "rgba(0, 0, 0, 0.6)"
        canvas.fill_rect(legend_x - 5, legend_y - 18, 200, legend_height)

        # The font is the same for every row, so set it once.
        canvas.font = "bold 14px sans-serif"
        for name, data in all_data.items():
            canvas.fill_style = f"#{data['config'].color:06x}"
            canvas.fill_rect(legend_x, legend_y - 12, 18, 18)
            canvas.fill_style = "#ffffff"
            canvas.fill_text(name, legend_x + 25, legend_y + 2)
            legend_y += 22

        # Reset transform for next frame
        canvas.reset_transform()

# Frame slider for 2D view (range taken from the first configuration's frames)
num_frames = len(list(all_data.values())[0]['frames'])

frame_label_2d = widgets.Label(value=f"Frame 0 / {num_frames - 1}")

slider_2d = widgets.IntSlider(
    value=0,
    min=0,
    max=num_frames - 1,
    step=1,
    description='Frame:',
    layout=widgets.Layout(width='80%'),
)

def on_slider_2d_change(change):
    """Update the frame label and redraw the canvas for the newly selected frame."""
    selected = change['new']
    frame_label_2d.value = f"Frame {selected} / {num_frames - 1}"
    draw_frame_2d(selected)

# Only react to changes of the slider's value trait.
slider_2d.observe(on_slider_2d_change, names='value')

# Initial draw
draw_frame_2d(0)

print("2D Comparison View (Full Resolution)")
display(VBox(children=[canvas, HBox(children=[slider_2d, frame_label_2d])]))

In [7]:
# Create k3d plot with all configurations
plot = k3d.plot(grid=(-3, -3, -1, 3, 3, 3), camera_mode='orbit')

# Add floor reference at z=0: a single quad split into two triangles
floor_verts = np.array(
    [[-3, -1, 0], [3, -1, 0], [3, 5, 0], [-3, 5, 0]],
    dtype=np.float32,
)
floor_indices = np.array([[0, 1, 2], [0, 2, 3]], dtype=np.uint32)
floor_mesh = k3d.mesh(floor_verts, floor_indices, color=0x444444, opacity=0.2, name="Floor (z=0)")
plot += floor_mesh

# One points object (joints) and one lines object (bones) per configuration,
# created empty here and filled per frame later.
viz_objects = {}
for name, data in all_data.items():
    config, bones = data['config'], data['bones']

    joints = k3d.points(
        [], point_size=0.03, shader='flat', color=config.color, name=f"{name} joints",
    )
    skeleton = k3d.lines(
        [], bones, width=0.008, color=config.color, indices_type="segment", name=f"{name} bones",
    )

    for drawable in (joints, skeleton):
        plot += drawable

    viz_objects[name] = {'joints': joints, 'skeleton': skeleton, 'bones': bones}

# Get number of frames (should be same for all)
num_frames = len(list(all_data.values())[0]['frames'])

def update_frame(frame_idx):
    """Show frame ``frame_idx`` in the 3D plot for every configuration.

    For each entry of ``all_data`` the matching joints/skeleton objects receive
    the 3D keypoints of the first body stored for that frame. When the frame is
    out of range or holds no body, both objects are given an empty (0, 3) array.
    """
    for name, data in all_data.items():
        frames = data['frames']
        viz = viz_objects[name]

        if frame_idx < len(frames) and frames[frame_idx]:
            # First body stored for this frame (dict insertion order)
            people = frames[frame_idx]
            keypoints = people[next(iter(people))]['keypoints_3d']
        else:
            # Nothing to show: clear the geometry
            keypoints = np.array([], dtype=np.float32).reshape(0, 3)

        viz['joints'].positions = keypoints
        viz['skeleton'].vertices = keypoints

# Create legend: a header followed by one colored square + name per configuration
legend_html = (
    "<div style='background: rgba(0,0,0,0.7); padding: 10px; border-radius: 5px;'>"
    "<b style='color: white;'>Legend</b><br>"
    + "".join(
        f"<span style='color: #{data['config'].color:06x};'>&#9632;</span> <span style='color: white;'>{name}</span><br>"
        for name, data in all_data.items()
    )
    + "</div>"
)

legend = widgets.HTML(value=legend_html)

# Frame label and slider
frame_label = widgets.Label(value=f"Frame 0 / {num_frames - 1}")

frame_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=num_frames - 1,
    step=1,
    description='Frame:',
    layout=widgets.Layout(width='80%'),
)

def on_frame_change(change):
    """Update the frame label and the 3D objects for the newly selected frame."""
    selected = change['new']
    frame_label.value = f"Frame {selected} / {num_frames - 1}"
    update_frame(selected)

# Only react to changes of the slider's value trait.
frame_slider.observe(on_frame_change, names='value')

# Initial update
update_frame(0)

# Display
plot.display()
display(VBox(children=[
    legend,
    HBox(children=[frame_slider, frame_label]),
]))

Output()

VBox(children=(HTML(value="<div style='background: rgba(0,0,0,0.7); padding: 10px; border-radius: 5px;'><b sty…

In [8]:
# Toggle visibility controls
checkboxes = {}

def make_toggle(name):
    """Build the checkbox observer that shows or hides one configuration in the 3D plot.

    Args:
        name: Key into ``viz_objects`` of the configuration to control.

    Returns:
        A callback taking the widget change dict; it copies ``change['new']``
        to the ``visible`` attribute of that configuration's joints and skeleton.
    """
    def toggle(change):
        viz = viz_objects[name]
        viz['joints'].visible = change['new']
        viz['skeleton'].visible = change['new']
    return toggle

for name in all_data:
    viz = viz_objects[name]
    cb = widgets.Checkbox(
        # Start from the objects' current state, so re-running this cell after
        # hiding a skeleton does not show a ticked box next to a hidden skeleton.
        value=bool(viz['joints'].visible and viz['skeleton'].visible),
        description=name,
        style={'description_width': 'initial'}
    )
    cb.observe(make_toggle(name), names='value')
    checkboxes[name] = cb

print("Toggle visibility:")
display(VBox(list(checkboxes.values())))

Toggle visibility:


VBox(children=(Checkbox(value=True, description='BODY_18 fitting', style=CheckboxStyle(description_width='init…