In [8]:
import open3d as o3d
import numpy as np
import cv2

In [2]:
# NOTE(review): `frame` is not read by any cell visible in this notebook; the
# only later use of the name is the `frame` parameter of
# visualize_3d_bbox_on_frame, which shadows it. Presumably a leftover frame
# index from an earlier experiment — confirm before removing.
frame = 11685

In [1]:
from decord import VideoLoader, cpu
import decord
# Clip sampling settings; both are forwarded to Video3DBBoxDataset further down.
frames_per_clip = 16
frame_step = 4

# Single place to repoint the notebook at a different copy of the dataset.
TUDL_ROOT = "/home/vgl/emir/datasets/tudl"
# Six-digit ids. The same id selects the scene folder, its segmentation video
# and the matching object model (scene 000001 <-> obj_000001.ply, ...).
SCENE_IDS = ["000001", "000002", "000003"]

# The four lists below are index-aligned: entry i of each list belongs to
# SCENE_IDS[i].
video_paths = [
    f"{TUDL_ROOT}/train_real/compressed_objects/segmentation_videos/{scene_id}/input.mp4"
    for scene_id in SCENE_IDS
]
K_jsons = [
    f"{TUDL_ROOT}/train_real/{scene_id}/scene_camera.json"
    for scene_id in SCENE_IDS
]
gt_jsons = [
    f"{TUDL_ROOT}/train_real/{scene_id}/scene_gt.json"
    for scene_id in SCENE_IDS
]
models = [
    f"{TUDL_ROOT}/tudl_models/models/obj_{scene_id}.ply"
    for scene_id in SCENE_IDS
]

In [2]:
# Select decord's 'torch' bridge. Presumably this makes decord hand decoded
# frames back as torch tensors rather than its own array type — confirm
# against the decord documentation.
decord.bridge.set_bridge('torch')

In [3]:
from utils import Video3DBBoxDataset

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
def visualize_3d_bbox_on_frame(frame, bbox):
    """
    Draw the projected wireframe of a 3D bounding box onto a copy of a frame.

    The eight box corners are built from the half-dimensions, rotated and
    translated by the box pose, multiplied by ``K`` and perspective-divided to
    obtain pixel coordinates. The twelve box edges are then drawn in green.

    Parameters:
    - frame: array-like image of shape (H, W, 3); anything ``np.asarray`` can
      convert is accepted. The input itself is never modified.
    - bbox: dictionary with keys
        'center'          -- 3 values, box centre [x, y, z]
        'dimensions'      -- full box extents along the local x, y, z axes
                             (only the first three values are used)
        'rotation_matrix' -- 3x3 rotation from box-local to posed coordinates
        'K'               -- 3x3 matrix applied directly to the posed corners
      Values may be numpy arrays, nested lists, or other array-likes.
      NOTE(review): because K is applied with no extrinsics in between, the
      pose is assumed to be expressed in the camera frame — confirm against
      the dataset.

    Returns:
    - frame_with_bbox: C-contiguous numpy copy of ``frame`` with the box edges
      drawn on it. An edge is skipped when either of its corners lies at or
      behind the camera (non-positive depth) or projects to a non-finite or
      absurdly distant pixel, since such an edge has no meaningful image.
    """
    # Corners with projective depth at or below this are treated as not in
    # front of the camera; dividing by them would flip or explode the result.
    min_depth = 1e-6
    # Upper bound on pixel magnitudes handed to cv2.line, kept well inside the
    # 32-bit integer range.
    max_pixel_coord = 2 ** 30

    # np.asarray lets callers pass lists or tensors as well as ndarrays.
    center = np.asarray(bbox['center'], dtype=np.float64).reshape(1, 3)
    half_extents = np.asarray(bbox['dimensions'], dtype=np.float64).reshape(-1)[:3] / 2.0
    rotation_matrix = np.asarray(bbox['rotation_matrix'], dtype=np.float64).reshape(3, 3)
    K = np.asarray(bbox['K'], dtype=np.float64).reshape(3, 3)

    # Sign pattern of the 8 corners in box-local coordinates; row index is the
    # corner id used by `edges` below.
    corner_signs = np.array([
        [-1, -1, -1],
        [-1, -1, 1],
        [-1, 1, -1],
        [-1, 1, 1],
        [1, -1, -1],
        [1, -1, 1],
        [1, 1, -1],
        [1, 1, 1],
    ], dtype=np.float64)
    corners_local = corner_signs * half_extents  # Shape: (8, 3)

    # Row-vector form of R @ p + c followed by K @ p.
    corners_posed = corners_local @ rotation_matrix.T + center  # Shape: (8, 3)
    corners_hom = corners_posed @ K.T  # Shape: (8, 3)
    depth = corners_hom[:, 2]

    # Only divide where the result is meaningful; everything else stays at
    # +inf so it is rejected by the range check that follows.
    in_front = np.isfinite(corners_hom).all(axis=1) & (depth > min_depth)
    corners_2d = np.full((8, 2), np.inf)
    corners_2d[in_front] = corners_hom[in_front, :2] / depth[in_front, None]
    drawable = in_front & (np.abs(corners_2d) < max_pixel_coord).all(axis=1)

    edges = [
        (0, 1), (1, 3), (3, 2), (2, 0),  # Face at local x = -dx
        (4, 5), (5, 7), (7, 6), (6, 4),  # Face at local x = +dx
        (0, 4), (1, 5), (2, 6), (3, 7),  # Edges parallel to the local x axis
    ]

    # asarray + copy makes exactly one copy and yields the C-contiguous buffer
    # that cv2 drawing functions need.
    frame_with_bbox = np.asarray(frame).copy()
    for start, end in edges:
        if not (drawable[start] and drawable[end]):
            continue
        # Round to the nearest pixel and hand cv2 plain Python ints.
        pt1 = tuple(int(v) for v in np.rint(corners_2d[start]))
        pt2 = tuple(int(v) for v in np.rint(corners_2d[end]))
        # Endpoints are deliberately NOT clamped to the image: clamping each
        # endpoint on its own changes the slope of the edge. cv2.line clips the
        # segment against the image boundaries itself.
        cv2.line(frame_with_bbox, pt1, pt2, color=(0, 255, 0), thickness=2)

    return frame_with_bbox

In [5]:
# Build the dataset from the per-scene videos, object models, camera files and
# ground-truth files configured earlier, with the same clip length and frame
# step.
# NOTE(review): Video3DBBoxDataset lives in the local `utils` module; how it
# pairs the four lists and how it interprets frame_step is not visible from
# this notebook — confirm against utils.py.
vd = Video3DBBoxDataset(
    video_paths=video_paths,
    models=models,
    K_jsons=K_jsons,
    gt_jsons=gt_jsons,
    frames_per_clip=frames_per_clip,
    frame_step=frame_step,
)

In [6]:
# Number of items the dataset exposes; the rendering loop later in the
# notebook iterates vd[i] over exactly this range.
len(vd)

477

In [9]:
# Open the output video ./bbox_vis.mp4 with the 'mp4v' FourCC, 30 fps and a
# 224x224 frame size.
# NOTE(review): assumes every frame written later is exactly 224x224 —
# verify against what Video3DBBoxDataset returns.
vr = cv2.VideoWriter("./bbox_vis.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30, (224, 224))

In [10]:
# Render every frame of every clip with its 3D box overlaid and append it to
# the output video.
# NOTE(review): cv2.VideoWriter expects BGR uint8 frames of the size it was
# opened with; the channel order and dtype of `x[j]` are not visible from this
# notebook — verify against Video3DBBoxDataset.
if not vr.isOpened():
    # Happens when the writer failed to open or this cell is re-run after a
    # previous release(); without this check every write() is a silent no-op.
    raise RuntimeError(
        "VideoWriter `vr` is not open; re-run the cell that creates it before this one."
    )
try:
    for i in range(len(vd)):
        x, bbox = vd[i]
        for j in range(frames_per_clip):
            frame_with_bbox = visualize_3d_bbox_on_frame(x[j], bbox[j])
            vr.write(frame_with_bbox)
finally:
    # Always finalize the container, even if a clip fails part-way through;
    # otherwise the partially written file is left unreadable.
    vr.release()

  frame_with_bbox = np.array(frame).copy()
