A plug-and-play Colab to visualize samples from the `minival` split of the TAPVid-3D dataset (a split of randomly chosen examples from the whole dataset). Just click Runtime -> Run All to visualize a sample!

In [None]:
#@title Install and import a few libraries

# Install the third-party packages that are imported further down in this cell.
# Each install is wrapped in a pair of prints so the cell output reads
# "Installing <package>. Done." per package.

# mediapy: imported below as `media`.
print('Installing mediapy. ', end='')
!pip install -q mediapy
print('Done.')

# flow-vis (pinned to 0.1): imported below as `flow_vis`.
print('Installing flow-vis. ', end='')
!pip install -q flow-vis==0.1
print('Done.')

# scenepic: imported below as `sp`.
print('Installing scenepic. ', end='')
!pip install -q scenepic
print('Done.')

import numpy as np
import cv2
import IPython
import random
from google.cloud import storage

import torch
import torch.nn.functional as F
import torchvision.transforms as transforms

import matplotlib
from matplotlib import cm
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg


import mediapy as media
import flow_vis
import scenepic as sp

# Download the split definitions next to the notebook so they can be imported
# as a local module; MINIVAL_FILES is the list a random example is drawn from.
!wget https://raw.githubusercontent.com/google-deepmind/tapnet/main/tapnet/tapvid3d/splits/tapvid3d_splits.py -O tapvid3d_splits.py
from tapvid3d_splits import MINIVAL_FILES

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25hDone.
Installing flow-vis. Done.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m687.8/687.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hDone.
--2024-12-05 01:53:56--  https://raw.githubusercontent.com/google-deepmind/tapnet/main/tapnet/tapvid3d/splits/tapvid3d_splits.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 330096 (322K) [text/plain]
Saving to: ‘tapvid3d_splits.py’


2024-12-05 01:53:56 (7.20 MB/s) - ‘tapvid3d_splits.py’ saved [330096/330096]



In [None]:
#@title Select a random `minival` example, and download it (or specify a specific dataset example to view)

CHOOSE_RANDOM_VIDEO_TO_VIEW=False  # @param {type:"boolean", isTemplate: true}
SPECIFIC_EXAMPLE_TO_VIEW="tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz"  # @param ['None', 'Lite_release_recognition_GreenDecorationTall_seq031_6.npz', 'Apartment_release_meal_seq136_8.npz', 'Lite_release_recognition_WoodenBowl_seq032_1.npz', 'Apartment_release_work_seq108_5.npz', 'Apartment_release_decoration_seq138_6.npz', 'Apartment_release_multiskeleton_party_seq122_5.npz', 'Apartment_release_work_skeleton_seq138_5.npz', 'Apartment_release_decoration_seq138_5.npz', 'Apartment_release_clean_seq145_7.npz', 'Apartment_release_meal_seq139_3.npz', 'Apartment_release_multiuser_meal_seq134_0.npz', 'Apartment_release_meal_seq147_3.npz', 'Apartment_release_clean_seq140_3.npz', 'Apartment_release_work_skeleton_seq136_2.npz', 'Apartment_release_meal_seq133_5.npz', 'Lite_release_recognition_BlackCeramicBowl_seq033_3.npz', 'Apartment_release_work_seq106_2.npz', 'Apartment_release_decoration_seq133_6.npz', 'Apartment_release_work_skeleton_seq138_3.npz', 'Apartment_release_decoration_seq135_7.npz', 'Lite_release_recognition_WoodenFork_seq032_1.npz', 'Apartment_release_meal_seq136_2.npz', 'Apartment_release_work_seq107_4.npz', 'Apartment_release_work_seq109_7.npz', 'Lite_release_recognition_WoodenBoxSmall_seq031_2.npz', 'Apartment_release_multiuser_clean_seq119_4.npz', 'Lite_release_recognition_Flask_seq033_2.npz', 'Lite_release_recognition_BookDeepLearning_seq032_5.npz', 'Lite_release_recognition_DinoToy_seq031_3.npz', 'Apartment_release_multiuser_clean_seq114_0.npz', 'Apartment_release_multiuser_cook_seq144_4.npz', 'Apartment_release_multiuser_party_seq134_7.npz', 'Apartment_release_multiuser_party_seq133_3.npz', 'Lite_release_recognition_BookDeepLearning_seq032_1.npz', 'Apartment_release_multiskeleton_party_seq121_2.npz', 'Lite_release_recognition_BirdHouseToy_seq030_6.npz', 'Apartment_release_multiskeleton_party_seq121_1.npz', 'Apartment_release_multiuser_clean_seq120_0.npz', 'Apartment_release_multiuser_cook_seq115_8.npz', 
'Apartment_release_multiskeleton_party_seq126_8.npz', 'Apartment_release_multiskeleton_party_seq117_4.npz', 'Apartment_release_multiuser_cook_seq118_8.npz', 'Apartment_release_decoration_skeleton_seq134_4.npz', 'Apartment_release_meal_seq132_7.npz', 'Apartment_release_multiuser_cook_seq117_4.npz', 'Lite_release_recognition_WoodenBoxSmall_seq033_1.npz', 'Apartment_release_multiuser_meal_seq139_5.npz', 'Apartment_release_work_seq140_1.npz', 'Apartment_release_multiuser_cook_seq111_7.npz', 'Apartment_release_meal_seq138_4.npz', 'tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz', 'tapvid3d_2681180680221317256_1144_000_1164_000_2_erT5IMWqaVRCzF6oN66E7Q.npz', 'tapvid3d_10940952441434390507_1888_710_1908_710_2_ktWWj6EBhJZaj0IQHjSjZw.npz', 'tapvid3d_6674547510992884047_1560_000_1580_000_2_sLjGdDksAJG7GgV0xFdh0g.npz', 'tapvid3d_3872781118550194423_3654_670_3674_670_2_6upUeXn7HnQBNkgQ9ZomvQ.npz', 'tapvid3d_5268267801500934740_2160_000_2180_000_2_ZU0XFHUBm0q8zqV8PBXuUQ.npz', 'tapvid3d_10235335145367115211_5420_000_5440_000_2_4iry-kJsWTWnjLdVm-S8XA.npz', 'tapvid3d_16105359875195888139_4420_000_4440_000_2_yqvi3P9YV-xoDyx3PZKEIw.npz', 'tapvid3d_11967272535264406807_580_000_600_000_2_y5T1W9Gwcqsnzuc4pqX8Fw.npz', 'tapvid3d_16331619444570993520_1020_000_1040_000_1_5KdC4474H0F_3c2pzLEG3g.npz', 'tapvid3d_16578409328451172992_3780_000_3800_000_1_U-FL_r6V59Ml1puP3Ra7hQ.npz', 'tapvid3d_18136695827203527782_2860_000_2880_000_2_3IPjUxMMOG3mFqNmMutzqA.npz', 'tapvid3d_15062351272945542584_5921_360_5941_360_1_eiZ0zt164wCjt9lgslNFyg.npz', 'tapvid3d_17993467596234560701_4940_000_4960_000_1_XbksitbrlYR9_DvWA-JKqg.npz', 'tapvid3d_1022527355599519580_4866_960_4886_960_2_L58RM2TH_i-3sYbjr6JjQQ.npz', 'tapvid3d_6771922013310347577_4249_290_4269_290_1_CQ9kW2zcKGsyhWnMn71VFw.npz', 'tapvid3d_5459113827443493510_380_000_400_000_2_auYuu4nr89m2SJAXhx5csQ.npz', 'tapvid3d_11940460932056521663_1760_000_1780_000_1_VvP3Pijy57rez0dxOvGajQ.npz', 
'tapvid3d_4967385055468388261_720_000_740_000_3_8I6i37GamzdjGA0aVXFyrw.npz', 'tapvid3d_2475623575993725245_400_000_420_000_1_r_j-Erk4psYIRNnGvSSJzg.npz', 'tapvid3d_6638427309837298695_220_000_240_000_1_SbPukwTCEiap4DzMUklocw.npz', 'tapvid3d_5495302100265783181_80_000_100_000_1_Hb9oGoCRgYCdPA_DtouLKQ.npz', 'tapvid3d_15578655130939579324_620_000_640_000_2_LcTHEaU9_O9lx0ktOeLrBg.npz', 'tapvid3d_17386176497741125938_2180_000_2200_000_1_arpqjSyz9GYDTR9Eu7VQdw.npz', 'tapvid3d_2656110181316327570_940_000_960_000_1__rolS4YsRwsEQqHw6BNVTg.npz', 'tapvid3d_2681180680221317256_1144_000_1164_000_3_jnPMSEK1d86HLP-6zeEplQ.npz', 'tapvid3d_13862220583747475906_1260_000_1280_000_2_-qclBYH8q_I-LnwAnxzzjA.npz', 'tapvid3d_14250544550818363063_880_000_900_000_2_VDacMbTOddDXFtRVMF_m3w.npz', 'tapvid3d_16608525782988721413_100_000_120_000_1_EpLEMxIlwLHsikWvlfy54Q.npz', 'tapvid3d_3908622028474148527_3480_000_3500_000_1_-Jt1JVOOCOUNrj7A7zglBw.npz', 'tapvid3d_2863984611797967753_3200_000_3220_000_1_4zC7IlkORDhkPdNS3V286Q.npz', 'tapvid3d_2899357195020129288_3723_163_3743_163_1_ywJTCE1V98xZNGnNmY37Zg.npz', 'tapvid3d_11940460932056521663_1760_000_1780_000_1_mvawqieCtHi2txLJCl5qEA.npz', 'tapvid3d_5459113827443493510_380_000_400_000_1_bERYokPcM5jevThAHCLKsA.npz', 'tapvid3d_10876852935525353526_1640_000_1660_000_2_H7txZoV9099pUCGqWjv2mQ.npz', 'tapvid3d_6638427309837298695_220_000_240_000_2_F9t5HJfGP-79OPf7Us8eew.npz', 'tapvid3d_14133920963894906769_1480_000_1500_000_3_418OdBqhvhfpsZyTBEPhPg.npz', 'tapvid3d_12956664801249730713_2840_000_2860_000_2_dtmqCN3-JhK9Un7gCqs3Ug.npz', 'tapvid3d_33101359476901423_6720_910_6740_910_3_JNFn13if2djGYt0wQ682cw.npz', 'tapvid3d_17674974223808194792_8787_692_8807_692_2_rCRiNOIt-bOV910-j9YzxQ.npz', 'tapvid3d_16042886962142359737_1060_000_1080_000_1_-UuWM0RiGK5o-uBrPllMrQ.npz', 'tapvid3d_13207915841618107559_2980_000_3000_000_1_ZF1-vGaHNjltsRoVWjqYiQ.npz', 'tapvid3d_15365821471737026848_1160_000_1180_000_1_Q5Yo2qc51Rcbvkdu37A3sQ.npz', 
'tapvid3d_7089765864827567005_1020_000_1040_000_3_qXPrSpe3gRXqM88J3-2zlw.npz', 'tapvid3d_13862220583747475906_1260_000_1280_000_1_AvO7llz46ToGLClAYUV7yA.npz', 'tapvid3d_8582923946352460474_2360_000_2380_000_2_zRIkIIu-FqXPx1B25304Gg.npz', 'tapvid3d_14018515129165961775_483_260_503_260_3_AkhfkQrkuIYeFOVdMEji4A.npz', 'tapvid3d_16105359875195888139_4420_000_4440_000_2_q-uok16avK_6CqAlNECcDw.npz', 'tapvid3d_13862220583747475906_1260_000_1280_000_1_0igcubPJdfXj-jTWT0hKag.npz', 'tapvid3d_6038200663843287458_283_000_303_000_2_jasa6rpTGZZo6fyug-U9jA.npz', 'basketball_5.npz', 'softball_25.npz', 'boxes_22.npz', 'boxes_19.npz', 'juggle_8.npz', 'boxes_12.npz', 'boxes_6.npz', 'basketball_29.npz', 'tennis_28.npz', 'tennis_22.npz', 'basketball_9.npz', 'basketball_24.npz', 'football_3.npz', 'tennis_17.npz', 'softball_21.npz', 'tennis_23.npz', 'juggle_5.npz', 'football_1.npz', 'tennis_5.npz', 'basketball_6.npz', 'basketball_14.npz', 'football_21.npz', 'football_19.npz', 'basketball_4.npz', 'basketball_3.npz', 'softball_2.npz', 'boxes_11.npz', 'juggle_4.npz', 'softball_23.npz', 'juggle_7.npz', 'football_16.npz', 'boxes_29.npz', 'boxes_7.npz', 'juggle_9.npz', 'boxes_17.npz', 'juggle_22.npz', 'football_29.npz', 'football_22.npz', 'boxes_28.npz', 'tennis_2.npz', 'softball_9.npz', 'basketball_13.npz', 'tennis_4.npz', 'football_7.npz', 'softball_19.npz', 'basketball_20.npz', 'tennis_26.npz', 'softball_14.npz', 'boxes_5.npz', 'boxes_27.npz']

if CHOOSE_RANDOM_VIDEO_TO_VIEW:
  print("Choosing random sample...")
  chosen_filename = random.choice(MINIVAL_FILES)
else:
  chosen_filename = SPECIFIC_EXAMPLE_TO_VIEW

print(f'Downloading example: {chosen_filename}')
file_url = f'https://storage.googleapis.com/dm-tapnet/tapvid3d/release_files/minival_v1.0/{chosen_filename}'
!wget $file_url -O $chosen_filename
print("Done downloading!")

Downloading example: tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz
--2024-12-05 02:00:08--  https://storage.googleapis.com/dm-tapnet/tapvid3d/release_files/minival_v1.0/tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.98.207, 74.125.134.207, 142.251.107.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.98.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13738380 (13M) [application/octet-stream]
Saving to: ‘tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz’


2024-12-05 02:00:08 (35.6 MB/s) - ‘tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz’ saved [13738380/13738380]

Done downloading!


In [None]:
#@title Parse and examine contents of the dataset example file

# Read every array out of the archive while the file handle is still open.
with open(chosen_filename, 'rb') as npz_file:
  example = np.load(npz_file)
  images_jpeg_bytes = example['images_jpeg_bytes']
  queries_xyt = example['queries_xyt']
  tracks_xyz = example['tracks_XYZ']
  visibility = example['visibility']
  intrinsics = example['fx_fy_cx_cy']
  # Camera extrinsics are optional: not every example file carries them.
  extrinsics_w2c = example['extrinsics_w2c'] if 'extrinsics_w2c' in example.files else None

# Decode each JPEG-encoded frame (OpenCV yields BGR), convert it to RGB and
# stack the frames along a new leading axis.
video = np.stack(
    [
        cv2.cvtColor(
            cv2.imdecode(np.frombuffer(jpeg, np.uint8), flags=cv2.IMREAD_UNCHANGED),
            cv2.COLOR_BGR2RGB,
        )
        for jpeg in images_jpeg_bytes
    ],
    axis=0,
)

# Summarize what was loaded.
print(f"In example {chosen_filename}:")
print(f"  images_jpeg_bytes: {len(images_jpeg_bytes)} frames, each stored as JPEG bytes (and after decoding, the video shape: {video.shape})")
print(f"  intrinsics: (fx, fy, cx, cy)={intrinsics}", intrinsics.dtype)
for label, array in (
    ('tracks_xyz', tracks_xyz),
    ('visibility', visibility),
    ('queries_xyt', queries_xyt),
):
  print(f"  {label}: {array.shape}", array.dtype)
if extrinsics_w2c is not None:
  print(f"  extrinsics_w2c: {extrinsics_w2c.shape}", extrinsics_w2c.dtype)

In example tapvid3d_9142545919543484617_86_000_106_000_2_5AKc-TYQochsSWXpv376cA.npz:
  images_jpeg_bytes: 25 frames, each stored as JPEG bytes (and after decoding, the video shape: (25, 1280, 1920, 3))
  intrinsics: (fx, fy, cx, cy)=[2082.68450581 2082.68450581  984.53642746  643.27569684] float64
  tracks_xyz: (25, 256, 3) float64
  visibility: (25, 256) bool
  queries_xyt: (256, 3) float64
  extrinsics_w2c: (25, 4, 4) float64


In [None]:
# @title Limit number of frames and tracks for visualization
NUM_FRAMES=100  # @param {type:"integer", isTemplate: true}
NUM_TRACKS = 300 # @param {type:"integer", isTemplate: true}

# Keep only the first NUM_FRAMES frames. Every per-frame array is clipped
# together (including the optional camera extrinsics) so that later cells see
# the same number of frames in the video, the tracks and the camera trajectory.
if video.shape[0] > NUM_FRAMES:
  video = video[:NUM_FRAMES]
  tracks_xyz = tracks_xyz[:NUM_FRAMES]
  visibility = visibility[:NUM_FRAMES]
  if extrinsics_w2c is not None:
    extrinsics_w2c = extrinsics_w2c[:NUM_FRAMES]

# Keep a random subset of NUM_TRACKS tracks (sampled without replacement),
# applying the same selection to the positions and to the visibility flags.
if tracks_xyz.shape[1] > NUM_TRACKS:
  indices = np.random.choice(tracks_xyz.shape[1], NUM_TRACKS, replace=False)
  tracks_xyz = tracks_xyz[:, indices]
  visibility = visibility[:, indices]

In [None]:
# @title Sort points by their height in 3D for rainbow visualization

# Order the tracks by the Y coordinate they have in the first frame, so that
# consecutive track indices (and hence neighbouring colors) follow height.
sorted_indices = tracks_xyz[0, ..., 1].argsort()
tracks_xyz, visibility = (
    np.take(per_track, sorted_indices, axis=1)
    for per_track in (tracks_xyz, visibility)
)

In [None]:
# @title Project to 2D in pixel coordinates

def project_points_to_video_frame(camera_pov_points3d, camera_intrinsics, height, width):
  """Projects camera-frame 3D points onto the image plane with a pinhole model.

  Args:
    camera_pov_points3d: array whose last axis holds (X, Y, Z) per point.
    camera_intrinsics: the four values (f_u, f_v, c_u, c_v).
    height: image height in pixels, used for the in-bounds test on v.
    width: image width in pixels, used for the in-bounds test on u.

  Returns:
    A pair (pixels, mask). `pixels` stacks (u, v) on a new last axis. `mask`
    is True where Z >= 1 and the projection satisfies 0 <= u < width and
    0 <= v < height.
  """
  x, y, z = (camera_pov_points3d[..., axis] for axis in range(3))
  f_u, f_v, c_u, c_v = camera_intrinsics

  # The small epsilon keeps the perspective division finite at Z == 0.
  depth = z + 1e-8
  u_px = x / depth * f_u + c_u
  v_px = y / depth * f_v + c_v

  # In front of the camera (Z >= 1) and inside the image rectangle.
  in_front = z >= 1
  inside_u = (u_px >= 0) & (u_px < width)
  inside_v = (v_px >= 0) & (v_px < height)
  masks = in_front & inside_u & inside_v

  return np.stack([u_px, v_px], axis=-1), masks


# Project all tracks into pixel coordinates; the frame size comes from the
# decoded video (axis 1 is height, axis 2 is width).
tracks_xy, infront_cameras = project_points_to_video_frame(
    camera_pov_points3d=tracks_xyz,
    camera_intrinsics=intrinsics,
    height=video.shape[1],
    width=video.shape[2],
)
print(f"  tracks_xy: {tracks_xy.shape}")
print(f"  infront_cameras: {infront_cameras.shape}")

  tracks_xy: (25, 256, 2)
  infront_cameras: (25, 256)


In [None]:
# @title Visualize 2D point trajectories

def plot_2d_tracks(video, points, visibles, infront_cameras=None, tracks_leave_trace=16, show_occ=False):
  """Draws 2D point tracks, with fading trails, on top of the video frames.

  Args:
    video: frames indexed by time; each frame is copied before drawing.
    points: pixel coordinates indexed as [frame, point, (x, y)].
    visibles: per-frame, per-point visibility flags.
    infront_cameras: per-frame, per-point flags used for occluded points when
      `show_occ` is set. Defaults to all True.
    tracks_leave_trace: number of past frames included in each trail.
    show_occ: if True, points that are not visible but are flagged in
      `infront_cameras` are drawn too (as outlined rather than filled circles).

  Returns:
    The annotated frames stacked along a new leading axis.
  """
  num_frames, num_points = points.shape[:2]

  # One color per point, spread across the 'hsv' colormap, scaled to 0..255.
  hsv = matplotlib.colormaps.get_cmap('hsv')
  normalize = matplotlib.colors.Normalize(vmin=0, vmax=num_points - 1)
  point_colors = np.array(
      [np.array(hsv(normalize(idx)))[:3] * 255 for idx in range(num_points)],
      dtype=float,
  ).reshape(num_points, 3)

  if infront_cameras is None:
    infront_cameras = np.ones_like(visibles).astype(bool)

  def to_pixel(xy):
    # Round a float (x, y) pair to the integer pixel OpenCV expects.
    return int(round(xy[0])), int(round(xy[1]))

  rendered = []
  for t in range(num_frames):
    canvas = video[t].copy()

    # Window of recent frames that forms the trail ending at frame t.
    window_start = max(0, t - tracks_leave_trace)
    trail_xy = points[window_start : t + 1]
    trail_vis = visibles[window_start : t + 1]
    trail_front = infront_cameras[window_start : t + 1]
    num_segments = trail_xy.shape[0] - 1

    for s in range(num_segments):
      before = canvas.copy()

      for i in range(num_points):
        both_visible = trail_vis[s, i] and trail_vis[s + 1, i]
        both_infront = show_occ and trail_front[s, i] and trail_front[s + 1, i]
        if both_visible or both_infront:
          cv2.line(
              canvas,
              to_pixel(trail_xy[s, i]),
              to_pixel(trail_xy[s + 1, i]),
              point_colors[i],
              1,
              cv2.LINE_AA,
          )

      # Blend with the pre-segment image: older segments end up fainter.
      alpha = (s + 1) / num_segments
      canvas = cv2.addWeighted(canvas, alpha, before, 1 - alpha, 0)

    # Mark the current position: filled when visible, outlined when occluded.
    for i in range(num_points):
      if visibles[t, i]:
        thickness = -1
      elif show_occ and infront_cameras[t, i]:
        thickness = 1
      else:
        continue
      cv2.circle(canvas, to_pixel(points[t, i]), 2, point_colors[i], thickness)

    rendered.append(canvas)

  return np.stack(rendered)


# Overlay the projected tracks (occluded ones included) on the RGB frames,
# then shrink the result before displaying it inline.
video2d_viz = plot_2d_tracks(
    video,
    tracks_xy,
    visibility,
    infront_cameras=infront_cameras,
    show_occ=True,
)
resized_video2d_viz = media.resize_video(video2d_viz, (480, 640))
media.show_video(resized_video2d_viz, fps=15)

0
This browser does not support the video tag.


In [None]:
# @title Visualize 3D point trajectories (takes a long time if there are lots of trajectories...)!

from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

def plot_3d_tracks(points, visibles, infront_cameras=None, tracks_leave_trace=16, show_occ=False):
  """Renders 3D point tracks as a sequence of matplotlib 3D plots.

  The plot axes are remapped: plot-x is points[..., 0], plot-y is
  points[..., 2] and plot-z is points[..., 1], with the plot-z axis inverted.
  All three axes share the same extent, centred on the data.

  Args:
    points: 3D positions indexed as [frame, point, (X, Y, Z)].
    visibles: per-frame, per-point visibility flags.
    infront_cameras: per-frame, per-point flags used when `show_occ` is set.
      Defaults to all True.
    tracks_leave_trace: number of past frames included in each trail.
    show_occ: if True, points flagged in `infront_cameras` are drawn even
      when they are not visible, and they also define the axis limits.

  Returns:
    The rendered RGB frames as one array (empty if there are no frames).
  """
  num_frames, num_points = points.shape[0:2]

  color_map = matplotlib.colormaps.get_cmap('hsv')
  cmap_norm = matplotlib.colors.Normalize(vmin=0, vmax=num_points - 1)

  if infront_cameras is None:
    infront_cameras = np.ones_like(visibles).astype(bool)

  # Axis limits come from the points that can actually be drawn. If that
  # selection is empty (nothing visible / in front), fall back to all points,
  # and finally to a unit box, instead of failing on an empty min/max.
  bounds_mask = np.asarray(infront_cameras if show_occ else visibles, dtype=bool)
  selected = points[bounds_mask]
  if selected.shape[0] == 0:
    selected = points.reshape(-1, points.shape[-1])
  if selected.shape[0] == 0:
    lower, upper = np.zeros(3), np.ones(3)
  else:
    lower, upper = selected.min(axis=0), selected.max(axis=0)

  # Note the axis swap: plot-y is the data's Z, plot-z is the data's Y.
  x_min, x_max = lower[0], upper[0]
  y_min, y_max = lower[2], upper[2]
  z_min, z_max = lower[1], upper[1]

  # Use the largest extent on every axis so the plot is not distorted.
  interval = np.max([x_max - x_min, y_max - y_min, z_max - z_min])
  x_min = (x_min + x_max) / 2 - interval / 2
  x_max = x_min + interval
  y_min = (y_min + y_max) / 2 - interval / 2
  y_max = y_min + interval
  z_min = (z_min + z_max) / 2 - interval / 2
  z_max = z_min + interval

  frames = []
  for t in range(num_frames):
    # A fresh off-screen figure per frame, rendered with the Agg canvas.
    fig = Figure(figsize=(6.4, 4.8))
    canvas = FigureCanvasAgg(fig)
    ax = fig.add_subplot(111, projection='3d', computed_zorder=False)

    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min, y_max])
    ax.set_zlim([z_min, z_max])

    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])

    ax.invert_zaxis()
    ax.view_init()

    for i in range(num_points):
      if visibles[t, i] or (show_occ and infront_cameras[t, i]):
        color = color_map(cmap_norm(i))
        # Trail over the last `tracks_leave_trace` frames, then the end point.
        line = points[max(0, t - tracks_leave_trace) : t + 1, i]
        ax.plot(xs=line[:, 0], ys=line[:, 2], zs=line[:, 1], color=color, linewidth=1)
        end_point = points[t, i]
        ax.scatter(xs=end_point[0], ys=end_point[2], zs=end_point[1], color=color, s=3)

    fig.subplots_adjust(left=-0.05, right=1.05, top=1.05, bottom=-0.05)
    canvas.draw()
    # Copy the RGB channels out of the canvas buffer right away.
    frames.append(np.asarray(canvas.buffer_rgba())[..., :3].copy())
  return np.array(frames)


# Render the 3D trajectories (occluded points included), then shrink the
# result before displaying it inline.
video3d_viz = plot_3d_tracks(
    tracks_xyz,
    visibility,
    infront_cameras=infront_cameras,
    show_occ=True,
)
resized_video3d_viz = media.resize_video(video3d_viz, (480, 640))
media.show_video(resized_video3d_viz, fps=15)

0
This browser does not support the video tag.


In [None]:
#@title Visualize 2D and 3D tracks side-by-side

# Bring both visualizations to the same size, then join them along the width
# axis (axis 2) so the 2D view sits to the left of the 3D view.
resized_video2d_viz, resized_video3d_viz = (
    media.resize_video(viz, (480, 640)) for viz in (video2d_viz, video3d_viz)
)
side_by_side_viz = np.concatenate(
    (resized_video2d_viz, resized_video3d_viz), axis=2
)
media.show_video(side_by_side_viz, fps=15)

0
This browser does not support the video tag.


In [None]:
# @title Visualize the point trajectories interactively in 3D (might require zooming in and out to get a clear visual with all points in viewing frame)

def create_axis(scene, n_lines=10, min_x = -2, max_x = 2, min_y = -1.5, max_y = 1.5, min_z = 1, max_z = 5):
  """Builds three grid meshes on the faces x=min_x, y=max_y and z=max_z.

  Each grid consists of two families of `n_lines` evenly spaced line
  segments spanning the given box, drawn in a uniform gray.

  Args:
    scene: object providing `create_mesh(name)`; the returned meshes must
      provide `add_lines(start, end, color=...)`.
    n_lines: number of lines in each family of each grid.
    min_x, max_x, min_y, max_y, min_z, max_z: extents of the box.

  Returns:
    The meshes named "xplane", "yplane" and "zplane", in that order.
  """
  x_ticks = np.linspace(min_x, max_x, n_lines)
  y_ticks = np.linspace(min_y, max_y, n_lines)
  z_ticks = np.linspace(min_z, max_z, n_lines)

  def const(value):
    # A column of n_lines copies of one coordinate value.
    return np.full(n_lines, value, dtype=float)

  def xyz(x, y, z):
    # Assemble per-line coordinates into an (n_lines, 3) array.
    return np.stack((x, y, z), axis=-1)

  def grid(name, first_family, second_family):
    # Each family is a (start_points, end_points) pair; both go in one mesh.
    mesh = scene.create_mesh(name)
    (first_start, first_end), (second_start, second_end) = first_family, second_family
    mesh.add_lines(
        np.concatenate((first_start, second_start), axis=0),
        np.concatenate((first_end, second_end), axis=0),
        color=np.full((3, 1), 0.2),
    )
    return mesh

  # Face x = min_x: lines along y (one per z tick) and along z (one per y tick).
  x_plane = grid(
      "xplane",
      (xyz(const(min_x), const(min_y), z_ticks), xyz(const(min_x), const(max_y), z_ticks)),
      (xyz(const(min_x), y_ticks, const(min_z)), xyz(const(min_x), y_ticks, const(max_z))),
  )

  # Face y = max_y: lines along x (one per z tick) and along z (one per x tick).
  y_plane = grid(
      "yplane",
      (xyz(const(min_x), const(max_y), z_ticks), xyz(const(max_x), const(max_y), z_ticks)),
      (xyz(x_ticks, const(max_y), const(min_z)), xyz(x_ticks, const(max_y), const(max_z))),
  )

  # Face z = max_z: lines along y (one per x tick) and along x (one per y tick).
  z_plane = grid(
      "zplane",
      (xyz(x_ticks, const(min_y), const(max_z)), xyz(x_ticks, const(max_y), const(max_z))),
      (xyz(const(min_x), y_ticks, const(max_z)), xyz(const(max_x), y_ticks, const(max_z))),
  )

  return x_plane, y_plane, z_plane

def get_interactive_3d_visualization(XYZ, h, w, fx, fy, cx, cy, framerate=15):
  """Displays the 3D tracks as an interactive, animated ScenePic scene.

  One scene frame is created for every input frame. Each frame shows the
  points as colored spheres, the trail segments of up to the last 10 steps,
  a camera frustum at the origin and three reference grids.

  Args:
    XYZ: 3D positions indexed as [frame, point, (X, Y, Z)].
    h: canvas height; also used with `fy` to derive the vertical field of view.
    w: canvas width.
    fx: accepted for signature symmetry with the intrinsics; not used here.
    fy: vertical focal length, used to derive the vertical field of view.
    cx: not used here.
    cy: not used here.
    framerate: playback frame rate assigned to the scene.

  Returns:
    None. The HTML widget is displayed and usage hints are printed.
  """
  n_frames, n_points = XYZ.shape[:2]
  fov_y = (np.arctan2(h/2, fy) * 180 / np.pi) * 2

  # compute colors: order the points by first-frame Y and spread them over 'hsv'
  color_map = plt.get_cmap('hsv')
  Y = XYZ[0,:,1]
  XYZ = XYZ[:,np.argsort(Y),:]
  colors = color_map(np.linspace(0,1,n_points))[:,:3]

  # create scene; the camera sits at the origin looking along +Z with -Y up
  scene = sp.Scene()
  scene.framerate = framerate
  camera = sp.Camera(center=np.zeros(3), aspect_ratio=w/h, fov_y_degrees=fov_y, look_at=np.array([0.,0.,1.]), up_dir=np.array([0.,-1.,0.]))
  canvas = scene.create_canvas_3d(width=w, height=h, shading=sp.Shading(bg_color=sp.Colors.White), camera=camera)

  # create axis and frustrum
  x_plane, y_plane, z_plane = create_axis(scene)
  frustrum = scene.create_mesh("frustrum")
  frustrum.add_camera_frustum(camera, sp.Colors.Red, depth=0.5, thickness=0.002)

  # create track spheres (one instanced sphere per point)
  spheres = scene.create_mesh("spheres")
  spheres.add_sphere(sp.Colors.White, transform=sp.Transforms.Scale(0.02))
  spheres.enable_instancing(XYZ[0], colors=colors)

  # create track trails: lines_t[k] holds the segments from frame k to k + 1
  lines_t = []
  for t in range(1, n_frames):
    start_XYZ = XYZ[t-1]
    end_XYZ = XYZ[t]
    mesh = scene.create_mesh(f"lines_{t}")
    mesh.add_lines(np.concatenate((start_XYZ, colors), axis=-1), np.concatenate((end_XYZ, colors), axis=-1))
    lines_t.append(mesh)

  # create scene frames. Iterate over ALL frames: frame i only needs the
  # segments lines_t[j] with j < i, so the last frame (i == n_frames - 1) is
  # valid too and must not be dropped.
  for i in range(n_frames):
    frame = canvas.create_frame()
    frame.add_mesh(frustrum)
    for j in range(max(0, i-10), i):
      frame.add_mesh(lines_t[j])
    spheres_updated = scene.update_instanced_mesh("spheres", XYZ[i], colors=colors)
    frame.add_mesh(spheres_updated)
    frame.add_mesh(x_plane)
    frame.add_mesh(y_plane)
    frame.add_mesh(z_plane)

  scene.quantize_updates()

  # generate html: the library source and the scene script are inlined, and
  # the script's window.onload handler is renamed so it can be called directly
  SP_LIB = sp.js_lib_src()
  SP_SCRIPT = scene.get_script().replace(
      'window.onload = function()', 'function scenepic_main_function()'
  )
  HTML_string = (
      '<!DOCTYPE html>'
      '<html lang="en">'
      '<head>'
        '<meta charset="utf-8">'
        '<title>ScenePic </title>'
        f'<script>{SP_LIB}</script>'
        f'<script>{SP_SCRIPT} scenepic_main_function();</script>'
      '</head>'
      '<body onload="scenepic_main_function()"></body>'
      '</html>'
  )
  html_object = IPython.display.HTML(HTML_string)
  IPython.display.display(html_object)
  print('Press PLAY ▶ to start animation')
  print(' - Drag with mouse to rotate')
  print(' - Use mouse-wheel for zoom')
  print(' - Shift to pan')
  print(' - Use camera button 📷 to restore camera view')
# Frame height and width come from the decoded video; the intrinsics array is
# ordered (fx, fy, cx, cy).
get_interactive_3d_visualization(
    tracks_xyz,
    h=video.shape[1],
    w=video.shape[2],
    fx=intrinsics[0],
    fy=intrinsics[1],
    cx=intrinsics[2],
    cy=intrinsics[3],
)


In [None]:
#@title Visualize camera extrinsics

import matplotlib.pyplot as plt
from scipy.spatial.transform import Rotation as R

def plot_camera_trajectory(camera_rotations, camera_positions, plot3d_elev=30, plot3d_azim=10, resolution=(256, 256), forward_axis=(0, 0, -1)):
  """Renders the camera path and viewing direction as a sequence of 3D plots.

  Args:
    camera_rotations: per-frame 3x3 rotation matrices.
    camera_positions: per-frame camera positions, indexed as [frame, (x, y, z)].
    plot3d_elev: elevation angle of the 3D view.
    plot3d_azim: azimuth angle of the 3D view.
    resolution: (width, height) of the rendered figure in pixels.
    forward_axis: camera-frame direction that is rotated to obtain the
      orientation arrow. The default keeps the original -Z assumption.
      NOTE(review): the projection code in this notebook treats +Z as "in
      front of the camera"; confirm which convention the extrinsics follow
      and pass (0, 0, 1) if they match it.

  Returns:
    The rendered RGB frames stacked along a new leading axis.
  """
  num_frames = camera_positions.shape[0]

  # The inputs are rotation matrices; rotate the forward axis by each of them.
  rotations = R.from_matrix(camera_rotations)
  camera_directions = rotations.apply(np.asarray(forward_axis, dtype=float))

  # Fixed axis limits that enclose the whole trajectory.
  x_range = [np.min(camera_positions[..., 0]), np.max(camera_positions[..., 0])]
  y_range = [np.min(camera_positions[..., 1]), np.max(camera_positions[..., 1])]
  z_range = [np.min(camera_positions[..., 2]), np.max(camera_positions[..., 2])]

  # Scale the orientation arrow relative to the total path length.
  differences = np.diff(camera_positions, axis=0)
  distances = np.linalg.norm(differences, axis=1)
  trajectory_length = np.sum(distances)
  quiver_len = trajectory_length*0.001

  dpi = 100
  figsize = (resolution[0] / dpi, resolution[1] / dpi)

  # Set up the plot
  fig = plt.figure(figsize=figsize, dpi=dpi)
  frames = []
  try:
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=plot3d_elev, azim=plot3d_azim)

    for t in range(num_frames):
      # Clearing resets limits and labels, so they are re-applied per frame.
      ax.cla()
      ax.set_xlim(x_range)
      ax.set_ylim(y_range)
      ax.set_zlim(z_range)
      ax.set_xlabel("X")
      ax.set_ylabel("Y")
      ax.set_zlabel("Z")

      # Plot trajectory up to current frame
      ax.plot(camera_positions[:t+1, 0], camera_positions[:t+1, 1], camera_positions[:t+1, 2], 'b-', label='Camera Trajectory')

      # Plot camera position and orientation
      ax.quiver(camera_positions[t, 0], camera_positions[t, 1], camera_positions[t, 2],
                camera_directions[t, 0], camera_directions[t, 1], camera_directions[t, 2],
                color='r', length=quiver_len, normalize=True, label='Camera Orientation')

      # Capture the frame from the RGBA buffer (the deprecated tostring_rgb
      # path is no longer used) and keep only the RGB channels.
      fig.canvas.draw()
      frames.append(np.asarray(fig.canvas.buffer_rgba())[..., :3].copy())
  finally:
    # Close this specific figure, even if rendering failed part-way.
    plt.close(fig)
  return np.stack(frames)


if extrinsics_w2c is None:
  print("No camera extrinsics available!")
else:
  # Invert world-to-camera to get camera-to-world: its upper-left 3x3 block is
  # the rotation and the first three rows of its last column the position.
  extrinsics_c2w = np.linalg.inv(extrinsics_w2c)
  extrinsics_plot_video = plot_camera_trajectory(
      camera_rotations=extrinsics_c2w[:, :3, :3],
      camera_positions=extrinsics_c2w[:, :3, -1],
  )

  # Show the trajectory plot next to the RGB video, both at the same size.
  resized_video = media.resize_video(video, (256, 256))
  resized_extrinsics_plot_video = media.resize_video(extrinsics_plot_video, (256, 256))

  media.show_videos(
      {
          "extrinsics": resized_extrinsics_plot_video,
          "rgb_video": resized_video,
      },
      fps=10,
      columns=5,
  )

  rgb_array = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(height, width, 3)


0,1
extrinsics  This browser does not support the video tag.,rgb_video  This browser does not support the video tag.
