In [None]:
from waymo_open_dataset import dataset_pb2

import json
import os

import numpy as np
import tensorflow as tf
from PIL import Image
from tqdm import tqdm
from waymo_open_dataset import label_pb2
from waymo_open_dataset.protos import camera_segmentation_pb2 as cs_pb2
from waymo_open_dataset.utils import box_utils
from waymo_open_dataset.utils.frame_utils import parse_range_image_and_camera_projection


import matplotlib.pyplot as plt

In [None]:
from pathlib import Path


# Directory that is searched (non-recursively) for *.tfrecord files.
WOPD_DATA_ROOT = Path("/media/nvme1/waymo_perception/training")

# Path.glob yields entries in arbitrary, filesystem-dependent order. Sorting
# makes an index into this list refer to the same file on every run/machine.
tfrecords_file_list = sorted(WOPD_DATA_ROOT.glob("*.tfrecord"))


In [None]:
import io
from pyquaternion import Quaternion

from d123.common.geometry.base import StateSE3
from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3


# Frame attributes:
#   context: <class 'waymo_open_dataset.dataset_pb2.Context'>
#   timestamp_micros: <class 'int'>
#   pose: <class 'waymo_open_dataset.dataset_pb2.Transform'>
#   images: List with 5 images
#   lasers: <class 'google._upb._message.RepeatedCompositeContainer'>
#     Length: 5
#   laser_labels: <class 'google._upb._message.RepeatedCompositeContainer'>
#     Length: 0
#   projected_lidar_labels: <class 'google._upb._message.RepeatedCompositeContainer'>
#     Length: 0
#   camera_labels: <class 'google._upb._message.RepeatedCompositeContainer'>
#     Length: 0
#   no_label_zones: <class 'google._upb._message.RepeatedCompositeContainer'>
#     Length: 0
#   map_features: <class 'google._upb._message.RepeatedCompositeContainer'>
#     Length: 0
#   map_pose_offset: <class 'waymo_open_dataset.protos.vector_pb2.Vector3d'>

# Index into `tfrecords_file_list` selecting which TFRecord file to inspect.
file_idx = 0
if not tfrecords_file_list:
    # Fail with an actionable message instead of a bare IndexError on the lookup below.
    raise FileNotFoundError(f"No *.tfrecord files found in {WOPD_DATA_ROOT}")
pathname = tfrecords_file_list[file_idx]
dataset = tf.data.TFRecordDataset(pathname, compression_type="")
# Counts the records by iterating the dataset once, i.e. a full read of the file.
num_frames = sum(1 for _ in dataset)


def read_jpg_image(data: bytes) -> np.ndarray:
    """Decode encoded image bytes (JPEG expected) into a numpy array.

    Args:
        data: Raw bytes of the encoded image.

    Returns:
        The decoded image converted to a numpy array via PIL.
    """
    buffer = io.BytesIO(data)
    decoded = Image.open(buffer)
    pixels = np.array(decoded)
    return pixels




# Per-frame accumulators, reset on every run of this cell.
ego_state_se3s = []
front_images = []
boxes = []

dataset = tf.data.TFRecordDataset(pathname, compression_type="")

# Tip: iterate `frame.DESCRIPTOR.fields` to introspect which attributes a frame
# (or one of its images) carries.
for frame_idx, data in enumerate(dataset):

    frame = dataset_pb2.Frame()
    frame.ParseFromString(data.numpy())

    # Convert every lidar box label into a BoundingBoxSE3. Only the heading is
    # read from the label, so pitch and roll are fixed to zero.
    for label in frame.laser_labels:
        boxes.append(
            BoundingBoxSE3(
                center=StateSE3(
                    x=label.box.center_x,
                    y=label.box.center_y,
                    z=label.box.center_z,
                    pitch=0.0,
                    roll=0.0,
                    yaw=label.box.heading,
                ),
                length=label.box.length,
                width=label.box.width,
                height=label.box.height,
            )
        )

    print(frame.context)

    # Decode the front camera image. `front_images` feeds the video export cell
    # further down, which indexes `front_images[0]` and fails on an empty list.
    for image in frame.images:
        if image.name == dataset_pb2.CameraName.FRONT:
            front_images.append(read_jpg_image(image.image))

    # Ego pose: 4x4 transform stored as a flat sequence of 16 values.
    pose = np.array(frame.pose.transform).reshape(4, 4)
    # pyquaternion returns the Euler angles in (yaw, pitch, roll) order, i.e.
    # index 0 is yaw and index 2 is roll.
    yaw_pitch_roll = Quaternion(matrix=pose[:3, :3]).yaw_pitch_roll
    yaw, pitch, roll = yaw_pitch_roll
    # NOTE(review): the row layout is [x, y, z, roll, pitch, yaw]. These are the
    # same values in the same positions as before; only the labels are corrected
    # (index 2 was labelled "yaw" and index 0 "roll"). Confirm that this layout
    # is what StateSE3.from_array expects.
    ego_state_se3s.append(
        np.array(
            [
                pose[0, 3],  # x
                pose[1, 3],  # y
                pose[2, 3],  # z
                roll,
                pitch,
                yaw,
            ],
            dtype=np.float64,
        )
    )

    # Only the first frame is processed: the loop stops after index 0.
    if frame_idx == 0:
        break

ego_state_se3s = np.array(ego_state_se3s, dtype=np.float64)

In [None]:
# Walk through the dataset keeping the most recently parsed frame; stop once
# the frame at index 3 has been parsed (or earlier if the dataset runs out).
for frame_idx, data in enumerate(dataset):
    frame = dataset_pb2.Frame.FromString(data.numpy())
    if frame_idx == 3:
        break

# Translation column of the ego pose (4x4 transform stored as 16 flat values).
print("Ego")
ego_transform = np.asarray(frame.pose.transform).reshape((4, 4))
print(ego_transform[:-1, -1])

# Translation column of the pose attached to each camera image of the frame.
print("Frame")
for image in frame.images:
    image_transform = np.asarray(image.pose.transform).reshape((4, 4))
    print(image.name, image_transform[:-1, -1])

# Translation column of each camera's extrinsic from the static calibration.
# Note: `image` is a camera calibration entry in this loop, not an image.
print("Context")
for image in frame.context.camera_calibrations:
    image_transform = np.asarray(image.extrinsic.transform).reshape((4, 4))
    print(image.name, image_transform[:-1, -1])

# 1 [ 1.5441613  -0.02302364  2.11557864]
# 2 [1.49672397 0.0954948  2.11616463]
# 3 [ 1.49442485 -0.09637497  2.11519385]
# 4 [1.43213651 0.11612398 2.11625087]
# 5 [ 1.42936162 -0.11545043  2.1150792 ]

In [None]:

from d123.common.datatypes.time.time_point import TimePoint


# Only the first frame of the dataset is needed here, so stop after parsing it.
for frame_idx, data in enumerate(dataset):
    frame = dataset_pb2.Frame()
    frame.ParseFromString(data.numpy())
    break

# `timestamp_micros` is converted with `from_us`, the camera `pose_timestamp`
# with `from_s` (presumably seconds -- confirm against the dataset proto).
frame_timestamp = TimePoint.from_us(frame.timestamp_micros)
image_timestamp = TimePoint.from_s(frame.images[0].pose_timestamp)

# Show the frame time next to the first image's pose time, both in seconds.
frame_timestamp.time_s, image_timestamp.time_s

In [None]:
# Parse only the first record of the dataset. `FromString` is a classmethod that
# returns a new message, so no throwaway instance or fallback is needed; `next`
# yields None when the dataset is empty.
frame = next((dataset_pb2.Frame.FromString(data.numpy()) for data in dataset), None)
# if frame is None:
#     raise ValueError(f"No frames found in TFRecord {pathname}")

frame

In [None]:
from d123.common.datatypes.detection.detection_types import DetectionType
from d123.common.visualization.color.default import BOX_DETECTION_CONFIG, EGO_VEHICLE_CONFIG
from d123.common.visualization.matplotlib.observation import add_bounding_box_to_ax


# The ego box is placed at the all-zero pose; the boxes collected from
# `laser_labels` are drawn with their coordinates exactly as stored.
# NOTE(review): presumably the labels are expressed relative to the ego
# vehicle, which is why the ego sits at the origin -- confirm.
ego_rear_axle = StateSE3.from_array(np.zeros_like(ego_state_se3s[0]))

# Hard-coded ego dimensions (length / width / height).
ego_box = BoundingBoxSE3(center=ego_rear_axle, length=4.0, width=1.8, height=1.6)

# NOTE(review): every box is drawn with the VEHICLE style, regardless of the
# type of the label it was created from.
plot_config = BOX_DETECTION_CONFIG[DetectionType.VEHICLE]
fig, ax = plt.subplots(1, 1, figsize=(10, 10))

for box in boxes:
    add_bounding_box_to_ax(ax, box, plot_config)

add_bounding_box_to_ax(ax, ego_box, EGO_VEHICLE_CONFIG)
ax.set_aspect("equal")

In [None]:

# Plot column 0 against column 1 of the ego pose array (the x and y entries).
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.plot(*ego_state_se3s[:, :2].T)
ax.set_aspect("equal", adjustable="box")

In [None]:
import cv2

# Define the output video path
output_video_path = f"front_camera_video_{file_idx}.mp4"

# Fail early with a clear message instead of an IndexError on front_images[0].
if not front_images:
    raise ValueError("front_images is empty; collect the front camera frames before exporting a video.")

# The video frame size is taken from the first image (rows, columns).
height, width = front_images[0].shape[:2]

# Create the video writer
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # MP4 codec
fps = 10  # 10 frames per second as requested
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out.isOpened():
    # An unopened writer would otherwise drop every frame without any error.
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

# Write each frame to the video; always release the writer, even if a frame fails.
try:
    for img in tqdm(front_images, desc="Creating video"):
        # Convert from RGB to BGR (OpenCV uses BGR)
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        out.write(img_bgr)
finally:
    out.release()

print(f"Video saved to {output_video_path}")

In [None]:
# import pyarrow as pa
import pandas as pd
# parquet_file = "/home/daniel/Downloads/testing_location_vehicle_pose_10084636266401282188_1120_000_1140_000.parquet"

# Resolve the file below the current user's home directory instead of a
# hard-coded "/home/daniel", so the cell is not tied to a single account.
parquet_file = Path.home() / "Downloads" / "validation_stats_10203656353524179475_7625_000_7645_000.parquet"

df = pd.read_parquet(parquet_file)

df

In [None]:
# NOTE(review): this cell evaluated the bare name `array`, which is not defined
# anywhere in the notebook and raised NameError on a fresh "Run All".
# TODO: put the intended expression here or delete the cell.