In [None]:
import os
import glob
from pathlib import Path
import numpy as np
import pyarrow as pa
import pyarrow.ipc as ipc
from PIL import Image

In [None]:
def process_images_to_arrow(input_folder, output_file, batch_size=100):
    """
    Pack the JPEG files of a folder into an Arrow IPC file.

    Every image is stored as its original, still-encoded JPEG byte string in a
    single binary column named ``image_array``; nothing is decoded. Files are
    written in sorted path order, ``batch_size`` images per record batch, so
    the row index of an image is reproducible between runs.

    If the folder contains no JPEG files, a message is printed and no output
    file is created. Files that cannot be read are reported and skipped.

    Args:
        input_folder (str): Path to folder containing JPEG images
        output_file (str): Path to output Arrow IPC file
        batch_size (int): Number of images to store in each record batch

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # The set removes duplicates returned when pattern matching is
    # case-insensitive (e.g. on Windows "*.jpg" and "*.JPG" match the same
    # file); sorting makes the row order deterministic.
    jpeg_extensions = ["*.jpg", "*.jpeg", "*.JPG", "*.JPEG"]
    image_files = sorted(
        {
            path
            for ext in jpeg_extensions
            for path in glob.glob(os.path.join(input_folder, ext))
        }
    )

    if not image_files:
        print(f"No JPEG files found in {input_folder}")
        return

    # Validate before the output file is opened so a bad argument does not
    # leave a truncated file behind.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    print(f"Found {len(image_files)} JPEG files")

    schema = pa.schema([pa.field("image_array", pa.binary())])
    num_batches = (len(image_files) - 1) // batch_size + 1

    # Both context managers guarantee that the writer is finalised and the
    # file handle closed even if a batch fails half-way through.
    with open(output_file, "wb") as sink, ipc.new_file(sink, schema) as writer:
        for batch_number, start in enumerate(range(0, len(image_files), batch_size), start=1):
            print(f"Processing batch {batch_number}/{num_batches}")

            image_bytes = []
            for img_path in image_files[start : start + batch_size]:
                try:
                    with open(img_path, "rb") as img_file:
                        image_bytes.append(img_file.read())
                except OSError as e:
                    # Best effort: report the unreadable file and carry on.
                    print(f"Error processing {img_path}: {e}")

            # Skip the batch entirely when none of its files could be read.
            if image_bytes:
                writer.write_batch(
                    pa.record_batch([pa.array(image_bytes, type=pa.binary())], schema=schema)
                )

    print(f"Successfully saved images to {output_file}")


def read_arrow_file(arrow_file):
    """
    Print a summary of an Arrow IPC file.

    Shows the schema, the number of record batches, the row count of every
    batch and the total row count. When the file has a ``filename`` column,
    the first few entries of the first batch are printed as well; files
    without that column (such as those holding only ``image_array``) are
    summarised without failing.

    Args:
        arrow_file (str): Path to Arrow IPC file
    """
    with open(arrow_file, "rb") as f:
        reader = ipc.open_file(f)

        print(f"Schema: {reader.schema}")
        print(f"Number of record batches: {reader.num_record_batches}")

        total_images = 0
        first_batch = None
        for i in range(reader.num_record_batches):
            batch = reader.get_batch(i)
            if first_batch is None:
                # Keep the first batch so it does not have to be read again.
                first_batch = batch
            total_images += len(batch)
            print(f"Batch {i}: {len(batch)} images")

        print(f"Total images: {total_images}")

        # Filenames are optional: indexing a missing column raises KeyError.
        if first_batch is not None and "filename" in reader.schema.names:
            print(f"First few filenames: {first_batch['filename'][:5].to_pylist()}")


def reconstruct_image(arrow_file, filename, output_path):
    """
    Look up one image by filename in an Arrow IPC file and save it to disk.

    The lookup needs a ``filename`` and an ``image_array`` column. If either
    is missing, a message is printed and nothing is written. Two layouts of
    ``image_array`` are handled:

    * encoded image bytes (binary column): decoded with PIL;
    * flattened pixel values (list column): reshaped with the row's
      ``height``, ``width`` and ``channels`` columns.

    Args:
        arrow_file (str): Path to Arrow IPC file
        filename (str): Name of the image file to reconstruct
        output_path (str): Path to save reconstructed image
    """
    import io  # local import: not part of the notebook's top-level import cell

    with open(arrow_file, "rb") as f:
        reader = ipc.open_file(f)

        missing = {"filename", "image_array"} - set(reader.schema.names)
        if missing:
            print(f"Cannot look up {filename}: Arrow file has no column(s) {sorted(missing)}")
            return

        for i in range(reader.num_record_batches):
            batch = reader.get_batch(i)

            # Find the image by filename
            filenames = batch["filename"].to_pylist()
            if filename not in filenames:
                continue
            idx = filenames.index(filename)

            # as_py() is the scalar accessor: bytes for a binary column,
            # a Python list for a list column.
            payload = batch["image_array"][idx].as_py()
            if isinstance(payload, (bytes, bytearray)):
                img = Image.open(io.BytesIO(payload))
            else:
                height = batch["height"][idx].as_py()
                width = batch["width"][idx].as_py()
                channels = batch["channels"][idx].as_py()
                img_array = np.array(payload, dtype=np.uint8).reshape(height, width, channels)
                img = Image.fromarray(img_array)

            img.save(output_path)
            print(f"Image reconstructed and saved to {output_path}")
            return

        print(f"Image {filename} not found in Arrow file")

In [None]:
# process_images_to_arrow("/mnt/nvme/nuplan/dataset/nuplan-v1.1/sensor/2021.07.01.20.35.47_veh-38_00016_00281/CAM_F0", "test.arrow", 10)

In [None]:
# Read one camera frame back from an Arrow IPC log and time the JPEG decode.
import io
import time

import matplotlib.pyplot as plt

# with pa.OSFile("test.arrow", 'rb') as source:
#     with ipc.open_file(source) as reader:
#         table = reader.read_all()

# Tunables of this cell: which log to open and which row to decode.
ARROW_LOG_PATH = "/home/daniel/asim_workspace/data/nuplan_private_test/2021.07.25.16.16.23_veh-26_02446_02589.arrow"
FRAME_INDEX = 500

with pa.ipc.open_file(pa.memory_map(ARROW_LOG_PATH)) as reader:
    # This doesn't load data into memory yet!
    # (presumably because the source is memory-mapped — confirm)
    table = reader.read_all()


print(len(table))

# perf_counter() is monotonic and high resolution, which makes it the right
# clock for elapsed-time measurements; time.time() can jump with clock updates.
start = time.perf_counter()
# Extract JPG data
jpg_data = table["front_cam_demo"][FRAME_INDEX].as_py()

# read_image = read_image.convert("RGB")  # Ensure it's in RGB format
read_image = np.array(Image.open(io.BytesIO(jpg_data)))
print(read_image.dtype)
print(f"Image loaded in {time.perf_counter() - start:.4f} seconds")

plt.imshow(read_image)

In [None]:
from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB

# Dataset root comes from the environment; a missing NUPLAN_DATA_ROOT variable
# raises KeyError here.
NUPLAN_DATA_ROOT = Path(os.environ["NUPLAN_DATA_ROOT"])
# Absolute path of the single log database opened by this notebook.
log_path = "/mnt/nvme/nuplan/dataset/nuplan-v1.1/splits/private_test/2021.09.29.17.35.58_veh-44_01671_01819.db"
# NOTE(review): log_path is already a str, so str() is a no-op; the third
# argument is passed as None — presumably the maps database, confirm against
# the NuPlanDB signature.
log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None)


# Last expression of the cell: shows the image table accessor of the log.
log_db.image

In [None]:
import pickle
from nuplan.database.nuplan_db.nuplan_scenario_queries import get_images_from_lidar_tokens, get_cameras
from nuplan.planning.scenario_builder.nuplan_db.nuplan_scenario import NuPlanScenario, CameraChannel, LidarChannel
from pyquaternion import Quaternion


# Folder in which the "<logfile>.db" file of a log is looked up.
NUPLAN_DB_PATH = "/mnt/nvme/nuplan/dataset/nuplan-v1.1/splits/private_test"


def get_log_cam_info(log):
    """
    Collect the calibration of the CAM_F0 camera entries of a log.

    The database file is located as ``NUPLAN_DB_PATH/<log.logfile>.db``. The
    raw rotation of every camera is printed before it is converted to a
    rotation matrix.

    Args:
        log: Object exposing a ``logfile`` attribute (the log name without
            the ``.db`` extension).

    Returns:
        dict: Maps each camera token to a dict with the keys ``intrinsic``,
        ``distortion``, ``translation`` and ``rotation`` (rotation matrix).
    """

    def _unpickle_array(blob):
        # NOTE(review): pickle.loads can execute arbitrary code; only use
        # this on database files from a trusted source.
        return np.array(pickle.loads(blob))

    db_file = os.path.join(NUPLAN_DB_PATH, log.logfile + ".db")
    front_channel = [str(CameraChannel.CAM_F0.value)]

    calibrations = {}
    for cam in get_cameras(db_file, front_channel):
        intrinsic_matrix = _unpickle_array(cam.intrinsic)
        translation_vector = _unpickle_array(cam.translation)

        # Presumably quaternion coefficients, since the array is handed
        # straight to Quaternion() — confirm.
        raw_rotation = _unpickle_array(cam.rotation)
        print(raw_rotation)
        rotation_matrix = Quaternion(raw_rotation).rotation_matrix

        distortion_coeffs = _unpickle_array(cam.distortion)

        calibrations[cam.token] = {
            "intrinsic": intrinsic_matrix,
            "distortion": distortion_coeffs,
            "translation": translation_vector,
            "rotation": rotation_matrix,
        }

    return calibrations


# The calibration is looked up from the log alone, so it is fetched and
# printed once instead of being re-queried for every lidar sweep.
parameters = get_log_cam_info(log_db.log)
print(parameters)

front_channel = [str(CameraChannel.CAM_F0.value)]

# For every second lidar sweep, collect the CAM_F0 image records linked to it.
# An entry may be an empty list when no image is returned for a sweep.
images = []
for lidar_pc in log_db.lidar_pc[::2]:
    front_image = get_images_from_lidar_tokens(log_path, [lidar_pc.token], front_channel)
    images.append(list(front_image))


# Show the image records of the first sweep; guarded so that a log without
# lidar sweeps does not raise IndexError.
for image in images[0] if images else []:
    print(image)

In [None]:
# Rotation matrix computed for camera token 0872b6c896e85f9f. It is evaluated
# (a missing token raises KeyError) but not rendered, because a cell only
# displays its last expression.
parameters["0872b6c896e85f9f"]["rotation"]


# intrinsics = np.array([[1.545e03, 0.000e00, 9.600e02], [0.000e00, 1.545e03, 5.600e02], [0.000e00, 0.000e00, 1.000e00]])
# distortion = np.array([-0.356123, 0.172545, -0.00213, 0.000464, -0.05231])
# translation = np.array([ 1.66433035e+00, -1.32379618e-03,  1.57190200e+00])
# rotation = np.array(
#     [
#         [-0.00395669, -0.03969443, 0.99920403],
#         [-0.99971496, -0.02336898, -0.00488707],
#         [0.02354437, -0.99893856, -0.03959065],
#     ]
# )
# distortion

# Hard-coded rotation matrix shown as the cell output (same values as the
# commented-out `rotation` above). The literal is written once: a repeated
# identical expression before the last one is never displayed.
np.array(
    [
        [-0.00395669, -0.03969443, 0.99920403],
        [-0.99971496, -0.02336898, -0.00488707],
        [0.02354437, -0.99893856, -0.03959065],
    ]
)

In [None]:
import pickle


# Print the stored translation of every camera channel of the log, one line
# per camera.
# NOTE(review): only the translation is shown; if another field (for example
# cam.rotation) is wanted as well, add a print for it here.
for cam in get_cameras(log_path, [str(channel.value) for channel in CameraChannel]):
    print(pickle.loads(cam.translation))

In [None]:
# Scratch check that both expressions evaluate to 960.0.
# NOTE(review): 9.600e02 matches the third entry of the first row of the
# commented-out intrinsics matrix (presumably the principal point cx) and
# 1920 is presumably the image width in pixels — confirm.
9.600e02, 1920 / 2

In [None]:
import cv2

sensor_root = Path("/mnt/nvme/nuplan/dataset/nuplan-v1.1/sensor")
# Single source of truth for the output name, so the final message cannot
# drift from the file that is actually written.
video_path = f"{log_db.name}.mp4"

frames = []
for image in images:
    # Sweeps without a camera image contribute no frame.
    if len(image) == 0:
        continue

    jpg_path = sensor_root / image[0].filename_jpg
    # Decode straight from disk and force three channels: cvtColor with
    # COLOR_RGB2BGR fails on grayscale or palette images.
    with Image.open(jpg_path) as pil_image:
        read_image = np.array(pil_image.convert("RGB"))
    # Convert RGB to BGR for OpenCV
    frames.append(cv2.cvtColor(read_image, cv2.COLOR_RGB2BGR))

if not frames:
    # frames[0] would raise IndexError; report the empty case instead.
    print("No camera frames found; no video written")
else:
    # Define video writer; the frame size is taken from the first frame.
    height, width, _ = frames[0].shape
    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*"mp4v"), 20, (width, height))
    try:
        # VideoWriter does not raise when it cannot open the target.
        if not out.isOpened():
            raise RuntimeError(f"Could not open video writer for {video_path}")
        for frame in frames:
            out.write(frame)
    finally:
        # Always release so the container is finalised and the handle freed.
        out.release()
    print(f"Video saved as {video_path}")