In [126]:
import torch
import shutil
import os
import cv2
import numpy as np
import yaml
import re
from PIL import Image
from io import BytesIO
import torchvision.transforms as tf
import io

In [127]:
interval_size = 10
cam = "03"
first_frame = 0
source_folder = f"/home/angelika/pixelsplat_fisheye/datasets/kitti360/2013_05_28_drive_0000_sync/image_{cam}/data_rgb"

# Get the list of filenames in the source folder
filenames = sorted(os.listdir(source_folder))
last_frame = 1000
print(f"Number of frames: {last_frame}")
available_frames = []
nb_frames = last_frame - first_frame + 1
target_im_folder = f"test_images_cam{cam}_{first_frame}_{last_frame}_overlap_{interval_size}"
target_folder = f"/home/angelika/pixelsplat_fisheye/datasets/kitti360/{target_im_folder}"

if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Iterate over the filenames and copy images to the destination folder
for i, filename in enumerate(filenames):
    if i < nb_frames and i%interval_size==0:
        available_frames.append(i)
        source_path = os.path.join(source_folder, filename)
        filename_dest = filename
        destination_path = os.path.join(target_folder, filename_dest)
        shutil.copy2(source_path, destination_path)


Number of frames: 1000


In [128]:
# Copy camera poses
pose_file = f"/home/angelika/pixelsplat_fisheye/datasets/kitti360/2013_05_28_drive_0000_sync/all_cam{cam[1:]}_to_world.txt"

# Extract poses
cam2world = []

# Read cam2world matrices
for index,line in enumerate(open(pose_file, 'r').readlines()):
    if index % interval_size == 0:
        value = list(map(float, line.strip().split(" ")))
        cam2world.append(np.array(value[1:]))

cam2world = np.array(cam2world)
cam2world = cam2world[first_frame:last_frame+1]
print(cam2world.shape)
# Save only the poses to a .txt file
with open(f'{target_folder}/camera_poses.txt', 'w') as file:
    for i, row in enumerate(cam2world):
        if i % interval_size == 0:
            mapped_index = i // interval_size
            if mapped_index < len(available_frames): 
                line = [str(available_frames[mapped_index])] + [str(x) for x in row]
                line = ' '.join(line) + '\n'
                file.write(line)

(1001, 16)


In [129]:
image_dict = {}

image_dict['url'] = ""
image_dict['key'] = f"kitti360_cam{cam}_{first_frame}_{last_frame}"

timestamps = torch.as_tensor(available_frames)
image_dict['timestamps'] = timestamps

In [130]:
cam2world12 = torch.tensor(cam2world[:, :12])
two_zeros = torch.zeros((nb_frames, 2))
intrinsics = torch.zeros((nb_frames, 4))


In [131]:
def readYAMLFile(fileName):
    '''make OpenCV YAML file compatible with python'''
    ret = {}
    skip_lines=1    # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0"
    with open(fileName) as fin:
        for i in range(skip_lines):
            fin.readline()
        yamlFileOut = fin.read()
        myRe = re.compile(r":([^ ])")   # Add space after ":", if it doesn't exist. Python yaml requirement
        yamlFileOut = myRe.sub(r': \1', yamlFileOut)
        ret = yaml.safe_load(yamlFileOut)
    return ret

intrinsics_file = f'/home/angelika/datasets/kitti_360/calibration/image_{cam}.yaml'
intrinsics_dict = readYAMLFile(intrinsics_file)

intrinsics[:, 0] = torch.tensor(intrinsics_dict['projection_parameters']['gamma1']).repeat(nb_frames)
intrinsics[:, 1] = torch.tensor(intrinsics_dict['projection_parameters']['gamma2']).repeat(nb_frames)
intrinsics[:, 2] = torch.tensor(intrinsics_dict['projection_parameters']['u0']).repeat(nb_frames)
intrinsics[:, 3] = torch.tensor(intrinsics_dict['projection_parameters']['v0']).repeat(nb_frames)


In [132]:
# Camera parameters
poses = torch.cat((cam2world12, two_zeros, intrinsics), dim=1)
poses.shape

image_dict['cameras'] = poses

In [133]:
# Convert images and save them to image_dict

flattened_images = []

for i, filename in enumerate(filenames):
    if i < nb_frames:
        source_path = os.path.join(source_folder, filename)
        image = Image.open(source_path)
        
        # Convert image to bytes
        byte_stream = BytesIO()
        image.save(byte_stream, format='PNG')  # Choose appropriate format, e.g., JPEG, PNG, etc.
        image_bytes = byte_stream.getvalue()
        byte_stream.close()

        # Convert bytes to tensor
        frameTensor = torch.tensor(np.frombuffer(image_bytes, dtype=np.uint8))
        
        flattened_images.append(frameTensor)
        

In [134]:
# Test one image
# byte_stream = BytesIO()
# image.save(byte_stream, format='PNG')  # Choose appropriate format, e.g., JPEG, PNG, etc.
# image_bytes = byte_stream.getvalue()
# byte_stream.close()

# frameTensor = torch.tensor(np.frombuffer(image_bytes, dtype=np.uint8))

# Check if the image is loaded correctly
# image2 = Image.open(BytesIO(frameTensor.numpy().tobytes()))
# image2

In [135]:
# Save the images
image_dict['images'] = flattened_images

# Save the image_dict to a .torch file
data = [image_dict]

image_dict_name = "000000"
torch.save(data, f'{target_folder}/{image_dict_name}.torch')
torch.save(data, f'/home/angelika/pixelsplat_fisheye/datasets/kitti360/test/{image_dict_name}.torch')


In [136]:
image_dict

{'url': '',
 'key': 'kitti360_cam03_0_1000',
 'timestamps': tensor([   0,   10,   20,   30,   40,   50,   60,   70,   80,   90,  100,  110,
          120,  130,  140,  150,  160,  170,  180,  190,  200,  210,  220,  230,
          240,  250,  260,  270,  280,  290,  300,  310,  320,  330,  340,  350,
          360,  370,  380,  390,  400,  410,  420,  430,  440,  450,  460,  470,
          480,  490,  500,  510,  520,  530,  540,  550,  560,  570,  580,  590,
          600,  610,  620,  630,  640,  650,  660,  670,  680,  690,  700,  710,
          720,  730,  740,  750,  760,  770,  780,  790,  800,  810,  820,  830,
          840,  850,  860,  870,  880,  890,  900,  910,  920,  930,  940,  950,
          960,  970,  980,  990, 1000]),
 'cameras': tensor([[ 4.0327e-01, -2.3195e-02, -9.1479e-01,  ...,  1.4849e+03,
           6.9888e+02,  6.9815e+02],
         [ 3.5016e-01, -2.7439e-02, -9.3629e-01,  ...,  1.4849e+03,
           6.9888e+02,  6.9815e+02],
         [ 2.8934e-01, -2.5256e