In [1]:
import torch
import shutil
import os
import cv2
import numpy as np
import yaml
import re
from PIL import Image
from io import BytesIO
import torchvision.transforms as tf
import io
from einops import rearrange, repeat


In [2]:
# --- Sequence selection ------------------------------------------------------
cam = "02"
first_frame, last_frame = 0, 99
# Both bounds are inclusive, hence the extra one.
nb_frames = (last_frame + 1) - first_frame

# --- Image geometry in pixels ------------------------------------------------
# These match the 800x800 resize and the 640x360 crop applied when the frames
# are encoded later in the notebook.
h_resize, w_resize = 800, 800
h_new, w_new = 360, 640

# --- Locations ---------------------------------------------------------------
# Name of the output folder; it encodes the camera and the frame window.
target_im_folder = f"test_images_cam{cam}_{first_frame}_{last_frame}"

source_folder = f"/home/angelika/pixelsplat_fisheye/datasets/kitti360/2013_05_28_drive_0000_sync/image_{cam}/data_rgb"
target_folder = f"/home/angelika/pixelsplat_fisheye/datasets/kitti360/{target_im_folder}"

# Create the output folder; this is a no-op when it already exists.
os.makedirs(target_folder, exist_ok=True)

# Sorted listing of the source folder. The position of a file in this list is
# used as its frame index. The full list is kept because later cells read it.
filenames = sorted(os.listdir(source_folder))

# Restrict the copy to the inclusive window [first_frame, last_frame] so the
# copied images line up with the pose rows selected by the same window.
# (Previously the first nb_frames files were always taken, whatever first_frame was.)
selected_filenames = filenames[first_frame:last_frame + 1]

# Frame indices of the files that are actually present in the window; the list
# is shorter than nb_frames when the folder holds fewer files than requested.
available_frames = list(range(first_frame, first_frame + len(selected_filenames)))

# Copy the selected images, keeping their original file names and metadata.
for filename in selected_filenames:
    source_path = os.path.join(source_folder, filename)
    destination_path = os.path.join(target_folder, filename)
    shutil.copy2(source_path, destination_path)


In [3]:
# Export the camera poses that belong to the selected frame window.
pose_file = f"/home/angelika/pixelsplat_fisheye/datasets/kitti360/2013_05_28_drive_0000_sync/all_cam{cam[1:]}_to_world.txt"

# Read the cam2world rows. Every non-empty line is parsed as whitespace-separated
# floats; the leading value is dropped (presumably the frame index - TODO confirm)
# and the remaining values form one pose row.
cam2world = []
with open(pose_file, 'r') as pose_fh:  # context manager so the handle is closed
    for line in pose_fh:
        fields = line.split()  # tolerates repeated spaces, tabs and trailing whitespace
        if not fields:
            continue  # skip blank lines instead of failing on float('')
        value = [float(token) for token in fields]
        cam2world.append(np.array(value[1:]))

cam2world = np.array(cam2world)
# Keep only the rows of the inclusive window [first_frame, last_frame].
cam2world = cam2world[first_frame:last_frame+1]
print(cam2world.shape)

# Write one line per pose: the frame index followed by the pose values.
with open(f'{target_folder}/camera_poses.txt', 'w') as file:
    for i, row in enumerate(cam2world):
        entries = [str(available_frames[i])] + [str(x) for x in row]
        file.write(' '.join(entries) + '\n')

(100, 16)


In [4]:
# One timestamp per exported frame; the frame indices serve as timestamps.
timestamps = torch.as_tensor(available_frames)

# Scene dictionary that is filled in over the following cells
# ('cameras' and 'images' are added later).
image_dict = {
    'url': "",
    'key': f"kitti360_cam{cam}_{first_frame}_{last_frame}",
    'timestamps': timestamps,
}

In [5]:
# First 12 values of every pose row; they are reshaped to a 3x4 block further down.
cam2world12 = torch.tensor(cam2world[..., :12])

# Zero-initialised per-frame buffers: a 2-column padding block and a 4-column
# intrinsics block that is filled in once the calibration file has been read.
two_zeros, intrinsics = (torch.zeros(nb_frames, width) for width in (2, 4))

In [6]:
# Compute the world-to-camera matrices from cam2world12, similarly to the pose
# handling in pixelsplat_data.ipynb, which reads:
#   w2c = repeat(torch.eye(4, dtype=torch.float32), "h w -> b h w", b=b).clone()   # shape: (256, 4, 4)
#   w2c[:, :3] = rearrange(poses[:, 6:], "b (h w) -> b h w", h=3, w=4)             # shape: (256, 3, 4)
#   extrinsics = w2c.inverse()

# One 4x4 identity per frame; the bottom row [0, 0, 0, 1] is left untouched.
w2c12 = torch.eye(4, dtype=torch.float32).expand(nb_frames, 4, 4).clone()

# Fill the top three rows with the 3x4 camera-to-world entries.
# NOTE: despite its name, w2c12 holds camera-to-world matrices; the inversion
# on the next line is what yields world-to-camera.
w2c12[:, :3] = cam2world12.reshape(-1, 3, 4)
world2cam123 = torch.linalg.inv(w2c12)

# Flatten each 4x4 matrix to 16 values and keep the first 12 (its top 3x4 part).
world2cam16 = world2cam123.flatten(start_dim=1)
world2cam12 = world2cam16[:, :12]

In [7]:
def readYAMLFile(fileName):
    '''Load an OpenCV-style YAML file and return the parsed content.

    Two incompatibilities with the Python YAML parser are worked around:

    * A leading "%YAML..." directive line (OpenCV writes "%YAML:1.0") is
      removed. A first line that is not such a directive is kept, so files
      without the directive no longer lose their first entry.
    * A space is inserted after every ":" that is directly followed by a
      non-space character, because the Python parser requires "key: value".

    Args:
        fileName: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the cleaned-up text.
    '''
    with open(fileName) as fin:
        first_line = fin.readline()
        remainder = fin.read()

    # Drop the first line only when it really is the YAML directive.
    if first_line.lstrip().startswith("%YAML"):
        yamlFileOut = remainder
    else:
        yamlFileOut = first_line + remainder

    # Add a space after ":" if it doesn't exist (Python yaml requirement).
    myRe = re.compile(r":([^ ])")
    yamlFileOut = myRe.sub(r': \1', yamlFileOut)
    return yaml.safe_load(yamlFileOut)

# Read the calibration of the selected camera.
intrinsics_file = f'/home/angelika/datasets/kitti_360/calibration/image_{cam}.yaml'
intrinsics_dict = readYAMLFile(intrinsics_file)

# Image size stored in the calibration file.
h_old = intrinsics_dict['image_height']
w_old = intrinsics_dict['image_width']

# Fill the intrinsics columns with values normalised by the calibration image
# size: horizontal quantities are divided by the width, vertical ones by the
# height. The same value is written to every frame.
# NOTE(review): the normalisation uses the calibration image size, while the
# frames are resized and centre-cropped before being stored - confirm that the
# consumer of these values accounts for that.
projection = intrinsics_dict['projection_parameters']
for column, param, extent in (
    (0, 'gamma1', w_old),
    (1, 'gamma2', h_old),
    (2, 'u0', w_old),
    (3, 'v0', h_old),
):
    intrinsics[:, column] = torch.tensor(projection[param]) / extent


In [8]:
# Per-frame camera vector with 18 columns:
#   [0:12]   first 12 values of the flattened world-to-camera matrix (its top 3x4 part)
#   [12:14]  zero padding
#   [14:18]  normalised intrinsics (gamma1/w, gamma2/h, u0/w, v0/h)
# NOTE(review): the pixelsplat_data.ipynb snippet quoted in an earlier cell
# reads the pose from poses[:, 6:], which suggests an intrinsics-first layout.
# Confirm that the loader used for this dataset expects the order built here.
poses = torch.cat([world2cam12, two_zeros, intrinsics], dim=-1)

image_dict['cameras'] = poses

In [9]:
# Encode the selected frames as PNG byte tensors for image_dict.
# Every frame is resized to (w_resize, h_resize) and then centre-cropped to
# (w_new, h_new). The crop box is derived from the configuration values instead
# of being hard-coded, so changing the configuration actually takes effect
# (800x800 -> 640x360 yields the box (80, 220, 720, 580)).
crop_left = (w_resize - w_new) // 2
crop_top = (h_resize - h_new) // 2
crop_box = (crop_left, crop_top, crop_left + w_new, crop_top + h_new)

flattened_images = []

# Use the same inclusive window [first_frame, last_frame] that selects the poses.
for filename in filenames[first_frame:last_frame + 1]:
    source_path = os.path.join(source_folder, filename)

    # PIL sizes are (width, height). The context manager closes the source file;
    # resize() has already produced an independent in-memory image by then.
    with Image.open(source_path) as image:
        im_resize = image.resize((w_resize, h_resize))
        im_crop = im_resize.crop(crop_box)

    # Serialise the cropped frame to PNG bytes.
    with BytesIO() as byte_stream:
        im_crop.save(byte_stream, format='PNG')
        image_bytes = byte_stream.getvalue()

    # Store the encoded bytes as a 1-D uint8 tensor.
    frameTensor = torch.tensor(np.frombuffer(image_bytes, dtype=np.uint8))
    flattened_images.append(frameTensor)
        

In [10]:
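# Test one image (disabled sanity check): encode a frame to PNG bytes, wrap the bytes in a tensor, and decode it again.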
# byte_stream = BytesIO()
# image.save(byte_stream, format='PNG')  # Choose appropriate format, e.g., JPEG, PNG, etc.
# image_bytes = byte_stream.getvalue()
# byte_stream.close()

# frameTensor = torch.tensor(np.frombuffer(image_bytes, dtype=np.uint8))

# Check if the image is loaded correctly
# image2 = Image.open(BytesIO(frameTensor.numpy().tobytes()))
# image2

In [11]:
# Attach the encoded frames to the scene dictionary.
image_dict['images'] = flattened_images

# The saved file holds a list of scene dictionaries; here it has a single entry.
data = [image_dict]

image_dict_name = "000000"

# First copy: next to the exported images and camera_poses.txt.
torch.save(data, f'{target_folder}/{image_dict_name}.torch')

# Second copy: in the dataset's test folder. The folder is created on demand,
# because torch.save fails when the parent directory does not exist.
test_folder = '/home/angelika/pixelsplat_fisheye/datasets/kitti360/test'
os.makedirs(test_folder, exist_ok=True)
torch.save(data, f'{test_folder}/{image_dict_name}.torch')


In [12]:
# Display the assembled scene dictionary (url, key, timestamps, cameras, images) as a visual check.
image_dict

{'url': '',
 'key': 'kitti360_cam02_0_99',
 'timestamps': tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
         54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
         72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
         90, 91, 92, 93, 94, 95, 96, 97, 98, 99]),
 'cameras': tensor([[-3.5136e-01, -9.3494e-01,  4.9443e-02,  ...,  9.5413e-01,
           5.1210e-01,  5.0412e-01],
         [-3.5136e-01, -9.3494e-01,  4.9443e-02,  ...,  9.5413e-01,
           5.1210e-01,  5.0412e-01],
         [-3.4567e-01, -9.3705e-01,  4.9520e-02,  ...,  9.5413e-01,
           5.1210e-01,  5.0412e-01],
         ...,
         [ 8.5305e-01, -5.2184e-01,  4.3954e-04,  ...,  9.5413e-01,
           5.1210e-01,  5.0412e-01],
         [ 8.6450e-01, -5.0263e-01,  1.413