In [None]:
import os
import sys
from copy import deepcopy

project_dir = os.path.dirname(os.getcwd())
print(project_dir)
sys.path.append(project_dir)

import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from PIL import Image
import torch
from torchvision.transforms import ToPILImage, ToTensor, Normalize
from training.dataset.transforms import ComposeAPI, NormalizeAPI
from tqdm import tqdm

from dataset.collate_fn import collate_fn
from dataset.mini_dataset import MiniDataset

In [None]:
# Dataset configuration
object_labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 24, 25, 26, 27, 28, 29]
len_video = 1
input_image_size = 512
batch_size = 1
shuffle = False
multiview = True

# Per-channel statistics used by the dataset's normalisation transform.
mean = [0.3551, 0.3500, 0.3469]
std = [0.2921, 0.2716, 0.2742]
transforms = [ComposeAPI([NormalizeAPI(mean=mean, std=std, v2=True)])]

# Inverse normalisation, derived from `mean`/`std` so the two can never drift apart.
# Normalize computes (x - mean) / std, hence mean=-m/s and std=1/s yields x * s + m.
revert_mean = [-m / s for m, s in zip(mean, std)]
revert_std = [1 / s for s in std]
revert_transform = Normalize(mean=revert_mean, std=revert_std)

# NOTE(review): the variable is called `test_dataset` but the 'train' split is loaded — confirm this is intended.
test_dataset = MiniDataset('train',
                           num_frames=len_video,
                           input_image_size=input_image_size,
                           object_labels=object_labels,
                           transforms=transforms,
                           collate_fn=collate_fn,
                           batch_size=batch_size,
                           shuffle=shuffle,
                           multiview=multiview,)
print(f'Length of the dataset: {len(test_dataset)}')

In [None]:
# First flat index of the inspected triplet (presumably multiview sample 1000 with 3 views each — TODO confirm).
idx = 3 * 1000
# Only the last expression of a cell is rendered, so both summaries are gathered
# into a single trailing expression instead of two separate bare tuples.
{
    'images': (
        test_dataset.images[idx],
        test_dataset.images[idx+1],
        test_dataset.images[idx+2],
        len(test_dataset.images),
        len(test_dataset),
    ),
    'segmentation_masks': (
        test_dataset.segmentation_masks[idx],
        test_dataset.segmentation_masks[idx+1],
        test_dataset.segmentation_masks[idx+2],
        len(test_dataset.segmentation_masks),
        len(test_dataset),
    ),
}

In [None]:
# Sanity check: the flat image list must consist of consecutive triplets ordered
# camera01, camera04, camera05, and all three views must share one timestamp.
expected_cameras = ('camera01', 'camera04', 'camera05')
num_views = len(expected_cameras)
assert len(test_dataset.images) % num_views == 0, (
    f'Expected a multiple of {num_views} images, got {len(test_dataset.images)}'
)

for i in range(0, len(test_dataset.images), num_views):
    timestamps = []
    for offset, expected_camera in enumerate(expected_cameras):
        # File names look like '<camera>_<timestamp>_...'; only the first two fields are used.
        file_name = str(test_dataset.images[i + offset][0]).split('/')[-1]
        fields = file_name.split('_')
        camera_name, timestamp = fields[0], fields[1]
        assert camera_name == expected_camera, (
            f'Image {i + offset}: expected {expected_camera}, got {camera_name} ({file_name})'
        )
        timestamps.append(timestamp)
    assert len(set(timestamps)) == 1, f'Timestamp mismatch in triplet starting at {i}: {timestamps}'

Multiview Point Cloud Registration

In [None]:
# Convert every camera's pair of tensors for recording '001_PKA' into NumPy arrays.
cam_int_ext = []
for mtrx in test_dataset.camera_features['001_PKA']:
    first_np = mtrx[0].numpy()
    second_np = mtrx[1].numpy()
    cam_int_ext.append((first_np, second_np))

In [None]:
# Split the paired matrices into two parallel lists:
# element 0 of each pair goes to cam_int, element 1 to cam_ext.
cam_int, cam_ext = [], []
for pair in cam_int_ext:
    cam_int.append(pair[0])
    cam_ext.append(pair[1])

In [None]:
def process_img(mv_idx, img_indices):
    """Load the de-normalised colour views and matching depth maps of one multiview sample.

    Args:
        mv_idx: Index of the multiview sample in ``test_dataset``.
        img_indices: Row indices into ``test_dataset.images``, one per view of the sample.

    Returns:
        A tuple ``(img, depth_images, depth_path)`` where ``img`` is a tuple with one
        de-normalised NumPy image per view, ``depth_images`` is a list with one
        half-resolution depth array per view, and ``depth_path`` is the path of the
        last depth file read (``None`` when no view was requested).
    """
    # One image path per requested view.
    img_paths = test_dataset.images[img_indices, :].squeeze(1)
    # Load video data batch
    video_data_batch = test_dataset[mv_idx]
    views = video_data_batch[0]
    # Inverse mean/std normalisation of the first frame of every view.
    img = tuple(
        revert_transform(views[view_idx].frames[0].data).numpy()
        for view_idx in range(len(img_paths))
    )

    depth_images = []
    depth_path = None  # stays None if there are no views, instead of being unbound
    for img_path in img_paths:
        # Depth files mirror the colour file name inside a sibling 'depthimage' folder.
        depth_name = img_path.name.replace('.jpg', '.tiff').replace('color', 'depth')
        depth_path = img_path.parents[1] / 'depthimage' / depth_name
        # The context manager closes the file handle once the resized copy is made.
        with Image.open(depth_path) as gt_depth_image:
            # Halve both dimensions.
            # NOTE(review): the default resampling filter may interpolate depth values
            # across object edges — confirm whether nearest-neighbour is wanted here.
            half_size = (gt_depth_image.size[0] // 2, gt_depth_image.size[1] // 2)
            depth_images.append(np.array(gt_depth_image.resize(half_size)))
    return img, depth_images, depth_path

In [None]:
len_dataset = len(test_dataset)
print(f'Length of the dataset: {len_dataset}')

# Multiview samples to load; each one owns three consecutive rows of the image list.
mv_indices = [700, 1115, 1442]
img_views, depth_views = [], []

for mv_idx in tqdm(mv_indices):
    # Flat image indices of the three views belonging to this sample
    first_view = mv_idx * 3
    img_indices = list(range(first_view, first_view + 3))
    img, gt_depth_image, depth_path = process_img(mv_idx, img_indices)
    # Collapse the per-view sequences into single arrays with a leading view axis
    img = np.stack(img, axis=0)
    gt_depth_image = np.stack(gt_depth_image, axis=0)
    img_views.append(img)
    depth_views.append(gt_depth_image)

In [None]:
# Flip to True to persist the loaded views and camera matrices as .npy files.
_flag_dump = False
if _flag_dump:
    dump_targets = (
        ('../temp/mv_img_views.npy', img_views),
        ('../temp/mv_depth_views.npy', depth_views),
        ('../temp/camera_int.npy', cam_int),
        ('../temp/camera_ext.npy', cam_ext),
    )
    for dump_path, payload in dump_targets:
        with open(dump_path, 'wb') as f:
            np.save(f, np.array(payload))

Remove Black Padding

In [None]:
import open3d as o3d

# Stack the per-sample lists into dense arrays for the geometry steps below.
cam_imgs, depth_imgs = np.array(img_views), np.array(depth_views)
# Camera matrices: extrinsics first, then intrinsics.
camera_ext, camera_int = np.array(cam_ext), np.array(cam_int)

In [None]:
# The depth maps define the un-padded frame size.
H, W = depth_imgs.shape[-2:]
# Rows of padding on each of the top and bottom edges
# (assumes the colour images are W x W with symmetric vertical padding — TODO confirm).
buffer = (W - H) // 2
padded_height = cam_imgs.shape[-2]
# Crop only while the images are still padded: this keeps the cell idempotent on
# re-run, and the explicit stop index avoids the empty slice that `0:-0` produces
# when there is no padding at all.
if padded_height != H:
    cam_imgs = cam_imgs[..., buffer:padded_height - buffer, :]
assert cam_imgs.shape[-2:] == depth_imgs.shape[-2:], (
    f'Colour {cam_imgs.shape[-2:]} and depth {depth_imgs.shape[-2:]} sizes differ after cropping'
)

Remove Padding in Intrinsics

In [None]:
# Rebuild from the untouched `cam_int` list so re-running this cell does not shift
# the principal point a second time.
camera_int = np.array(cam_int)
# Cropping `buffer` rows from the top moves the vertical principal point (cy) up by the same amount.
camera_int[:, 1, 2] = camera_int[:, 1, 2] - buffer

In [None]:
pcds = []
# Only the first multiview sample is converted; raise the cap to process more.
max_images = 1
num_images = min(cam_imgs.shape[0], max_images)
num_views = cam_imgs.shape[1]

for idx in range(num_images):
    for view_idx in range(num_views):
        # Colour image: CHW float -> HWC uint8. Clipping prevents values at or
        # outside the [0, 1] range from wrapping around in the uint8 cast
        # (e.g. 1.0 * 256 would overflow to 0).
        rgb_float = cam_imgs[idx, view_idx].transpose(1, 2, 0)
        rgb = np.clip(np.rint(rgb_float * 255), 0, 255).astype(np.uint8)
        rgb = np.ascontiguousarray(rgb)
        d_tmp = depth_imgs[idx, view_idx]
        rgb = o3d.geometry.Image(rgb)
        d = o3d.geometry.Image(d_tmp)
        # Get extrinsics
        extrinsics = camera_ext[view_idx]
        # Get intrinsics
        intrinsics = camera_int[view_idx]
        intrinsic_o3d = o3d.camera.PinholeCameraIntrinsic(
            width=W,
            height=H,
            fx=intrinsics[0, 0],
            fy=intrinsics[1, 1],
            cx=intrinsics[0, 2],
            cy=intrinsics[1, 2],
        )
        # Compute RGBD; depth is passed through unscaled and effectively untruncated.
        rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
            rgb,
            d,
            convert_rgb_to_intensity=False,
            depth_scale=1,
            depth_trunc=10000000,
        )
        # Create PC
        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsic_o3d)
        # Rescale coordinates by 1/1000 (presumably millimetres -> metres — TODO confirm).
        pcd.points = o3d.utility.Vector3dVector(np.asarray(pcd.points) / 1000)
        # Apply the camera's extrinsic matrix to the cloud
        # (presumably camera -> common/world frame — TODO confirm the convention).
        pcd.transform(extrinsics)
        pcds.append(pcd)

In [None]:
# Persist every point cloud next to the notebook as point_cloud_<i>.pcd.
for i, pcd in enumerate(pcds):
    out_name = f"point_cloud_{i}.pcd"
    # write_point_cloud signals failure through its boolean return value, so check
    # it explicitly instead of silently continuing with a missing file.
    if not o3d.io.write_point_cloud(out_name, pcd):
        raise IOError(f"Failed to write point cloud to {out_name}")