In [1]:
import sys

# Drop the ROS Python 2.7 dist-packages entry from the module search path
# before the imports below run (presumably so `import cv2` does not resolve to
# the ROS build -- confirm). The membership check keeps this cell idempotent:
# list.remove raises ValueError when the entry is missing, which happens on
# machines without this ROS workspace and when the cell is re-run.
ros_py27_dist_packages = '/home/aicenteruav/catkin_ws/devel/lib/python2.7/dist-packages'
if ros_py27_dist_packages in sys.path:
    sys.path.remove(ros_py27_dist_packages)

import cv2
import pandas as pd
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
import torch
import numpy as np
import datasets.utils as utils
from datasets.scannetV2 import make_transforms
from util.box_ops import box_cxcywh_to_xyxy
from pathlib import Path

In [2]:
# Dataset location; stop immediately when the directory is missing.
root = Path('./data/scannet')
assert root.exists(), f'provided ScanNet path {root} does not exist'

# Per split: (file listing the scenes, label-mapping TSV).
PATHS = {
    "train": (root / "train.txt", root / "scannetv2-labels.combined.tsv"),
    "val": (root / "val.txt", root / "scannetv2-labels.combined.tsv"),
}

# Split inspected by the rest of the notebook.
image_set = 'train'
data_list, tsv_map = PATHS[image_set]

# One scene name per line of the split file.
scene_list = data_list.read_text().splitlines()

### Check indexing

In [3]:
def get_data_Id(index, num_frames, num_sceneData):
    """Map a flat sample index to the scene it falls in and a frame index.

    Every scene contributes ``num_data - (num_frames - 1)`` samples. Scenes are
    walked in order, accumulating those counts, until the running total
    exceeds ``index``.

    Args:
        index: Flat sample index counted across all scenes in order.
        num_frames: Number of frames that make up one sample.
        num_sceneData: Sequence with the number of frames of each scene.

    Returns:
        Tuple ``(i, data_id)`` where ``i`` is the position of the scene in
        ``num_sceneData`` and ``data_id`` is
        ``index + (i + 1) * (num_frames - 1)``, i.e. the sample index shifted
        by ``num_frames - 1`` for this scene and for every scene before it.

    Raises:
        IndexError: If ``index`` is not smaller than the total number of
            samples (previously the function silently returned ``None``,
            which made callers fail later when unpacking the result).
    """
    frames_per_offset = num_frames - 1
    # Named to avoid shadowing the builtin ``sum``.
    samples_so_far = 0
    for i, num_data in enumerate(num_sceneData):
        samples_so_far += num_data - frames_per_offset
        if samples_so_far > index:
            return i, index + (i + 1) * frames_per_offset
    raise IndexError(
        "sample index {} is out of range for {} available samples".format(
            index, samples_so_far))

# Number of frames per sample; passed to get_data_Id below.
num_frames = 6
# Running total of color images over the scenes visited so far.
data_len = 0
# Number of color images of each scene, in scene_list order.
num_sceneData = []
# Value of data_len before each scene is added (cumulative image offset).
scene_start_index = []
for scene in scene_list:
    # get_filenames_scannet is unpacked into six values; only the color image
    # list is used here. These names stay bound to the LAST scene after the
    # loop, and later cells read len(color_images) from that leftover state.
    color_images, depth_images, labels, instances, poses, intrinsic = utils.get_filenames_scannet(root, scene)
    scene_start_index.append(data_len)
    data_len += len(color_images)
    num_sceneData.append(len(color_images)) 

# Total number of samples: every scene gives up (num_frames - 1) of its images.
index_len = data_len - (num_frames - 1) * len(num_sceneData)
# Collect the (sceneId, dataId) pair get_data_Id returns for every sample index.
id_dict = {'sceneId': [], 'dataId': []}
for index in range(index_len):
    sceneId, dataId = get_data_Id(index, num_frames, num_sceneData)
    id_dict["sceneId"].append(sceneId)
    id_dict["dataId"].append(dataId)

df = pd.DataFrame(data=id_dict)
print("num_frames: {}".format(num_frames))
print("num_sceneData: {}".format(num_sceneData))
print("scene_start_index: {}".format(scene_start_index))
# NOTE(review): scene_start_index holds cumulative image counts, while the rows
# of df are numbered by sample index (which loses num_frames - 1 per scene).
# Confirm that looking up rows by image offsets is the intended check; an
# offset >= index_len would raise a KeyError here.
df.loc[scene_start_index]
            

num_frames: 6
num_sceneData: [1020, 2160, 2056]
scene_start_index: [0, 1020, 3180]


Unnamed: 0,sceneId,dataId
0,0,5
1020,1,1030
3180,2,3195


### Convert to 3D coordinates

In [4]:
def plot_3d(xdata, ydata, zdata, color=None, b_min=2, b_max=8, view=(45, 45)):
    """Scatter-plot points in a 3D axes that uses the same limits on all axes.

    Args:
        xdata: Point coordinates along the x axis.
        ydata: Point coordinates along the y axis.
        zdata: Point coordinates along the z axis.
        color: Optional colors forwarded to the scatter call as ``c``
            (the caller in this notebook passes one RGB triple per point).
        b_min: Lower limit applied to the x, y and z axes.
        b_max: Upper limit applied to the x, y and z axes.
        view: Two values forwarded to ``ax.view_init`` (elevation, azimuth).
    """
    fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, dpi=200)
    ax.view_init(view[0], view[1])

    ax.set_xlim(b_min, b_max)
    ax.set_ylim(b_min, b_max)
    ax.set_zlim(b_min, b_max)

    # Label the axes so the figure can be read on its own.
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")

    # No ``cmap`` is passed: 'rgb' is not a registered matplotlib colormap.
    # With explicit RGB colors a colormap is ignored (with a warning), and with
    # scalar colors the unknown name raises, so the argument was never useful.
    ax.scatter3D(xdata, ydata, zdata, c=color, s=0.1)

MAX_INDEX = len(color_images)  # take up to this index of images
SKIP = 50  # take one image of every SKIP to speed up the processing
SCENE = 2
# Set to True to load the scene and render its point cloud (slow).
check_coords = False

if check_coords:
    color_images, depth_images, labels, instances, poses, intrinsic_path = utils.get_filenames_scannet(root, scene_list[SCENE])
    # MAX_INDEX was computed from whichever scene an earlier cell loaded last,
    # which is not necessarily SCENE. Clamp the range to the lists that were
    # just loaded so the loop can never index past their end.
    stop_index = min(MAX_INDEX, len(color_images), len(depth_images), len(poses))
    coords_list = []
    rgb_list = []
    for i in range(0, stop_index, SKIP):
        rgb = utils.load_rgb(color_images[i])
        # Scale to [0, 1]; mean 0 / std 1 leaves the scaled values unchanged.
        rgb = F.normalize(rgb / 255.0, [0.,0.,0.], [1.,1.,1.])
        rgb = F.resize(rgb, (480, 640))
        _, coordinates = utils.load_depth_coords(poses[i], depth_images[i], intrinsic_path, load_mode="coords")
        rgb_list.append(rgb)
        coords_list.append(coordinates)
    # Stack the frames on a new dim 1, then flatten everything after dim 0
    # (assumes dim 0 is the channel / xyz axis -- TODO confirm against utils).
    rgbs = torch.flatten(torch.stack(rgb_list, dim=1), start_dim=1)
    coords = torch.flatten(torch.stack(coords_list, dim=1), start_dim=1)

    print("rgbs shape: {}".format(rgbs.size()))
    print("coords shape: {}".format(coords.size()))
    plot_3d(coords[0], coords[1], coords[2], color=rgbs.T)

### Check Target

In [5]:
def check_masks_bboxes(insts, masks, bboxes):
    """Show every instance mask with its bounding box in an OpenCV window.

    One window is opened per instance; ``cv2.waitKey(0)`` blocks until a key
    is pressed, after which the window is closed and the next instance shown.

    Args:
        insts: Sequence whose length gives the number of instances to show.
        masks: Indexable of tensors; ``masks[i]`` is converted to a uint8
            image (assumes a single-channel 0/1 mask -- TODO confirm).
        bboxes: Indexable of tensors; the first four values of ``bboxes[i]``
            are used as the two rectangle corners ``(x0, y0)`` and
            ``(x1, y1)`` in pixels.
    """
    for i in range(len(insts)):
        image = masks[i].numpy().astype('uint8')
        # OpenCV drawing functions need plain integer pixel coordinates, but
        # the boxes can arrive as float tensors (e.g. after torch.round), so
        # convert every coordinate to a Python int explicitly.
        x0, y0, x1, y1 = (int(round(float(v))) for v in bboxes[i].numpy()[:4])
        # Expand to three channels and stretch mask value 1 to 255.
        RGB_img = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) * 255
        cv2.rectangle(RGB_img, (x0, y0), (x1, y1), (255, 0, 0), 2)
        cv2.imshow('My Image', RGB_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


# Scene to inspect, and the switch that enables this interactive check.
SCENE = 2
check_target = False

if check_target:
    preprocessing_map = utils.get_preprocessing_map(tsv_map)
    scene_files = utils.get_filenames_scannet(root, scene_list[SCENE])
    _, _, label_paths, instance_paths, _, _ = scene_files
    label_list = []
    # Only the first (label, instance) pair of the scene is visualised.
    first_pair = next(zip(label_paths, instance_paths), None)
    if first_pair is not None:
        label, instance = first_pair
        target = utils.load_target(label, instance, preprocessing_map)
        check_masks_bboxes(target['insts'], target['masks'], target['boxes'])

### Check data loader and transform

In [6]:
SKIP = 50  # take one image of every SKIP to speed up the processing
SCENE = 2
load_mode='coords'

transform = make_transforms('train')
color_images, depth_images, labels, instances, poses, intrinsic_path = utils.get_filenames_scannet(
    root, scene_list[SCENE])
# Count the images only after SCENE's file lists are loaded, so the value
# describes this scene and the cell does not depend on `color_images` left
# behind by an earlier cell.
MAX_INDEX = len(color_images)  # take up to this index of images
preprocessing_map = utils.get_preprocessing_map(tsv_map)
# Only the first frame of the scene is loaded and checked (see the break).
for rgb, depth, label, instance, pose in zip(color_images, depth_images, labels, instances, poses):
    path_set = {'rgb': rgb, 'depth': depth, 'pose': pose,
                'label': label, 'instance': instance, 'intrinsic': intrinsic_path}
    rgb, depth, coords, target = utils.scannet_loader(path_set, load_mode, preprocessing_map)
    rgb, depth, coords, target = transform(rgb, depth, coords, target)
    print("rgb shape: {}".format(rgb.shape))
    print("depth shape: {}".format(depth.shape))
    print("coords shape: {}".format(coords.shape))
    print("rgb min: {}, max: {}".format(torch.min(rgb), torch.max(rgb)))
    print("depth min: {}, max: {}".format(torch.min(depth), torch.max(depth)))
    print("coords min: {}, max: {}".format(torch.min(coords), torch.max(coords)))
    # Scale the boxes to pixels with the size of the transformed image instead
    # of a hard-coded 640x480 (assumes target['boxes'] are normalized
    # (cx, cy, w, h) relative to that image -- TODO confirm against the transform).
    height, width = rgb.shape[-2:]
    box_scale = torch.Tensor([width, height, width, height])
    bbox = torch.round(box_cxcywh_to_xyxy(target['boxes'] * box_scale))
    check_masks_bboxes(target['insts'], target['masks'], bbox)
    break
            

rgb shape: torch.Size([3, 480, 640])
depth shape: torch.Size([1, 480, 640])
coords shape: torch.Size([3, 480, 640])
rgb min: -2.1179039478302, max: 2.640000581741333
depth min: 0.0, max: 4.218999862670898
coords min: 0.11424517631530762, max: 10.029654502868652
torch.Size([13, 480, 640])
