# Unseen Object Instance Segmentation

In tabletop environments!

In [None]:
import sys, os
import json
from time import time
import glob

import torch
import numpy as np
import matplotlib.pyplot as plt
import scipy
import scipy.io
import cv2

# My libraries
import src.data_loader as data_loader
import src.data_augmentation as data_augmentation
import src.segmentation as segmentation
import src.evaluation as evaluation
import src.util.utilities as util_
import src.util.flowlib as flowlib

# Set the CUDA_VISIBLE_DEVICES environment variable to "0" for this process
# (presumably so that only the first GPU is used -- adjust per the TODO below).
os.environ['CUDA_VISIBLE_DEVICES'] = "0" # TODO: Change this if you have more than 1 GPU

In [None]:
def torch_to_numpy(torch_tensor, is_standardized_image = False):
    """ Turn a torch tensor into a numpy array that can be plotted.

        A 4-dimensional input is treated as NCHW and returned as NHWC; inputs
        of any other rank keep their axis order. The tensor is cloned before
        conversion, so the in-place edits below never touch the input.

        When is_standardized_image is set, the ImageNet standardization is
        undone on the first three entries of the last axis and the whole array
        is scaled by 255, i.e. an RGB image is put back in the [0,255] range.
    """
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)

    array = torch_tensor.cpu().clone().detach().numpy()
    if array.ndim == 4: # NCHW -> NHWC
        array = array.transpose(0, 2, 3, 1)

    if not is_standardized_image:
        return array

    # Invert (x - mean) / std one channel at a time, then rescale to [0,255]
    for channel, (mu, sigma) in enumerate(zip(imagenet_mean, imagenet_std)):
        array[..., channel] *= sigma
        array[..., channel] += mu
    array *= 255

    return array

## Depth Seeding Network parameters

In [None]:
# Settings for the Depth Seeding Network, grouped by purpose.
dsn_params = dict(

    # Sizes
    feature_dim=64,

    # algorithm parameters
    lr=1e-2, # learning rate
    iter_collect=20, # Collect results every _ iterations
    max_iters=100000,

    # architecture parameters
    use_coordconv=False,

    # Loss function parameters
    lambda_fg=1,
    lambda_direction=1.,

    # Hough Voting parameters
    skip_pixels=10,
    inlier_threshold=0.9,
    angle_discretization=100,
    inlier_distance=20,
    # percentage_threshold depends on skip_pixels, angle_discretization and
    # inlier_distance; it has to be tuned by trial and error.
    percentage_threshold=0.5,
    object_center_kernel_radius=10,

)

## Region Refinement Network parameters

In [None]:
# Settings for the Region Refinement Network, grouped by purpose.
rrn_params = dict(

    # Sizes
    feature_dim=64,

    # algorithm parameters
    lr=1e-2, # learning rate
    iter_collect=20, # Collect results every _ iterations
    max_iters=100000,

    # architecture parameters
    use_coordconv=False,

)

## Tabletop Segmentor parameters

In [None]:
# Settings for the Tabletop Segmentor, grouped by purpose.
tts_params = dict(

    # Padding for Region Refinement Network
    padding_percentage=0.25,

    # Open/Close Morphology for IMP (Initial Mask Processing) module
    use_open_close_morphology=True,
    open_close_morphology_ksize=9,

    # Closest Connected Component for IMP module
    use_closest_connected_component=True,

)
checkpoint_dir = '...' # TODO: change this to directory of downloaded models

# Build the checkpoint paths with os.path.join so that checkpoint_dir works
# with or without a trailing path separator.
dsn_filename = os.path.join(checkpoint_dir, 'DepthSeedingNetwork_TOD_checkpoint.pth')
rrn_filename = os.path.join(checkpoint_dir, 'RRN_TOD_checkpoint.pth')

# Turn on the final close morphology only when the RRN checkpoint's file name
# contains 'TOD'. The base name is tested rather than the full path so that a
# directory whose name happens to contain 'TOD' cannot flip this flag.
tts_params['final_close_morphology'] = 'TOD' in os.path.basename(rrn_filename)

# Build the segmentor from both checkpoints and their parameter dictionaries.
tabletop_segmentor = segmentation.TableTopSegmentor(tts_params,
                                                    dsn_filename,
                                                    dsn_params,
                                                    rrn_filename,
                                                    rrn_params
                                                   )

## Run on example images

We provide some example RGB-D images of scenarios in our lab environments. The following code loads those images and runs the network on them.

In [None]:
# Locate the bundled example images relative to the current working directory.
example_images_dir = os.path.abspath('.') + '/example_images/'
image_files = sorted(glob.glob(os.path.join(example_images_dir, 'image_*.npy')))
N = len(image_files)

# Read the camera parameters with a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(os.path.join(example_images_dir, 'camera_params.json')) as f:
    camera_params = json.load(f)

# Preallocated batches; every example image must fit a 480x640x3 slot.
rgb_imgs = np.zeros((N, 480, 640, 3), dtype=np.float32)
xyz_imgs = np.zeros((N, 480, 640, 3), dtype=np.float32)
for i, img_file in enumerate(image_files):
    # NOTE: allow_pickle=True unpickles arbitrary objects, which can execute
    # code. Only load .npy files from a trusted source.
    d = np.load(img_file, allow_pickle=True, encoding='bytes').item()

    # RGB
    rgb_img = d['rgb']
    rgb_imgs[i] = data_augmentation.standardize_image(rgb_img)

    # Depth
    depth_img = d['depth']
    depth_img = (depth_img / 1000.).astype(np.float32) # millimeters -> meters

    # Compute xyz ordered point cloud
    xyz_img = data_loader.compute_xyz(depth_img,camera_params)
    xyz_imgs[i] = xyz_img

# Tensor versions of both stacks, keyed the way run_on_batch is called below.
batch = {
    'rgb' : data_augmentation.array_to_tensor(rgb_imgs),
    'xyz' : data_augmentation.array_to_tensor(xyz_imgs),
}

In [None]:
print("Number of images: {0}".format(N))

### Compute segmentation masks ###
# CUDA kernels are launched asynchronously, so GPU work may still be pending
# when a call returns. Synchronize before starting and before stopping the
# clock so the reported time covers the segmentation work itself.
if torch.cuda.is_available():
    torch.cuda.synchronize()
st_time = time()
fg_masks, direction_predictions, initial_masks, seg_masks = tabletop_segmentor.run_on_batch(batch)
if torch.cuda.is_available():
    torch.cuda.synchronize()
total_time = time() - st_time
print('Total time taken for Segmentation: {0} seconds'.format(round(total_time, 3)))
print('FPS: {0}'.format(round(N / total_time,3)))

# Get results in numpy
seg_masks = seg_masks.cpu().numpy()
fg_masks = fg_masks.cpu().numpy()
# Move axis 1 of the direction predictions to the end (channels-last) for plotting
direction_predictions = direction_predictions.cpu().numpy().transpose(0,2,3,1)
initial_masks = initial_masks.cpu().numpy()

In [None]:
# Bring the standardized RGB batch back to displayable [0,255] values.
rgb_imgs = torch_to_numpy(batch['rgb'].cpu(), is_standardized_image=True)
total_subplots = 6

fig_index = 1
for i in range(N):

    # One wide figure per image, holding a single row of panels
    fig = plt.figure(fig_index)
    fig_index += 1
    fig.set_size_inches(20,5)

    # (picture, title) for every panel, in left-to-right order
    panels = [
        # Image
        (rgb_imgs[i].astype(np.uint8), 'Image {0}'.format(i+1)),
        # Depth
        (xyz_imgs[i,...,2], 'Depth'),
        # Initial table mask
        (util_.get_color_mask(fg_masks[i]), "Foreground Table Mask"),
        # Direction predictions
        (flowlib.flow_to_image(direction_predictions[i]), "Center Direction Predictions"),
        # Initial masks
        (util_.get_color_mask(initial_masks[i]),
         f"Initial Masks. #objects: {np.unique(initial_masks[i,...]).shape[0]-1}"),
        # Refined masks
        (util_.get_color_mask(seg_masks[i]),
         f"Refined Masks. #objects: {np.unique(seg_masks[i,...]).shape[0]-1}"),
    ]

    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, total_subplots, position)
        plt.imshow(picture)
        plt.title(caption)

## Run on example OSD/OCID images

We provide a few [OSD](https://www.acin.tuwien.ac.at/en/vision-for-robotics/software-tools/osd/) and [OCID](https://www.acin.tuwien.ac.at/en/vision-for-robotics/software-tools/object-clutter-indoor-dataset/) images and run the network on them. Evaluation metrics are shown for each of the images.

In [None]:
# The OSD and OCID samples sit in the same example directory as the lab images.
example_images_dir = os.path.abspath('.') + '/example_images/'

OSD_image_files = sorted(glob.glob(example_images_dir + '/OSD_*.npy'))
OCID_image_files = sorted(glob.glob(example_images_dir + '/OCID_*.npy'))
N = len(OSD_image_files) + len(OCID_image_files)

# Preallocate one 480x640 slot per image for every modality
frame_shape = (N, 480, 640)
rgb_imgs = np.zeros(frame_shape + (3,), dtype=np.float32)
xyz_imgs = np.zeros(frame_shape + (3,), dtype=np.float32)
label_imgs = np.zeros(frame_shape, dtype=np.uint8)

# OSD files come first, followed by the OCID files
for i, img_file in enumerate(OSD_image_files + OCID_image_files):
    d = np.load(img_file, allow_pickle=True, encoding='bytes').item()

    # RGB (standardized before it goes into the batch)
    rgb_img = d['rgb']
    rgb_imgs[i] = data_augmentation.standardize_image(rgb_img)

    # XYZ is stored in the file already, so it is copied as-is
    xyz_imgs[i] = d['xyz']

    # Label image stored alongside the RGB and XYZ data
    label_imgs[i] = d['label']

batch = dict(
    rgb=data_augmentation.array_to_tensor(rgb_imgs),
    xyz=data_augmentation.array_to_tensor(xyz_imgs),
)

In [None]:
print("Number of images: {0}".format(N))

### Compute segmentation masks ###
# CUDA kernels are launched asynchronously, so GPU work may still be pending
# when a call returns. Synchronize before starting and before stopping the
# clock so the reported time covers the segmentation work itself.
if torch.cuda.is_available():
    torch.cuda.synchronize()
st_time = time()
fg_masks, direction_predictions, initial_masks, seg_masks = tabletop_segmentor.run_on_batch(batch)
if torch.cuda.is_available():
    torch.cuda.synchronize()
total_time = time() - st_time
print('Total time taken for Segmentation: {0} seconds'.format(round(total_time, 3)))
print('FPS: {0}'.format(round(N / total_time,3)))

# Get results in numpy
seg_masks = seg_masks.cpu().numpy()
fg_masks = fg_masks.cpu().numpy()
# Move axis 1 of the direction predictions to the end (channels-last) for plotting
direction_predictions = direction_predictions.cpu().numpy().transpose(0,2,3,1)
initial_masks = initial_masks.cpu().numpy()

In [None]:
# Bring the standardized RGB batch back to displayable [0,255] values.
rgb_imgs = torch_to_numpy(batch['rgb'].cpu(), is_standardized_image=True)
total_subplots = 4

fig_index = 1
for i in range(N):

    # One wide figure per image, holding a single row of panels
    fig = plt.figure(fig_index)
    fig_index += 1
    fig.set_size_inches(20,5)

    # Both mask panels get the same nc: one more than the largest label
    # value found in either the prediction or the ground truth.
    num_objs = max(seg_masks[i].max(), label_imgs[i].max()) + 1

    # (picture, title) for every panel, in left-to-right order
    panels = [
        # Image
        (rgb_imgs[i].astype(np.uint8), f"Image {i+1}"),
        # Depth
        (xyz_imgs[i,...,2], 'Depth'),
        # Predicted masks
        (util_.get_color_mask(seg_masks[i], nc=num_objs),
         f"Predicted Masks. #objects: {np.unique(seg_masks[i,...]).shape[0]-1}"),
        # Ground-truth masks
        (util_.get_color_mask(label_imgs[i], nc=num_objs),
         f"Ground Truth. #objects: {np.unique(label_imgs[i,...]).shape[0]-1}"),
    ]

    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, total_subplots, position)
        plt.imshow(picture)
        plt.title(caption)

    # Score the prediction against the ground truth and print the result
    eval_metrics = evaluation.multilabel_metrics(seg_masks[i], label_imgs[i])
    print(f"Image {i+1} Metrics:")
    print(eval_metrics)

Note: the table label is not considered in the evaluation metrics.