# Run MASK-RCNN on all of my real RGBD Images from TableTop_Dataset

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

import open3d

import os
import glob
import requests
from io import BytesIO
from PIL import Image
import numpy as np
import json

import cv2
import torch

from importlib import reload

In [None]:
# Use a larger default figure size so the images rendered below are easier to inspect.
pylab.rcParams['figure.figsize'] = (20, 12)

These are the relevant imports for the detection model.

In [None]:
# Input-modality switches. They select the config file and demo class in the
# setup cell below, and the preprocessing branch in every prediction loop.
use_rgb = True
use_depth = True
use_pretrained = False
# Valid combinations: RGB only, depth only, RGB + depth, or pretrained only.
# (use_pretrained is consulted only when both use_rgb and use_depth are False.)

In [None]:
from maskrcnn_benchmark.config import cfg # Restart the kernel every time you want to load a new config...
import predictor
# Re-import the predictor module so that local edits to it take effect in this session.
predictor = reload(predictor)
from maskrcnn_benchmark.data.datasets.tabletop_object_dataset import compute_xyz

In [None]:
from maskrcnn_benchmark.data.datasets.tabletop_object_dataset import data_loading_params
import maskrcnn_benchmark.data.datasets.data_augmentation as data_augmentation
import maskrcnn_benchmark.data.datasets.util as util_

# Pick the config file matching the selected input modality. The RGB/depth
# flags take precedence; the pretrained config is used only when both are off.
if use_rgb and not use_depth:
    config_file = "../configs/e2e_mask_rcnn_R_50_FPN_1x_TTOD_RGB.yaml"
elif not use_rgb and use_depth:
    config_file = "../configs/e2e_mask_rcnn_R_50_FPN_1x_TTOD_Depth.yaml"
elif use_rgb and use_depth:
    config_file = "../configs/e2e_mask_rcnn_R_50_FPN_1x_TTOD_RGBD.yaml"
elif use_pretrained:
    config_file = "../configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml"
else:
    # Fail early with a clear message instead of a NameError on `config_file` below.
    raise ValueError(
        "No input mode selected: set at least one of use_rgb, use_depth or use_pretrained."
    )

cfg.merge_from_file(config_file)
cfg['INPUT']['USE_RGB'] = use_rgb
cfg['INPUT']['USE_DEPTH'] = use_depth

# Build the predictor. `demo` is the object every prediction cell below calls.
if use_rgb or use_depth:
    demo = predictor.Tabletop_Object_Demo(
        cfg,
        confidence_threshold=0.7,
        show_mask_map_nice=False,
        show_mask_map_raw=True,
    )
else:
    # Only reachable in pretrained mode (guaranteed by the check above).
    demo = predictor.COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.7,
        show_mask_map_nice=False,
        show_mask_map_raw=True,
    )

Let's define a helper function

In [None]:
def imshow(img):
    """Display a BGR image with matplotlib, hiding the axes.

    Channels 2, 1, 0 of the third axis are selected (in that order) so that a
    BGR image is handed to matplotlib in RGB order.
    """
    rgb_view = img[:, :, [2, 1, 0]]
    plt.imshow(rgb_view)
    plt.axis("off")

### Computing the predictions

We provide a `run_on_opencv_image` function, which takes an image as it was loaded by OpenCV (in `BGR` format) and computes the predictions on it, returning an image with the predictions overlaid on it.

We save the outputs to file.

Note:
* Input for `demo.run_on_opencv_image()` is a BGR uint8 image.
* Output of `demo.run_on_opencv_image()` is a BGR uint8 image.

## Predict on one image

In [None]:
# Camera parameters passed to compute_xyz for the synthetic test image.
synth_camera_params = {
    'img_width' : 640,
    'img_height' : 480,
    'near' : 0.01,
    'far' : 100,
    'fov' : 60, # vertical field of view (an angle, presumably in degrees)
}

# Which synthetic scene/view to run on.
scene_num = 58
view_num = 3
modality = 'rgb' if use_rgb else 'depth'
extension = 'jpeg' if use_rgb else 'png'
synth_img_filename = f'scene_{scene_num:05d}/{modality}_{view_num:05d}.{extension}'

# Alternative input: '/data/tabletop_dataset_v2/real_RGBD_images/rgb_00002.jpeg'
img_filename = '/data/tabletop_dataset_v3/test_set/' + synth_img_filename

# NOTE(review): when use_rgb and use_depth are both True, this cell feeds only
# the BGR image to the model, whereas the batch cells below build a concatenated
# RGB + XYZ tensor -- confirm that this is intended.
if use_rgb:
    img = cv2.imread(img_filename)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {img_filename}')
    rgb_image = img.copy()
elif use_depth:
    img = cv2.imread(img_filename, cv2.IMREAD_ANYDEPTH)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {img_filename}')
    img = (img / 1000.).astype(np.float32)  # presumably millimetres -> metres; confirm
    img = compute_xyz(img, synth_camera_params)

    # The RGB frame is expected under the same name with 'depth'->'rgb' and 'png'->'jpeg'.
    rgb_filename = img_filename.replace('depth', 'rgb').replace('png', 'jpeg')
    rgb_image = cv2.imread(rgb_filename)
    if rgb_image is None:
        raise FileNotFoundError(f'Could not read image: {rgb_filename}')

predictions = demo.run_on_opencv_image(img, rgb_image) # BGR format
imshow(predictions)

## All predictions on Real RGBD Images

In [None]:
# Collect the input files and choose the output directory for the real RGBD images.
# The blocks are evaluated in order, so a later matching block overrides the
# `save_dir` / `N` set by an earlier one.
if use_rgb:
    rgb_images = sorted(glob.glob('/data/tabletop_dataset_v5/real_RGBD_images/rgb*'))
    save_dir = '/home/chrisxie/projects/ssc/external/maskrcnn/detections/tabletop_object_train_RGB/'
    N = len(rgb_images)

if use_depth:
    depth_images = sorted(glob.glob('/data/tabletop_dataset_v5/real_RGBD_images/depth*'))
    save_dir = '/home/chrisxie/projects/ssc/external/maskrcnn/detections/tabletop_object_train_Depth/'

    # Get camera parameters (the context manager closes the file once it is parsed)
    camera_params_filename = '/data/tabletop_dataset_v5/real_RGBD_images/camera_params.json'
    with open(camera_params_filename) as camera_params_file:
        camera_params_dict = json.load(camera_params_file)

    N = len(depth_images)

if use_pretrained:
    rgb_images = sorted(glob.glob('/data/tabletop_dataset_v5/real_RGBD_images/rgb*'))
    save_dir = '/home/chrisxie/projects/ssc/external/maskrcnn/detections/coco_train/'
    N = len(rgb_images)

if use_rgb and use_depth:
    save_dir = '/home/chrisxie/temp/'
    N = len(rgb_images)

In [None]:
# Run the model on every real RGBD image and save the prediction as maskrcnn_<i>.png.
# Each branch must bind both `img` (model input) and `rgb_img` (BGR image
# passed alongside it to the demo).
for i in range(N):
    if use_rgb and not use_depth:
        img = cv2.imread(rgb_images[i])
        rgb_img = img.copy()

        # Preprocess
        rgb_img_tensor = rgb_img.astype(np.float32)
        rgb_img_tensor = data_augmentation.BGR_image(rgb_img_tensor)
        img = data_augmentation.array_to_tensor(rgb_img_tensor)

    elif not use_rgb and use_depth:
        depth_img = cv2.imread(depth_images[i], cv2.IMREAD_ANYDEPTH)
        depth_img = (depth_img / 1000.).astype(np.float32)
        xyz_img = compute_xyz(depth_img, camera_params_dict) # Shape: [H x W x 3], dtype=np.float32

        # The matching RGB frame sits in the same directory as the depth frame,
        # with 'depth' -> 'rgb' and 'png' -> 'jpeg' in the file name.
        base_dir = '/'.join(depth_images[i].split('/')[:-1]) + '/'
        rgb_filename = depth_images[i].split('/')[-1].replace('depth', 'rgb').replace('png', 'jpeg')
        rgb_filename = base_dir + rgb_filename
        rgb_img = cv2.imread(rgb_filename)

        # Preprocess
        img = data_augmentation.array_to_tensor(xyz_img)

    elif use_rgb and use_depth:
        rgb_img = cv2.imread(rgb_images[i])
        depth_img = cv2.imread(depth_images[i], cv2.IMREAD_ANYDEPTH)
        depth_img = (depth_img / 1000.).astype(np.float32)
        xyz_img = compute_xyz(depth_img, camera_params_dict) # Shape: [H x W x 3], dtype=np.float32

        # Preprocess
        rgb_img_tensor = rgb_img.astype(np.float32)
        rgb_img_tensor = data_augmentation.BGR_image(rgb_img_tensor)
        rgb_img_tensor = data_augmentation.array_to_tensor(rgb_img_tensor)
        xyz_img = data_augmentation.array_to_tensor(xyz_img)
        img = torch.cat([rgb_img_tensor, xyz_img], dim=0)

    elif use_pretrained:
        img = cv2.imread(rgb_images[i])
        rgb_img = img.copy()

    predictions = demo.run_on_opencv_image(img, rgb_img) # BGR format for both input and output.

    save_filename = save_dir + f'maskrcnn_{i}.png'
    cv2.imwrite(save_filename, predictions)

## Predict on TODv5 Test Set

In [None]:
# Choose the output directory for the TODv5 test-set predictions from the active input mode.
if use_rgb:
    save_dir = (
        '/home/chrisxie/projects/ssc/external/TODv5_results/test_set/Mask_RCNN/RGBD/'
        if use_depth
        else '/home/chrisxie/projects/ssc/external/TODv5_results/test_set/Mask_RCNN/RGB/'
    )
elif use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/TODv5_results/test_set/Mask_RCNN/Depth/'
elif use_pretrained:
    save_dir = '/home/chrisxie/projects/ssc/external/TODv5_results/test_set/Mask_RCNN/Pretrained/'

# Create the directory on first use.
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

In [None]:
import maskrcnn_benchmark.data.datasets.tabletop_object_dataset as TOD
# Re-import the dataset module so that local edits to it take effect in this session.
TOD = reload(TOD)
TOD_test_filepath = '/data/tabletop_dataset_v5/test_set/'
# batch_size=1: the prediction loop below reads element [0] of every batch entry.
dl = TOD.TOD_test_dataloader(TOD_test_filepath, batch_size=1, num_workers=8)

In [None]:
from tqdm import tqdm

# Run the model on every TODv5 test batch and write each prediction as an indexed PNG
# under save_dir, mirroring the directory layout given by the batch's 'label_abs_path'.
progress = tqdm(dl)
for batch in progress:

    rgb_img = batch['rgb'][0]  # [3, H, W]
    xyz_img = batch['xyz'][0]  # [3, H, W]

    # Create input to Mask RCNN. Preprocessing is done here (exactly like in maskrcnn_benchmark.data.datasets.tabletop_object_dataset.py)
    if use_rgb and not use_depth:
        img = rgb_img
    elif use_depth and not use_rgb:
        img = xyz_img
    elif use_rgb and use_depth:
        img = torch.cat([rgb_img, xyz_img], dim=0)
    elif use_pretrained:
        # rgb_img is a torch.Tensor here (it is .permute()'d below); tensors have
        # no .copy() method, so clone() is used to take the copy.
        img = rgb_img.clone()

    # Run model
    rgb_img_np = rgb_img.permute(1,2,0).numpy()  # [3, H, W] tensor -> [H, W, 3] array
    rgb_img_np = cv2.cvtColor(rgb_img_np, cv2.COLOR_BGR2RGB)
    predictions = demo.run_on_opencv_image(img, rgb_img_np) # if show_mask_maps_raw=True, this should be shape: [H x W]

    # Write results to disk
    label_abs_path = batch['label_abs_path'][0]
    file_path = save_dir + label_abs_path.rsplit('/', 1)[0] + '/'
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    file_name = file_path + label_abs_path.rsplit('/', 1)[1].rsplit('.', 1)[0] + '.png'
    util_.imwrite_indexed(file_name, predictions.astype(np.uint8))

## Predict on OCID images

In [None]:
# Choose the output directory for the OCID predictions from the active input mode.
if use_rgb and not use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/OCID_results/Mask_RCNN/RGB/'
elif not use_rgb and use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/OCID_results/Mask_RCNN/Depth/'
elif use_rgb and use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/OCID_results/Mask_RCNN/RGBD/'
elif use_pretrained:
    save_dir = '/home/chrisxie/projects/ssc/external/OCID_results/Mask_RCNN/Pretrained/'

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Read the list of .pcd files to process (one entry per line); the context
# manager closes the file handle instead of leaving it open for the session.
with open('/data/OCID-dataset/pcd_files.txt', 'r') as f:
    pcd_files = [line.strip() for line in f]
# pcd_files = pcd_files[0:1]  # uncomment to try the loop below on a single file

In [None]:
from tqdm import tqdm

# Run the model on every OCID point cloud and write each prediction as an indexed PNG.
for pcd_filename in tqdm(pcd_files):

    ### Process .pcd file ###
    # Keep the part of the path after 'OCID-dataset', e.g.
    # /data/OCID-dataset/YCB10/table/top/curved/seq36/pcd/result_2018-08-24-15-13-13.pcd
    #   -> YCB10/table/top/curved/seq36/pcd/result_2018-08-24-15-13-13.pcd
    path_parts = pcd_filename.split('/')
    temp_idx = path_parts.index('OCID-dataset')
    label_abs_path = '/'.join(path_parts[temp_idx + 1:])
    point_cloud = open3d.read_point_cloud(pcd_filename)

    # Pad the colours with zero rows up to 480*640 entries, then reshape to a uint8 image.
    cloud_colors = np.asarray(point_cloud.colors)
    num_missing = 480*640 - cloud_colors.shape[0]
    filled_in_rgb_img = np.concatenate([cloud_colors, np.zeros((num_missing, 3))])
    rgb_img = np.round(255 * filled_in_rgb_img.reshape(480, 640, 3)).astype(np.uint8)

    if use_depth:
        # Same zero-padding for the xyz coordinates; NaN entries are zeroed as well.
        cloud_points = np.asarray(point_cloud.points)
        num_missing = 480*640 - cloud_points.shape[0]
        filled_in_points = np.concatenate([cloud_points, np.zeros((num_missing, 3))])
        xyz_img = filled_in_points.reshape(480, 640, 3)
        xyz_img[np.isnan(xyz_img)] = 0

    # Create input to Mask RCNN. Preprocessing is done here (exactly like in maskrcnn_benchmark.data.datasets.tabletop_object_dataset.py)
    if use_rgb:
        bgr_float = data_augmentation.BGR_image(rgb_img.astype(np.float32))
        if use_depth:
            # RGB + depth: concatenate both tensors along dim 0.
            rgb_img_tensor = data_augmentation.array_to_tensor(bgr_float)
            xyz_img = data_augmentation.array_to_tensor(xyz_img)
            img = torch.cat([rgb_img_tensor, xyz_img], dim=0)
        else:
            # RGB only
            rgb_img_tensor = bgr_float
            img = data_augmentation.array_to_tensor(rgb_img_tensor)
    elif use_depth:
        # Depth only
        img = data_augmentation.array_to_tensor(xyz_img)
    elif use_pretrained:
        img = rgb_img.copy()

    ### Run the thing ###
    # Note: img is a pytorch Tensor (a numpy copy of rgb_img in pretrained mode), rgb_img is np.array of type np.uint8
    predictions = demo.run_on_opencv_image(img, rgb_img) # if show_mask_maps_raw=True, this should be shape: [H x W]

    ### Write out the results ###
    # Mirror the dataset's sub-directory layout under save_dir and swap the extension for .png.
    label_dir = label_abs_path.rsplit('/', 1)[0]
    file_path = save_dir + label_dir + '/'
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    label_stem = label_abs_path.rsplit('/', 1)[1].rsplit('.', 1)[0]
    file_name = file_path + label_stem + '.png'
    util_.imwrite_indexed(file_name, predictions.astype(np.uint8))

## Predict on OSD images

In [None]:
# Choose the output directory for the OSD predictions from the active input mode.
if use_rgb and not use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/OSD_results/Mask_RCNN/RGB/'
elif not use_rgb and use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/OSD_results/Mask_RCNN/Depth/'
elif use_rgb and use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/OSD_results/Mask_RCNN/RGBD/'
elif use_pretrained:
    save_dir = '/home/chrisxie/projects/ssc/external/OSD_results/Mask_RCNN/Pretrained/'

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Read the list of .pcd files to process (one entry per line); the context
# manager closes the file handle instead of leaving it open for the session.
with open('/data/OSD/pcd_files.txt', 'r') as f:
    pcd_files = [line.strip() for line in f]

In [None]:
from tqdm import tqdm

# Run the model on every OSD point cloud and write each prediction as an indexed PNG.
for pcd_filename in tqdm(pcd_files):

    ### Process .pcd file ###
    # Keep the part of the path after 'OSD', e.g.
    # /data/OSD/OSD-0.2/pcd/learn44.pcd -> OSD-0.2/pcd/learn44.pcd
    path_parts = pcd_filename.split('/')
    temp_idx = path_parts.index('OSD')
    label_abs_path = '/'.join(path_parts[temp_idx + 1:])
    point_cloud = open3d.read_point_cloud(pcd_filename)

    # Pad the colours with zero rows up to 480*640 entries, then reshape to a uint8 image.
    cloud_colors = np.asarray(point_cloud.colors)
    num_missing = 480*640 - cloud_colors.shape[0]
    filled_in_rgb_img = np.concatenate([cloud_colors, np.zeros((num_missing, 3))])
    rgb_img = np.round(255 * filled_in_rgb_img.reshape(480, 640, 3)).astype(np.uint8)

    if use_depth:
        # Same zero-padding for the xyz coordinates; NaN entries are zeroed as well.
        cloud_points = np.asarray(point_cloud.points)
        num_missing = 480*640 - cloud_points.shape[0]
        filled_in_points = np.concatenate([cloud_points, np.zeros((num_missing, 3))])
        xyz_img = filled_in_points.reshape(480, 640, 3)
        xyz_img[np.isnan(xyz_img)] = 0

    # Create input to Mask RCNN. Preprocessing is done here (exactly like in maskrcnn_benchmark.data.datasets.tabletop_object_dataset.py)
    if use_rgb:
        bgr_float = data_augmentation.BGR_image(rgb_img.astype(np.float32))
        if use_depth:
            # RGB + depth: concatenate both tensors along dim 0.
            rgb_img_tensor = data_augmentation.array_to_tensor(bgr_float)
            xyz_img = data_augmentation.array_to_tensor(xyz_img)
            img = torch.cat([rgb_img_tensor, xyz_img], dim=0)
        else:
            # RGB only
            rgb_img_tensor = bgr_float
            img = data_augmentation.array_to_tensor(rgb_img_tensor)
    elif use_depth:
        # Depth only
        img = data_augmentation.array_to_tensor(xyz_img)
    elif use_pretrained:
        img = rgb_img.copy()

    ### Run the thing ###
    # Note: img is a pytorch Tensor (a numpy copy of rgb_img in pretrained mode), rgb_img is np.array of type np.uint8
    predictions = demo.run_on_opencv_image(img, rgb_img) # if show_mask_maps_raw=True, this should be shape: [H x W]

    ### Write out the results ###
    # Mirror the dataset's sub-directory layout under save_dir and swap the extension for .png.
    label_dir = label_abs_path.rsplit('/', 1)[0]
    file_path = save_dir + label_dir + '/'
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    label_stem = label_abs_path.rsplit('/', 1)[1].rsplit('.', 1)[0]
    file_name = file_path + label_stem + '.png'
    util_.imwrite_indexed(file_name, predictions.astype(np.uint8))

## Predict on ClearGrasp Images

In [None]:
# Choose the output directory for the ClearGrasp predictions from the active input mode.
if use_rgb:
    save_dir = (
        '/home/chrisxie/projects/ssc/external/cleargrasp_results/Mask_RCNN/RGBD/'
        if use_depth
        else '/home/chrisxie/projects/ssc/external/cleargrasp_results/Mask_RCNN/RGB/'
    )
elif use_depth:
    save_dir = '/home/chrisxie/projects/ssc/external/cleargrasp_results/Mask_RCNN/Depth/'
elif use_pretrained:
    save_dir = '/home/chrisxie/projects/ssc/external/cleargrasp_results/Mask_RCNN/Pretrained/'

# Create the directory on first use.
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

In [None]:
# Build the ClearGrasp dataloader; the loop in the next cell runs the model on everything it yields.
import maskrcnn_benchmark.data.datasets.cleargrasp_object as cg_dl
# Re-import the dataset module so that local edits to it take effect in this session.
cg_dl = reload(cg_dl)
# batch_size=1: the prediction loop reads element [0] of every batch entry.
dl = cg_dl.get_CG_dataloader(batch_size=1, num_workers=6, shuffle=True)

In [None]:
from tqdm import tqdm

# Run the model on every ClearGrasp batch and write each prediction as an indexed PNG.
progress = tqdm(dl)
for batch in progress:

    rgb_img = batch['rgb'].numpy()[0]
    xyz_img = batch['xyz'][0]

    # Create input to Mask RCNN. Preprocessing is done here (exactly like in maskrcnn_benchmark.data.datasets.tabletop_object_dataset.py)
    if use_rgb:
        bgr_float = data_augmentation.BGR_image(rgb_img.astype(np.float32)) # Shape: [H x W x 3]
        if use_depth:
            # RGB + depth: concatenate both tensors along dim 0.
            rgb_img_tensor = data_augmentation.array_to_tensor(bgr_float)
            img = torch.cat([rgb_img_tensor, xyz_img], dim=0)
        else:
            # RGB only
            rgb_img_tensor = bgr_float
            img = data_augmentation.array_to_tensor(rgb_img_tensor)
    elif use_depth:
        # Depth only: the loader's xyz tensor is used as-is.
        img = xyz_img
    elif use_pretrained:
        img = rgb_img.copy()

    # Run model
    predictions = demo.run_on_opencv_image(img, rgb_img) # if show_mask_maps_raw=True, this should be shape: [H x W]

    if 'cleargrasp' in dl.dataset.name:
        # filter_labels is given a float tensor with a leading axis added, plus the batch's bboxes.
        batched_predictions = torch.from_numpy(predictions)[None,...].float()
        predictions = cg_dl.filter_labels(batched_predictions, batch['bbox']).numpy()

    # Write results to disk
    # NOTE(review): predictions[0] below relies on the leading axis added in the
    # 'cleargrasp' branch above -- confirm the branch is always taken for this loader.
    label_abs_path = batch['label_abs_path'][0]
    file_path = save_dir + label_abs_path.rsplit('/', 1)[0] + '/'
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    label_stem = label_abs_path.rsplit('/', 1)[1].rsplit('.', 1)[0]
    file_name = file_path + label_stem + '.png'
    util_.imwrite_indexed(file_name, predictions[0].astype(np.uint8))

### Visualize Results

In [None]:
# `scipy.ndimage.measurements` is a deprecated namespace; `label` is available
# directly from `scipy.ndimage`.
from scipy.ndimage import label as connected_components

# Show one saved prediction next to its RGB input and ground-truth label.
temp = os.path.join(save_dir, 'cleargrasp-dataset-test-val/real-val/d435/')
files = sorted(os.listdir(temp))

i = 30  # index of the file to visualize

# Mask RCNN prediction (written by the loop above)
filename = os.path.join(temp, files[i])
pred_img = util_.imread_indexed(filename)

# RGB image: same relative path under /data/cleargrasp/, with the mask suffix swapped
rgb_filename = filename.replace(save_dir, '/data/cleargrasp/')
rgb_filename = rgb_filename.replace('mask.png', 'transparent-rgb-img.jpg')
rgb_img = cv2.cvtColor(cv2.imread(rgb_filename), cv2.COLOR_BGR2RGB)
rgb_img = util_.resize_image(rgb_img, (640,368), interpolation='zoom')

# Ground-truth label: same relative path under /data/cleargrasp/
gt_label_filename = filename.replace(save_dir, '/data/cleargrasp/')
label_img = util_.imread_indexed(gt_label_filename)
if len(label_img.shape) == 3:
    label_img = label_img[:, :, 0]

# Give each connected region of value-255 pixels its own integer id
label_img, num_components = connected_components(label_img == 255)
label_img = util_.resize_image(label_img, (640,368), interpolation='nearest')
label_img[label_img > 0] = label_img[label_img > 0] + 1 # so values are in [0, 2, 3, ...] (i.e. no table label)


fig = plt.figure(1, figsize=(15,5))

plt.subplot(1,3,1)
plt.imshow(rgb_img)
plt.title('RGB')

plt.subplot(1,3,2)
plt.imshow(pred_img)
plt.title('Mask RCNN Prediction')

plt.subplot(1,3,3)
plt.imshow(label_img)
plt.title('GT Label')