# MaskRCNN Example

Method for plotting images

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

import torchvision.transforms.functional as F

plt.rcParams["savefig.bbox"] = 'tight'

def show(imgs):
    if not isinstance(imgs, list):
        imgs = [imgs]
    fig, axs = plt.subplots(ncols=len(imgs), squeeze=False)
    for i, img in enumerate(imgs):
        img = img.detach()
        img = F.to_pil_image(img)
        axs[0, i].imshow(np.asarray(img))
        axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

  from .autonotebook import tqdm as notebook_tqdm


Setting up Dataloader and MaskRCNN model

In [2]:
import os
import pathlib
from src.config import default_maskrcnn_cfg
from src.data.MaskRCNNDataset import MaskRCNNDataset
from torch.utils.data import DataLoader
from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights
from torchvision.utils import draw_segmentation_masks

cfg = default_maskrcnn_cfg()
name = pathlib.PurePath(os.getcwd()).name
if(name == 'notebooks'):
    os.chdir('..')
rootdir = os.getcwd()
rootdir = os.path.join(rootdir, "data/interim/trajectories/train")
scene_names = []
for subdir, dirs, files in os.walk(rootdir):
    name = pathlib.PurePath(subdir).name
    if(name == 'train'):
        scene_names = dirs

weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

for name in scene_names:
    path = os.path.join(rootdir, name+'/RGB')
    train_data = MaskRCNNDataset(path, 388, transform=transforms)
    train_loader = DataLoader(train_data, cfg.BATCHSIZE, cfg.SHUFFLE)

model = maskrcnn_resnet50_fpn(weights=weights)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device=device)
model.eval()
print('All set up!!!')

All set up!!!


Generate semantic images list

In [3]:
score_threshold = 0.5
proba_threshold = 0.5
semantic_images_list = []
for _, batch in enumerate(train_loader):
    batch = batch.to(device=device)
    outputs = model(transforms(batch))
    batch = batch.to(device='cpu')
    bool_masks = [
        out['masks'][out['scores']>score_threshold] > proba_threshold
        for out in outputs
    ]
    img_with_masks = [
        draw_segmentation_masks(img, mask.squeeze(1))
        for img, mask in zip(batch, bool_masks)
    ]
    semantic_images_list.extend(img_with_masks)




In [9]:
labels, masks = outputs[0]['labels'], outputs[0]['masks']
print(masks)

tensor([[[[0.0122, 0.0190, 0.0240,  ..., 0.0000, 0.0000, 0.0000],
          [0.0173, 0.0270, 0.0342,  ..., 0.0000, 0.0000, 0.0000],
          [0.0220, 0.0343, 0.0435,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],


        [[[0.0149, 0.0250, 0.0351,  ..., 0.0000, 0.0000, 0.0000],
          [0.0186, 0.0312, 0.0439,  ..., 0.0000, 0.0000, 0.0000],
          [0.0222, 0.0374, 0.0526,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0318, 0.0535, 0.0753,  ..., 0.0000, 0.0000, 0.0000],
          [0.0162, 0.0272, 0.0382,  ..., 0.0000, 0.0000, 0.0000],
          [0.0005, 0.0009, 0.0012,  ..., 0.0000, 0.0000, 0.0000]]],


        [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..

In [23]:
dictionary = {28: 1, 82: 0, 85: 0}
print(type(dictionary))
print(labels)
print(type(dictionary.get(labels[0].item())))

<class 'dict'>
tensor([28, 82, 85], device='cuda:0')
<class 'int'>


In [25]:
seal_labels = labels.cpu().apply_(dictionary.get)
print(type(seal_labels))

<class 'torch.Tensor'>


In [31]:
semantic_map_2d = torch.zeros((masks.shape[2], masks.shape[3], 7))
ma = masks.squeeze(1)[seal_labels == 0].detach().cpu()
print(ma.shape)
print(ma)

torch.Size([2, 256, 256])
tensor([[[0.0149, 0.0250, 0.0351,  ..., 0.0000, 0.0000, 0.0000],
         [0.0186, 0.0312, 0.0439,  ..., 0.0000, 0.0000, 0.0000],
         [0.0222, 0.0374, 0.0526,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0318, 0.0535, 0.0753,  ..., 0.0000, 0.0000, 0.0000],
         [0.0162, 0.0272, 0.0382,  ..., 0.0000, 0.0000, 0.0000],
         [0.0005, 0.0009, 0.0012,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]])


In [34]:
for category in range(1,7):
    if category in seal_labels:
        semantic_map_2d[:,:,category] = (masks[seal_labels == category].detach().cpu().max(dim=0)[0])[0]
        print(semantic_map_2d)

tensor([[[0.0000, 0.0122, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0190, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0240, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0173, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0270, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0342, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0220, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0343, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0435, 0.0000,  ..., 0.0000, 0.0000, 0.

In [None]:
semantic_map_2d = semantic_map_2d[:,:,1:]

In [None]:
semantic_map_2d.shape

torch.Size([256, 256, 6])

In [None]:
from src.utils.datatypes import SemanticMap2D
from src.utils.category_mappings import load_mask_instance_to_maskcat
import pandas as pd


label_mapping = load_mask_instance_to_maskcat(cfg)

semantic_masks: SemanticMap2D = np.array((256, 256, len(label_mapping.index)))
print(outputs[0]['boxes'])
for out in outputs:
	print(out['labels'])
	print(label_mapping)
	#labels = out['labels'].cpu().numpy()
	labels = out['labels']
	for i in labels:
		label = 63 #labels[i]
		if(label in label_mapping.index):
			channel = label_mapping.loc[label]['maskcat']
			print(channel)
			semantic_masks[:,:,channel] = out['masks']


tensor([[0.0000e+00, 7.1143e-02, 9.5931e+01, 1.1912e+02],
        [2.2214e-01, 0.0000e+00, 1.1445e+02, 2.5028e+02],
        [1.0774e+02, 4.9580e+00, 1.5373e+02, 3.7637e+01]], device='cuda:0',
       grad_fn=<StackBackward0>)
tensor([28, 82, 85], device='cuda:0')
              maskCatName  maskcat
maskInstNum                       
chair                  62        0
couch                  63        1
potted plant           64        2
bed                    65        3
toilet                 70        4
tv                     72        5


Show images

In [None]:
print(len(weights.meta['categories']))
print(MaskRCNN_ResNet50_FPN_Weights.DEFAULT.meta['categories'])
mask_categories = MaskRCNN_ResNet50_FPN_Weights.DEFAULT.meta['categories']
print(mask_categories[65])

In [None]:
total_frames = 0
max_frames = len(semantic_images_list)
i = 3
for img in semantic_images_list:
   if(i == 0):
      break
   i -= 1
   show(img)


In [None]:
print(len(weights.meta['categories']))
# print(MaskRCNN_ResNet50_FPN_Weights.DEFAULT.meta['categories'])
mask_categories = MaskRCNN_ResNet50_FPN_Weights.DEFAULT.meta['categories']
print(mask_categories[72])

In [None]:
import pandas as pd

tracked_categories_names = {(65,'bed'), (62,'chair'), (63,'couch'), (64,'potted plant'), (70,'toilet'), (72,'tv')}
mask_instance_to_mascat = pd.DataFrame(tracked_categories_names, columns={'maskNum', 'maskcat'}).sort_values('maskcat', ignore_index=True).set_index('maskNum')
print(mask_instance_to_mascat)