# MaskRCNN

### Category mapping

In [1]:
import os
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
import torchvision.transforms.functional as F
from torchvision.utils import draw_bounding_boxes
from tqdm import tqdm
from yacs.config import CfgNode

from src.config import default_map_builder_cfg, default_sim_cfg
from src.features.mapping import SemanticMap3DBuilder
from src.model.perception import map_processing
from src.model.perception.labeler import LabelGenerator
from src.utils import category_mapping
from src.utils.category_mapping import get_instance_index_to_reseal_name_dict
from src.utils.misc import get_semantic_map
from src.visualisation import instance_map_visualization
from src.visualisation.instance_map_visualization import visualize_2d_categorical_instance_map
from src.visualisation.semantic_map_visualization import (
    visualize_categorical_label_map,
    visualize_semantic_map,
)
from src.model.perception.model_wrapper import ModelWrapper

# When the kernel is started from inside the notebooks/ folder, step up one
# level so the relative ./data paths used by later cells resolve.
launch_dir_name = pathlib.PurePath(os.getcwd()).name
if launch_dir_name == 'notebooks':
    print(launch_dir_name)
    os.chdir('..')

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
notebooks


In [2]:
# --- Trajectory selection and on-disk layout ---------------------------------
# Dataset split and trajectory to process. Every path below is derived from
# these two values, so switching trajectory or split is a one-line change.
SPLIT = "train"
TRAJECTORY = "00006-HkseAnWCgqk"

# All per-trajectory artefacts live under ROOT.
ROOT = f"./data/interim/trajectories/{SPLIT}/{TRAJECTORY}"
DEPTH_MAP_DIR = f"{ROOT}/D"
RGB_IMAGE_DIR = f"{ROOT}/RGB"
POSITIONS_FILE = f"{ROOT}/positions.npy"
ROTATIONS_FILE = f"{ROOT}/rotations.npy"
SEMANTIC_MAP_DIR = f"{ROOT}/Semantic"

# TRAJECTORY has the form "<prefix>-<name>"; the semantic info file is named
# after the part that follows the dash.
trajectory_name = TRAJECTORY.split("-")[1]
SEMANTIC_INFO_FILE = (
    f"./data/raw/{SPLIT}/scene_datasets/hm3d/{SPLIT}/{TRAJECTORY}/{trajectory_name}.semantic.txt"
)

# --- 3D semantic map builder -------------------------------------------------
# Start from the project defaults and override only what this notebook needs.
sim_cfg = default_sim_cfg()
map_builder_cfg = default_map_builder_cfg()
map_builder_cfg.NUM_SEMANTIC_CLASSES = 6
map_builder_cfg.RESOLUTION = 0.05
map_builder_cfg.MAP_SIZE = [25, 1.5, 25]
map_builder_cfg.GET_ENTIRE_MAP = True
map_builder = SemanticMap3DBuilder(map_builder_cfg, sim_cfg)

# --- Perception model --------------------------------------------------------
model_config = CfgNode()
model_config.USE_INITIAL_TRANSFORMS = True
model_config.SCORE_THRESHOLD = 0.5
model_config.MASK_THRESHOLD = 0.5
model = ModelWrapper(model_config)
# Requires a CUDA device; presumably ModelWrapper is a torch.nn.Module - confirm.
model.cuda()


In [3]:
# Camera poses recorded along the trajectory, indexed by frame number below.
positions = np.load(POSITIONS_FILE)
# NOTE(review): np.quaternion is not provided by NumPy itself; this line relies
# on an import elsewhere having registered it - confirm which one.
rotations = np.load(ROTATIONS_FILE).view(dtype=np.quaternion)

# Scene-index -> category-index lookup built from the scene's semantic info file.
scene_index_to_category_index_map = category_mapping.get_scene_index_to_reseal_index_vectorized(
    SEMANTIC_INFO_FILE
)

# Presumably discards anything accumulated by an earlier run of this cell - confirm.
map_builder.clear()

def load_image(path):
    """Read an image file and return it as an RGB array divided by 255.

    Args:
        path: Path of the image file to read.

    Returns:
        The image with channels reordered from OpenCV's BGR to RGB and every
        value divided by 255 (a float array).

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None rather
        # than raising, which would otherwise surface as an opaque cvtColor error.
        raise FileNotFoundError(f"Could not read image file: {path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image / 255



# Number of leading trajectory frames fed to the map builder.
NUM_FRAMES = 400
# The semantic map is refreshed once per this many frames (on frames 1, 11, 21, ...).
SEMANTIC_MAP_UPDATE_INTERVAL = 10

for frame_index in tqdm(range(NUM_FRAMES)):
    depth_map = np.load(f"{DEPTH_MAP_DIR}/{frame_index}.npy")
    rgb_image = load_image(f"{RGB_IMAGE_DIR}/{frame_index}.png")
    # Deliberately not called `map`: a global with that name would shadow the
    # builtin map() for every later cell in the notebook.
    frame_semantics = model(rgb_image)
    # Alternative input kept for reference: use the saved semantics instead of
    # the model prediction.
    # saved_semantics = np.load(f"{SEMANTIC_MAP_DIR}/{frame_index}.npy")
    # frame_semantics = get_semantic_map(saved_semantics, scene_index_to_category_index_map, map_builder_cfg.NUM_SEMANTIC_CLASSES)
    pose = (positions[frame_index], rotations[frame_index])
    map_builder.update_point_cloud(frame_semantics, depth_map, pose)
    if frame_index % SEMANTIC_MAP_UPDATE_INTERVAL == 1:
        map_builder.update_semantic_map()

# Final refresh so frames added after the last periodic update are included.
map_builder.update_semantic_map()

100%|██████████| 400/400 [00:51<00:00,  7.83it/s]


In [4]:
# Inputs taken from the map builder populated in the previous cell.
semantic_map = map_builder.semantic_map
grid_index_of_origin = map_builder.get_grid_index_of_origin()

# Map-processing options handed to the label generator.
map_processor_cfg = CfgNode()
map_processor_cfg.NO_OBJECT_CONFIDENCE_THRESHOLD = 0.5
map_processor_cfg.HOLE_VOXEL_THRESHOLD = 2000
map_processor_cfg.OBJECT_VOXEL_THRESHOLD = 200
map_processor_cfg.DILATE = True

label_generator = LabelGenerator(
    semantic_map,
    grid_index_of_origin,
    map_builder_cfg,
    map_processor_cfg,
    sim_cfg.SENSOR_CFG,
)

In [5]:
import torchvision
from torch.utils.data.dataloader import DataLoader

from src.config import default_maskrcnn_cfg
from src.data.MaskRCNNDataset import MaskRCNNDataset

maskrcnn_cfg = default_maskrcnn_cfg()

# Preprocessing that ships with the default Mask R-CNN ResNet50-FPN-V2 weights.
transforms = torchvision.models.detection.MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT.transforms()
mask_dataset = MaskRCNNDataset(ROOT, transforms=transforms, label_generator=label_generator)
train_dataloader = DataLoader(
    mask_dataset,
    batch_size=maskrcnn_cfg.BATCH_SIZE,
    shuffle=maskrcnn_cfg.SHUFFLE,
)

# Hand the optimizer only the parameters that still require gradients.
params = [param for param in model.maskrcnn.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=maskrcnn_cfg.LEARNING_RATE,
    momentum=maskrcnn_cfg.OPTIM_MOMENTUM,
    weight_decay=maskrcnn_cfg.OPTIM_WEIGHT_DECAY,
)
# Multiplies the learning rate by OPTIM_GAMMA every OPTIM_STEP_SIZE scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=maskrcnn_cfg.OPTIM_STEP_SIZE,
    gamma=maskrcnn_cfg.OPTIM_GAMMA,
)

In [6]:
# Fine-tuning loop: one optimizer step per batch, one scheduler step per epoch.
model.train()
for epoch in range(maskrcnn_cfg.NUM_EPOCHS):
    for image, target in tqdm(train_dataloader, desc=f"epoch {epoch}"):
        # Skip batches whose target holds no boxes.
        if target['boxes'].shape[1] == 0:
            continue

        # NOTE(review): the batch is not moved to the GPU here although the
        # model was moved with model.cuda(); presumably ModelWrapper handles
        # device placement itself - confirm.
        # Only the mask loss is optimized; any other entries in the returned
        # dict are ignored.
        loss = model(model_input=image, labels=target)['loss_mask']

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # StepLR's step_size is conventionally counted in epochs, so advance the
    # schedule once per epoch. Stepping per batch would decay the learning rate
    # far faster, and irregularly because skipped batches would not count.
    lr_scheduler.step()


  0%|          | 0/451 [00:00<?, ?it/s]

image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  0%|          | 1/451 [00:02<17:22,  2.32s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  0%|          | 2/451 [00:03<12:23,  1.66s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  1%|          | 3/451 [00:04<10:28,  1.40s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  1%|          | 4/451 [00:05<10:02,  1.35s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  1%|          | 5/451 [00:06<09:18,  1.25s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  1%|▏         | 6/451 [00:07<08:38,  1.17s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  2%|▏         | 7/451 [00:08<07:49,  1.06s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  2%|▏         | 8/451 [00:09<07:00,  1.05it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  2%|▏         | 9/451 [00:10<06:04,  1.21it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  2%|▏         | 10/451 [00:10<05:25,  1.36it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  2%|▏         | 11/451 [00:11<05:16,  1.39it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  3%|▎         | 12/451 [00:11<05:02,  1.45it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  3%|▎         | 13/451 [00:12<05:24,  1.35it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  3%|▎         | 14/451 [00:13<05:59,  1.22it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  3%|▎         | 15/451 [00:14<06:47,  1.07it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  4%|▎         | 16/451 [00:16<07:06,  1.02it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  4%|▍         | 17/451 [00:17<07:12,  1.00it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  4%|▍         | 18/451 [00:18<07:20,  1.02s/it]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  4%|▍         | 19/451 [00:19<06:59,  1.03it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  4%|▍         | 20/451 [00:20<07:07,  1.01it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  5%|▍         | 21/451 [00:20<06:54,  1.04it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  5%|▍         | 22/451 [00:21<06:53,  1.04it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  5%|▌         | 23/451 [00:22<06:33,  1.09it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  5%|▌         | 24/451 [00:23<05:59,  1.19it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  6%|▌         | 25/451 [00:24<05:52,  1.21it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  6%|▌         | 26/451 [00:24<05:25,  1.31it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  6%|▌         | 27/451 [00:25<05:44,  1.23it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  6%|▌         | 28/451 [00:26<05:39,  1.25it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  6%|▋         | 29/451 [00:27<05:48,  1.21it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  7%|▋         | 30/451 [00:28<05:36,  1.25it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  7%|▋         | 31/451 [00:28<05:41,  1.23it/s]

torch.Size([0])
image type
<class 'PIL.Image.Image'>
type after transform
<class 'torch.Tensor'>
torch.Size([3, 256, 256])


  7%|▋         | 31/451 [00:29<06:42,  1.04it/s]

torch.Size([1, 4])





ValueError: Unknown image shape: torch.Size([3, 256, 256]) for torch tensor! Must be B,C,H,W.