# MaskRCNN

### Category mapping

In [1]:
import os
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
import torchvision.transforms.functional as F
from torchvision.utils import draw_bounding_boxes
from tqdm import tqdm
from yacs.config import CfgNode

from src.config import default_map_builder_cfg, default_sim_cfg
from src.features.mapping import SemanticMap3DBuilder
from src.model.perception import map_processing
from src.model.perception.labeler import LabelGenerator
from src.utils import category_mapping
from src.utils.category_mapping import get_instance_index_to_reseal_name_dict
from src.utils.misc import get_semantic_map
from src.visualisation import instance_map_visualization
from src.visualisation.instance_map_visualization import visualize_2d_categorical_instance_map
from src.visualisation.semantic_map_visualization import (
    visualize_categorical_label_map,
    visualize_semantic_map,
)
from src.model.perception.model_wrapper import ModelWrapper

if pathlib.PurePath(os.getcwd()).name == 'notebooks':
    print(pathlib.PurePath(os.getcwd()).name)
    os.chdir('..')

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
notebooks


In [2]:
TRAJECTORY = "00006-HkseAnWCgqk"
ROOT = f"./data/interim/trajectories/train/{TRAJECTORY}"
DEPTH_MAP_DIR = f"./data/interim/trajectories/train/{TRAJECTORY}/D"
RGB_IMAGE_DIR = f"./data/interim/trajectories/train/{TRAJECTORY}/RGB"
POSITIONS_FILE = f"./data/interim/trajectories/train/{TRAJECTORY}/positions.npy"
ROTATIONS_FILE = f"./data/interim/trajectories/train/{TRAJECTORY}/rotations.npy"
SEMANTIC_MAP_DIR = f"./data/interim/trajectories/train/{TRAJECTORY}/Semantic"
trajectory_name = TRAJECTORY.split("-")[1]
SEMANTIC_INFO_FILE = f"./data/raw/train/scene_datasets/hm3d/train/{TRAJECTORY}/{trajectory_name}.semantic.txt"

sim_cfg = default_sim_cfg()
map_builder_cfg = default_map_builder_cfg()
map_builder_cfg.NUM_SEMANTIC_CLASSES = 6
map_builder_cfg.RESOLUTION = 0.05
map_builder_cfg.MAP_SIZE = [25, 1.5, 25]
map_builder_cfg.GET_ENTIRE_MAP = True
map_builder = SemanticMap3DBuilder(map_builder_cfg, sim_cfg)

model_config = CfgNode()
model_config.USE_INITIAL_TRANSFORMS = True
model_config.SCORE_THRESHOLD = 0.5
model_config.MASK_THRESHOLD = 0.5
model = ModelWrapper(model_config)
model.cuda()


In [3]:
rotations = np.load(ROTATIONS_FILE).view(dtype=np.quaternion)
positions = np.load(POSITIONS_FILE)
scene_index_to_category_index_map = category_mapping.get_scene_index_to_reseal_index_vectorized(SEMANTIC_INFO_FILE)

map_builder.clear()

def load_image(path):
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image / 255



for i in tqdm(range(0,400)):
    depth_map = np.load(f"{DEPTH_MAP_DIR}/{i}.npy")
    rgb_image = load_image(f"{RGB_IMAGE_DIR}/{i}.png")
    map = model(rgb_image)
    # saved_semantics = np.load(f"{SEMANTIC_MAP_DIR}/{i}.npy")
    # map = get_semantic_map(saved_semantics, scene_index_to_category_index_map, map_builder_cfg.NUM_SEMANTIC_CLASSES)
    pose = (positions[i], rotations[i])
    map_builder.update_point_cloud(map, depth_map, pose)
    if i % 10 == 1:
        map_builder.update_semantic_map()

map_builder.update_semantic_map()

100%|██████████| 400/400 [00:53<00:00,  7.52it/s]


In [4]:
semantic_map = map_builder.semantic_map
map_processor_cfg = CfgNode()
map_processor_cfg.NO_OBJECT_CONFIDENCE_THRESHOLD = 0.5
map_processor_cfg.HOLE_VOXEL_THRESHOLD = 2000
map_processor_cfg.OBJECT_VOXEL_THRESHOLD = 200
map_processor_cfg.DILATE = True

grid_index_of_origin = map_builder.get_grid_index_of_origin()

label_generator = LabelGenerator(semantic_map, grid_index_of_origin, map_builder_cfg, map_processor_cfg, sim_cfg.SENSOR_CFG)

In [5]:
from src.data.MaskRCNNDataset import MaskRCNNDataset
from torch.utils.data.dataloader import DataLoader
from src.config import default_maskrcnn_cfg
import torchvision
maskrcnn_cfg = default_maskrcnn_cfg()
transforms = torchvision.models.detection.MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT.transforms()
mask_dataset = MaskRCNNDataset(ROOT, transforms=transforms, label_generator=label_generator)
train_dataloader = DataLoader(mask_dataset, maskrcnn_cfg.BATCH_SIZE, maskrcnn_cfg.SHUFFLE)

params =  [p for p in model.maskrcnn.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=maskrcnn_cfg.LEARNING_RATE,
							momentum=maskrcnn_cfg.OPTIM_MOMENTUM, 
							weight_decay=maskrcnn_cfg.OPTIM_WEIGHT_DECAY)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 
												step_size=maskrcnn_cfg.OPTIM_STEP_SIZE,
												gamma=maskrcnn_cfg.OPTIM_GAMMA)

In [10]:
model.train()
for epoch in range(maskrcnn_cfg.NUM_EPOCHS):
	for image, target in tqdm(train_dataloader):
		
		#device = 'cuda'
		#image, target = image.to(device), target.to(device)
		if target['boxes'].shape[1] == 0:
			continue
		print(target.keys())
		target['boxes'] = target['boxes'][0]
		target['labels'] =	target['labels'][0] 
		target['masks'] = target['masks'][0]
		loss = model(model_input=image, labels=target)['loss_mask']

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
		lr_scheduler.step()


SyntaxError: invalid syntax (2684777602.py, line 11)