# Train and Evaluate Mask R-CNN with PointRend

In [1]:
## general libraries
import numpy as np
import os
import cv2
import csv
import random
import operator
from collections import OrderedDict
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

## detectron2-libraries 
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.engine.hooks import HookBase
import detectron2.utils.comm as comm

# import PointRend project
from detectron2.projects import point_rend

import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 10,10
def imshow(img):
    """Show an image with matplotlib after swapping channels 0 and 2.

    The channel order is reversed (index 2, 1, 0) before plotting;
    presumably the input is a BGR image as produced by OpenCV -- confirm
    against the callers.
    """
    swapped = img[:, :, [2, 1, 0]]
    plt.imshow(swapped)
    plt.axis("off")  # hide the axis ticks and frame
    plt.show()
       
## restrict CUDA to the device with index 0 (i.e. the first GPU)
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def check_direxcist(dir):
    """Create the folder ``dir`` (including missing parents) if needed.

    Args:
        dir: Path of the folder to create, or ``None`` to do nothing.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
    """
    if dir is None:
        return
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(dir, exist_ok=True)

### Organize the LabelMe dataset as follows

./datasets/potato

    img_001.png
    
    img_001.json


In [3]:
# Dataset location and the identifier of this training run
dataroot = "/mnt/data2/PieterBlok/Potato/Images/train_val_test"
training_id = "PointRend_Potato_20230808"

# Each run gets its own sub-folder for weights and for results
weightsfolder, resultsfolder = (
    os.path.join(base, training_id) for base in ("./weights", "./results")
)

# Make sure both output folders exist before anything is written to them
for folder in (weightsfolder, resultsfolder):
    check_direxcist(folder)

### Prepare the Mask R-CNN dataset (annotated with the LabelMe software, individual jsons)

In [4]:
from utils.mrcnn_tools import prepare_dataset

# Class names of the annotations; also used later to size the model heads
# and to build the per-class columns of the results file
classes = ['Potato']
# presumably the train / val / test fractions -- confirm against prepare_dataset
train_val_test_split = [0.6, 0.2, 0.2]
# The later cells read train.json, val.json and test.json from dataroot
prepare_dataset(dataroot, classes, train_val_test_split)

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  3.16it/s]


126 valid images and annotations found!
Converting annotations...

train.json


100%|███████████████████████████████████████████| 75/75 [00:01<00:00, 48.95it/s]



val.json


100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 49.18it/s]



test.json


100%|███████████████████████████████████████████| 26/26 [00:00<00:00, 44.52it/s]


### Initialize the output-file

In [5]:
# Header of the results file: training-set size, overall AP and one
# 'AP-<class>' column per class
segm_strings = ['AP-' + c for c in classes]
write_strings = ['train_size', 'AP'] + segm_strings
csv_name = training_id + '.csv'
# Mode 'w' starts a fresh file, so results of an earlier run are overwritten
with open(os.path.join(resultsfolder, csv_name), 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
    csvwriter.writerow(write_strings)

### Register the datasets for Mask R-CNN training and evaluation

In [6]:
# Register every split under its own name; the annotation file is
# <split>.json inside dataroot and the images are looked up in dataroot too
for split in ("train", "val", "test"):
    register_coco_instances(split, {}, os.path.join(dataroot, split + ".json"), dataroot)

# Metadata handles of the three splits
train_metadata, val_metadata, test_metadata = (
    MetadataCatalog.get(name) for name in ("train", "val", "test")
)

# Load the annotation dicts of each split
dataset_dicts_train = DatasetCatalog.get("train")
dataset_dicts_val = DatasetCatalog.get("val")
dataset_dicts_test = DatasetCatalog.get("test")

[32m[09/06 13:22:53 d2.data.datasets.coco]: [0mLoaded 75 images in COCO format from /mnt/data2/PieterBlok/Potato/Images/train_val_test/train.json
[32m[09/06 13:22:53 d2.data.datasets.coco]: [0mLoaded 25 images in COCO format from /mnt/data2/PieterBlok/Potato/Images/train_val_test/val.json
[32m[09/06 13:22:53 d2.data.datasets.coco]: [0mLoaded 26 images in COCO format from /mnt/data2/PieterBlok/Potato/Images/train_val_test/test.json


### Initialize the training and inference parameters

In [7]:
## Hook to automatically save the best checkpoint
class BestCheckpointer(HookBase):
    """Training hook that saves ``best_model`` whenever a metric improves.

    The best value seen so far is kept in the trainer's event storage under
    the name ``'highest_value'``.

    Args:
        eval_period: The metric is checked every ``eval_period`` iterations;
            a value <= 0 disables the periodic check.
        val_value: Baseline the metric has to exceed before the first
            checkpoint is written.
        metric: Name of the scalar in the event storage to track,
            e.g. ``'segm/AP'``.
    """

    def __init__(self, eval_period, val_value, metric):
        self._period = eval_period
        self.val_value = val_value
        self.metric = metric
        self.logger = setup_logger(name="d2.checkpointer.best")

    def _save_if_improved(self, warn_if_missing=True):
        """Compare the latest metric value with the best one and save on improvement."""
        latest = self.trainer.storage.latest()

        if self.metric not in latest:
            # Previously this case was swallowed silently, which hides e.g. a
            # misspelled metric name. Only the main process reports it.
            if warn_if_missing and comm.is_main_process():
                self.logger.warning(
                    "metric '{}' not found in the event storage, no best model saved".format(self.metric)
                )
            return

        # Storage entries are indexed with [0] to get the scalar value
        current_value = latest[self.metric][0]
        if 'highest_value' in latest:
            highest_value = latest['highest_value'][0]
        else:
            highest_value = self.val_value

        self.logger.info("current-value ({:s}): {:.2f}, highest-value ({:s}): {:.2f}".format(self.metric, current_value, self.metric, highest_value))

        if current_value > highest_value:
            self.logger.info("saving best model...")
            self.trainer.checkpointer.save("best_model")
            self.trainer.storage.put_scalar('highest_value', current_value)

    def store_best_model(self):
        """Save ``best_model`` if the tracked metric improved, then synchronize.

        The barrier is reached by every caller regardless of the outcome, so
        that it cannot be entered by only some of the processes.
        """
        self._save_if_improved()
        comm.synchronize()

    def after_step(self):
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self.store_best_model()

    def after_train(self):
        # NOTE(review): recent detectron2 versions run the evaluation of the
        # last iteration in EvalHook.after_train, i.e. after the final
        # after_step -- confirm against the installed version. Checking once
        # more here makes sure that result is considered as well; it is a
        # no-op when the latest value was already processed.
        self._save_if_improved(warn_if_missing=False)


## CustomTrainer with evaluator and automatic checkpoint-saver
class CustomTrainer(DefaultTrainer):
    """DefaultTrainer with a COCO evaluator and a best-checkpoint hook."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for the 'bbox' and 'segm' tasks.

        Args:
            cfg: Config node; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: Name of the registered dataset to evaluate.
            output_folder: Folder for the evaluator output; defaults to
                ``<cfg.OUTPUT_DIR>/inference``.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, ("bbox", "segm"), False, output_folder)

    def build_hooks(self):
        """Extend the default hooks with a BestCheckpointer tracking 'segm/AP'."""
        hooks = super().build_hooks()
        # Read the period from the trainer's own config instead of the
        # notebook-global `cfg`, so the class also works when that global is
        # missing or has been changed after the trainer was created.
        # The hook is inserted just before the last default hook.
        hooks.insert(-1, BestCheckpointer(self.cfg.TEST.EVAL_PERIOD, 0.0, 'segm/AP'))
        return hooks
    
    
# Start from the default config, add the PointRend options and load the
# PointRend X-101 base configuration
cfg = get_cfg()
point_rend.add_pointrend_config(cfg)
cfg.merge_from_file("./detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco.yaml")
cfg.DATASETS.TRAIN = ("train",)
cfg.DATASETS.TEST = ("val",)  # the periodic evaluation during training runs on the validation split
cfg.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"

# solver file settings extracted from: https://github.com/facebookresearch/Detectron/blob/master/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml
# Count the comma-separated device ids; taking len() of the raw string would
# count characters instead ("0,1" -> 3, "10" -> 2).
# NOTE(review): NUM_GPUS is not read anywhere else in this notebook -- confirm it is needed.
visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
cfg.NUM_GPUS = len([d for d in visible_devices.split(",") if d.strip()])
cfg.DATALOADER.NUM_WORKERS = 2
# Initialize from the COCO-pretrained PointRend weights
cfg.MODEL.WEIGHTS = "detectron2://PointRend/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco/28119989/model_final_ba17b9.pkl"
cfg.SOLVER.IMS_PER_BATCH = 1
cfg.SOLVER.WEIGHT_DECAY = 0.0001
# NOTE(review): LR_POLICY looks like a key from the original Detectron config
# linked above -- confirm that detectron2 actually uses it.
cfg.SOLVER.LR_POLICY = 'steps_with_decay'
cfg.SOLVER.BASE_LR = 0.01
cfg.SOLVER.GAMMA = 0.1
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = (5000, 8000)
# A period larger than MAX_ITER: no periodic checkpoints are written, the
# BestCheckpointer hook takes care of saving
cfg.SOLVER.CHECKPOINT_PERIOD = cfg.SOLVER.MAX_ITER+1
cfg.TEST.EVAL_PERIOD = 500
# Size the prediction heads for the classes of this dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)
cfg.MODEL.POINT_HEAD.NUM_CLASSES = len(classes)

cfg.OUTPUT_DIR = weightsfolder
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CustomTrainer(cfg) 

[32m[09/06 13:23:03 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[09/06 13:23:03 d2.data.datasets.coco]: [0mLoaded 75 images in COCO format from /mnt/data2/PieterBlok/Potato/Images/train_val_test/train.json
[32m[09/06 13:23:03 d2.data.build]: [0mRemoved 0 images with no usable annotations. 75 images left.
[32m[09/06 13:23:03 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   Potato   | 2482         |
|            |              |[0m
[32m[09/06 13:23:03 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[09/06 13:23:03 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[09/06 13:23:03 d2.data.common]: [0mSerializing 75 elements to byte tensors and concatenating them all ...
[32m[09/06 13:23:03 d2.data.common]: [0mSerialized dataset takes 3.34 MiB


## Start the training 

In [8]:
# resume=False: do not continue from an earlier run in cfg.OUTPUT_DIR
# (presumably the weights in cfg.MODEL.WEIGHTS are loaded instead -- see the
# detectron2 documentation of resume_or_load)
trainer.resume_or_load(resume=False)
# Runs the training loop, including the hooks built by CustomTrainer
trainer.train()



Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.coarse_head.prediction.weight' to the model due to incompatible shapes: (3920, 1024) in the checkpoint but (49, 10

[32m[09/06 13:23:17 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[09/06 13:23:31 d2.utils.events]: [0m eta: 2:10:38  iter: 19  total_loss: 2.819  loss_cls: 0.5355  loss_box_reg: 0.7432  loss_mask: 0.6762  loss_mask_point: 0.6641  loss_rpn_cls: 0.1875  loss_rpn_loc: 0.07203  time: 0.7298  data_time: 0.0173  lr: 0.00019981  max_mem: 4728M
[32m[09/06 13:23:35 d2.engine.hooks]: [0mOverall training speed: 22 iterations in 0:00:16 (0.7604 s / it)
[32m[09/06 13:23:35 d2.engine.hooks]: [0mTotal training time: 0:00:16 (0:00:00 on hooks)
[32m[09/06 13:23:35 d2.utils.events]: [0m eta: 2:10:30  iter: 24  total_loss: 2.705  loss_cls: 0.4984  loss_box_reg: 0.7376  loss_mask: 0.6567  loss_mask_point: 0.6529  loss_rpn_cls: 0.0342  loss_rpn_loc: 0.07033  time: 0.7237  data_time: 0.0058  lr: 0.00023977  max_mem: 4728M


KeyboardInterrupt: 

### Evaluation

In [9]:
# Point the config at the checkpoint written by the BestCheckpointer hook
cfg.MODEL.WEIGHTS = os.path.join(weightsfolder, "best_model.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8   # set the testing threshold for this model
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3
cfg.DATASETS.TEST = ("test",)

# DefaultPredictor builds the model from cfg and loads cfg.MODEL.WEIGHTS
# itself, so building and loading a second copy by hand is not needed.
predictor = DefaultPredictor(cfg)
# Keep the names `model` and `checkpointer` available; both now refer to the
# predictor's (already loaded) model.
model = predictor.model
checkpointer = DetectionCheckpointer(model)

[32m[09/06 13:23:54 d2.checkpoint.c2_model_loading]: [0mFollowing weights matched with model:
| Names in Model                                            | Names in Checkpoint                                                                                  | Shapes                                          |
|:----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.*                         | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| backbone.bottom_up.res2.0.conv2.*                         | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,8,3,3)         |
| backbone.bottom_up.res2.0.conv3.*         

[32m[09/06 13:23:56 d2.checkpoint.c2_model_loading]: [0mFollowing weights matched with model:
| Names in Model                                            | Names in Checkpoint                                                                                  | Shapes                                          |
|:----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.*                         | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| backbone.bottom_up.res2.0.conv2.*                         | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,8,3,3)         |
| backbone.bottom_up.res2.0.conv3.*         



### Evaluate the algorithm

In [10]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# COCO-style evaluation of boxes and masks on the test split; the evaluator
# output goes to resultsfolder
evaluator = COCOEvaluator("test", ("bbox", "segm"), False, output_dir=resultsfolder)
val_loader = build_detection_test_loader(cfg, "test")
# Evaluate the model restored from best_model.pth (the predictor's model),
# not trainer.model, which holds whatever state training stopped in and was
# built before the test-time thresholds were set.
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)

[32m[09/06 13:24:02 d2.data.datasets.coco]: [0mLoaded 26 images in COCO format from /mnt/data2/PieterBlok/Potato/Images/train_val_test/test.json
[32m[09/06 13:24:02 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   Potato   | 864          |
|            |              |[0m
[32m[09/06 13:24:02 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[09/06 13:24:02 d2.data.common]: [0mSerializing 26 elements to byte tensors and concatenating them all ...
[32m[09/06 13:24:02 d2.data.common]: [0mSerialized dataset takes 1.16 MiB
[32m[09/06 13:24:02 d2.evaluation.evaluator]: [0mStart inference on 26 batches
[32m[09/06 13:24:07 d2.evaluation.evaluator]: [0mInference done 11/26. Dataloading: 0.0009 s/iter. Inference: 0.3248 s/iter. Eval: 0.1408 s/iter. Total: 0.4664 s/iter. ETA=0

### Write the data to the csv file

In [11]:
# Column names of the per-class results: 'AP-<class>'
segm_strings = ['AP-' + c for c in classes]

segm_results = eval_results['segm']
overall_ap = round(segm_results['AP'], 1)

if len(classes) == 1:
    # With a single class the overall AP is written as the class AP
    # (presumably the evaluator reports no per-class entry in that case)
    segm_values = [overall_ap] * len(segm_strings)
else:
    segm_values = [round(segm_results[s], 1) for s in segm_strings]

# Row layout matches the header: train_size, AP, AP-<class>...
write_values = [len(dataset_dicts_train), overall_ap] + segm_values

# Append, so the header written earlier is kept
with open(os.path.join(resultsfolder, csv_name), 'a', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
    csvwriter.writerow(write_values)