# Train custom instance segmentation model using Detectron2 - on your own dataset

Create your own dataset by annotating your images for object detection with any annotation software that can export annotations in COCO JSON format. I used https://www.makesense.ai/ for this tutorial: I annotated the objects with the polygon tool and exported the annotations as "Single file in COCO JSON format".

## Install Detectron2

In [None]:
# Pin PyYAML before installing detectron2's requirements.
!python -m pip install pyyaml==5.1
# NOTE(review): distutils was removed from the standard library in Python 3.12 — confirm the runtime's Python version.
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
# Run setup.py only to read its `install_requires` list; the package itself is not pip-installed here.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Install every requirement listed by setup.py, each one shell-quoted.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Make the cloned source tree importable as `detectron2`.
sys.path.insert(0, os.path.abspath('./detectron2'))
# onnx is imported by a later cell.
!pip install onnx

'''------------------------------------------------------------------------------------'''
# Alternative: properly install detectron2 from source (do not install both ways).
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import torch, detectron2
# Print the CUDA compiler version through the notebook shell escape.
!nvcc --version
# Keep only the "major.minor" part of the torch version string.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# Text after the last "+" in the torch version string; when the string has no
# "+", this is the whole version string rather than a CUDA tag.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
# Basic setup: import detectron2 and enable its logger.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# Common third-party and standard libraries used throughout the notebook.
import numpy as np
import os, json, cv2, random
# cv2_imshow comes from the google.colab package, so this import needs a Colab runtime.
from google.colab.patches import cv2_imshow
import torch
import onnx

# Common detectron2 utilities.
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Imports intended for ONNX export.
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.export import TracingAdapter
from detectron2.data import build_detection_test_loader


# Train on a custom dataset

## Prepare

Import the function needed to register datasets in the COCO format, then register both the training and validation datasets. Please note that we are working with training (and validation) data that is in the COCO format, where a single JSON file describes all the annotations for all the images of a split. <p>
Here, we are naming our training data as 'my_dataset_train' and the validation data as 'my_dataset_val'.



In [None]:
import os
import shutil

# Dataset and model selection shared by the following cells.
class_name = "tooth"
dataset_dir = class_name + "_dataset_3"
model_name = "mask_rcnn_R_50_FPN_3x"
# Other model-zoo configurations that can be selected instead:
# model_name = 'mask_rcnn_R_101_FPN_3x'
# model_name = 'mask_rcnn_R_50_C4_1x'
# model_name = 'mask_rcnn_R_50_C4_3x'

# Report how many directory entries each dataset sub-folder contains.
# NOTE(review): these paths are relative to "AHE_dataset/", while later cells
# use "./detectron2/AHE_dataset/" — confirm which root is intended.
for split_title, split_folder in (
    ("train images", "train_image"),
    ("valid images", "valid_image"),
    ("valid masks", "valid_mask"),
):
    split_entry_count = len(os.listdir(f"AHE_dataset/{dataset_dir}/{split_folder}"))
    print(f"{split_title} count: ", split_entry_count)

# Optional helper (disabled): empty the per-model instance-result folder before a new run.
# def clear_directory(dir_path):
#     for filename in os.listdir(dir_path):
#         file_path = os.path.join(dir_path, filename)
#         try:
#             if os.path.isfile(file_path) or os.path.islink(file_path):
#                 os.unlink(file_path)  # delete the file or symbolic link
#             elif os.path.isdir(file_path):
#                 shutil.rmtree(file_path)  # delete the directory
#         except Exception as e:
#             print(f'Could not delete {file_path}. Reason: {e}')
# clear_directory("AHE_dataset/" + dataset_dir + f"/valid_results_instance({model_name})")
# os.makedirs("AHE_dataset/" + dataset_dir + f"/valid_results_instance({model_name})", exist_ok=True)
# print("valid instance masks count: ", len(os.listdir("AHE_dataset/" + dataset_dir + f"/valid_results_instance({model_name})")))

In [None]:
from detectron2.data.datasets import register_coco_instances

# Registered dataset name -> (COCO annotation JSON, image folder).
dataset_root = f"./detectron2/AHE_dataset/{dataset_dir}"
coco_splits = {
    "my_dataset_train": (
        f"{dataset_root}/coco/{class_name}_annotations_aug.json",
        f"{dataset_root}/train_image",
    ),
    "my_dataset_val": (
        f"{dataset_root}/coco/{class_name}_annotations.json",
        f"{dataset_root}/valid_image",
    ),
}
# Register the training split first, then the validation split.
for registered_name, (annotation_json, image_root) in coco_splits.items():
    register_coco_instances(registered_name, {}, annotation_json, image_root)


Let us extract the metadata and dataset dictionaries for both training and validation datasets. These can be used later for other purposes, like **visualization**, **model training**, **evaluation**, etc. We will see a visualization example right away.

In [None]:
# Fetch the metadata object and the list of dataset dicts registered as "my_dataset_train".
train_metadata, train_dataset_dicts = (
    MetadataCatalog.get("my_dataset_train"),
    DatasetCatalog.get("my_dataset_train"),
)

In [None]:
# Fetch the metadata object and the list of dataset dicts registered as "my_dataset_val".
val_metadata, val_dataset_dicts = (
    MetadataCatalog.get("my_dataset_val"),
    DatasetCatalog.get("my_dataset_val"),
)

In [None]:
from matplotlib import pyplot as plt

# Visualize up to two random training samples with their ground-truth annotations.
# The sample size is capped so that a dataset with fewer than two records does
# not make random.sample raise ValueError.
sample_count = min(2, len(train_dataset_dicts))
for d in random.sample(train_dataset_dicts, sample_count):
    print("影像檔名：", d["file_name"])  # print the image file name
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")
    # cv2 loads images as BGR; the Visualizer is given the channel-reversed (RGB) view.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    # The visualizer output is already RGB, which is what matplotlib expects, so
    # it is shown as-is. Reversing the channels again (as is needed for the
    # BGR-based cv2_imshow) would swap red and blue in the figure.
    plt.imshow(vis.get_image())
    plt.show()

## Train

Now we are ready to train a Mask R-CNN model using the Detectron2 library. We start by setting up a configuration file (.cfg) for the model. The configuration file contains many details including the output directory path, training dataset information, pre-trained weights, base learning rate, maximum number of iterations, etc.

In [None]:
from detectron2.engine import DefaultTrainer

# Learning-rate preset selector:
#   "1" -> low constant base LR with the default schedule
#   "2" -> higher base LR with warm-up and a cosine schedule
# No earlier cell in this notebook assigns `LR`, which made this cell fail with
# NameError. Keep a value that was defined elsewhere, otherwise use preset "1".
LR = globals().get("LR", "1")

# Base configuration: COCO-InstanceSegmentation/<model_name>.yaml from the model zoo
# (for example COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml).
cfg = get_cfg()
cfg.OUTPUT_DIR = f"./detectron2/AHE_dataset/{dataset_dir}/model"
cfg.merge_from_file(model_zoo.get_config_file(f"COCO-InstanceSegmentation/{model_name}.yaml"))
# Start from the weights published for the same model-zoo configuration.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"COCO-InstanceSegmentation/{model_name}.yaml")
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ()  # no evaluation dataset during training
cfg.DATALOADER.NUM_WORKERS = 2
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.MAX_ITER = 1000    # 1000 iterations seems good enough for this dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256   # Default is 512, using 256 for this dataset.
# NOTE: this is the number of classes without the background. Do not use num_classes+1 here.
# NOTE(review): hard-coded to 4 — confirm it matches the number of categories in the COCO JSON.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4  # We have 4 classes.

if LR == "1":
    cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
elif LR == "2":
    # NOTE(review): the stock DefaultTrainer optimizer builder appears not to read
    # SOLVER.OPTIMIZER, so this key may have no effect unless the trainer's
    # build_optimizer is overridden — confirm.
    cfg.SOLVER.OPTIMIZER = "Adam"
    cfg.SOLVER.BASE_LR = 0.0025
    cfg.SOLVER.WARMUP_ITERS = 200
    cfg.SOLVER.WARMUP_FACTOR = 1.0 / 1000
    cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"  # explicitly select the LR scheduler
# Any other LR value leaves the solver settings from the merged config untouched.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)  # Create a DefaultTrainer instance with the given configuration
# resume=False is passed so the run is not resumed from a previous checkpoint;
# presumably the trainer then starts from cfg.MODEL.WEIGHTS — confirm.
trainer.resume_or_load(resume=False)


Train

In [None]:
# Run training with the trainer built in the previous cell.
trainer.train()  # Start the training process

In [None]:
# Result folders whose contents are counted below.
# NOTE(review): these are rooted at "AHE_dataset/", while the inference cells
# write to "./detectron2/AHE_dataset/" — confirm which root is intended.
valid_results_dir = "AHE_dataset/" + dataset_dir + "/valid_results"
instance_results_dir = "AHE_dataset/" + dataset_dir + f"/valid_results_instance({model_name})"

# Create both folders first: listing "valid_results" before any cell had created
# it raised FileNotFoundError.
os.makedirs(valid_results_dir, exist_ok=True)
os.makedirs(instance_results_dir, exist_ok=True)
print("images count: ", len(os.listdir(valid_results_dir)))
print("masks count: ", len(os.listdir(instance_results_dir)))

# Give the training metrics file a name that records the class and the model.
# os.replace overwrites an existing target on every platform, so re-running the
# notebook does not fail on Windows the way os.rename does.
model_dir = f"./detectron2/AHE_dataset/{dataset_dir}/model"
os.replace(f"{model_dir}/metrics.json", f"{model_dir}/{class_name}_metrics({model_name}).json")

Save the config file, for potential future use

In [None]:
import yaml

# Save the configuration to a config.yaml file.
# cfg.dump() returns the configuration as a YAML string. Passing the config
# object straight to yaml.dump serializes it as a Python object instead of a
# plain YAML mapping.
config_yaml_path = f"./detectron2/AHE_dataset/{dataset_dir}/model/config.yaml"
with open(config_yaml_path, 'w') as file:
    file.write(cfg.dump())

# Inference & evaluation using the trained model

In [None]:
# Inference reuses the training configuration; only the two settings below are
# changed on the existing `cfg` object.

# Point the config at the model file expected in the output directory.
trained_weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = trained_weights_path

# Custom score threshold for testing.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

predictor = DefaultPredictor(cfg)

Verify segmentation on random validation images

In [None]:
from detectron2.utils.visualizer import ColorMode

# Number of random validation images to display.
preview_count = 1
for val_record in random.sample(val_dataset_dicts, preview_count):
    bgr_image = cv2.imread(val_record["file_name"])
    outputs = predictor(bgr_image)
    # IMAGE_BW removes the colors of unsegmented pixels; this option is only
    # available for segmentation models.
    preview = Visualizer(
        bgr_image[:, :, ::-1],
        metadata=val_metadata,
        scale=0.7,
        instance_mode=ColorMode.IMAGE_BW,
    )
    cpu_instances = outputs["instances"].to("cpu")
    drawn = preview.draw_instance_predictions(cpu_instances)
    # Reverse the channel order of the drawn image again before display.
    cv2_imshow(drawn.get_image()[:, :, ::-1])

Check average precision and recall. (Meaningful numbers need more validation data than just 2 images with a handful of annotations.)

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# NOTE(review): this output folder is rooted at "./AHE_dataset/", unlike the
# "./detectron2/AHE_dataset/" root used for the model — confirm this is intended.
eval_output_dir = f"./AHE_dataset/{dataset_dir}/output"
evaluator = COCOEvaluator("my_dataset_val", output_dir=eval_output_dir)
val_loader = build_detection_test_loader(cfg, "my_dataset_val")

# Run the trained model over the validation loader and print the evaluator's results.
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)
# another equivalent way to evaluate the model is to use `trainer.test`
# another equivalent way to evaluate the model is to use `trainer.test`

**Process multiple images in a directory and save the results in an output directory**

In [None]:
# Directory path to the input images folder
input_images_directory = f"./detectron2/AHE_dataset/{dataset_dir}/valid_image"

# Output directory where the segmented images will be saved
output_directory = f"./detectron2/AHE_dataset/{dataset_dir}/valid_results"  # Replace this with the path to your desired output directory
os.makedirs(output_directory, exist_ok=True)

# Loop over the images in the input folder. Sorting makes the processing order
# (and therefore the log) reproducible; os.listdir returns entries in arbitrary order.
for image_filename in sorted(os.listdir(input_images_directory)):
    # Only .png files are processed; filter before logging so skipped files are
    # not reported as if they had been segmented.
    image_stem, image_ext = os.path.splitext(image_filename)
    if image_ext != ".png":
        continue

    print("image: ", image_filename)
    image_path = os.path.join(input_images_directory, image_filename)

    new_im = cv2.imread(image_path)
    if new_im is None:
        # cv2.imread returns None for unreadable files; skip instead of crashing in the predictor.
        print("skipping unreadable image: ", image_path)
        continue

    # Perform prediction on the new image
    outputs = predictor(new_im)  # Format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    # Draw the predictions on the channel-reversed (RGB) view of the image.
    v = Visualizer(new_im[:, :, ::-1], metadata=train_metadata)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    # Output file name: <original stem>_result.png
    result_filename = image_stem + "_result.png"
    output_path = os.path.join(output_directory, result_filename)

    # Save the segmented image, reversing the channels again because cv2.imwrite expects BGR.
    print("output_path: ", output_path)
    cv2.imwrite(output_path, out.get_image()[:, :, ::-1])

print("Segmentation of all images completed.")


**Segment images and save object level information into a csv file.**




In [None]:
import csv
from skimage.measure import regionprops, label


# Requires the `predictor` object and `train_metadata` defined in earlier cells.

# Directory path to the input images folder
input_images_directory = f"./detectron2/AHE_dataset/{dataset_dir}/valid_image"

# Path of the CSV file that receives one row per detected object
output_csv_path = f"./detectron2/AHE_dataset/{dataset_dir}/valid_results/output_objects.csv"  # Replace this with the path to your desired output CSV file
# Make sure the target folder exists even if the batch-inference cell was not run.
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)

# Open the CSV file for writing
with open(output_csv_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)

    # Write the header row in the CSV file
    csvwriter.writerow(["File Name", "Class Name", "Object Number", "Area", "Centroid", "BoundingBox"])  # Add more columns as needed for other properties

    # Loop over the images in the input folder (sorted for a reproducible row order)
    for image_filename in sorted(os.listdir(input_images_directory)):
        # NOTE: only .png files are processed.
        if os.path.splitext(image_filename)[1] != ".png":
            continue

        image_path = os.path.join(input_images_directory, image_filename)
        new_im = cv2.imread(image_path)
        if new_im is None:
            # cv2.imread returns None for unreadable files; skip them.
            continue

        # Perform prediction on the new image
        outputs = predictor(new_im)  # Format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
        instances = outputs["instances"].to("cpu")

        # Boolean masks (one per predicted instance) and their predicted class indices.
        masks = instances.pred_masks.numpy().astype(bool)
        class_labels = instances.pred_classes.numpy()

        # Measure every instance from its own 2-D mask. Labelling the whole mask
        # stack at once treats it as one 3-D volume, so overlapping instances can
        # merge and fragmented ones can split; regions then stop lining up with
        # `class_labels`.
        for i, (instance_mask, class_label) in enumerate(zip(masks, class_labels)):
            # All pixels of the instance form label 1, so there is at most one region.
            props = regionprops(instance_mask.astype(np.uint8))
            if not props:
                continue  # empty mask: nothing to measure
            prop = props[0]

            object_number = i + 1  # Object number starts from 1

            # Map the class index to its name; keep 'Unknown' as the fallback for
            # an index outside the metadata's class list.
            class_index = int(class_label)
            thing_classes = train_metadata.thing_classes
            if 0 <= class_index < len(thing_classes):
                # Named differently from the notebook-level `class_name`, which
                # other cells use to build dataset paths.
                object_class_name = thing_classes[class_index]
            else:
                object_class_name = 'Unknown'

            # Centroid and bounding box are the region properties of the 2-D instance mask.
            csvwriter.writerow([image_filename, object_class_name, object_number, prop.area, prop.centroid, prop.bbox])  # Add more columns as needed for other properties

print("Object-level information saved to CSV file.")


**Saving binary (actually multinary) images for each class for further processing.** Here, for each input image we will save n images corresponding to the number of classes. In our example, we will save 4 images for each image corresponding to the 4 classes. Each of these images will contain objects numbered 1, 2, 3, etc. - basically instance segmentation like images. These images can be used for further downstream processing.

In [None]:
import os
import cv2
import numpy as np
import torch
from detectron2.utils.visualizer import Visualizer

# Directory path to the input images folder
input_images_directory = f"./detectron2/AHE_dataset/{dataset_dir}/valid_image"

# Output directory where the per-class label images will be saved
output_directory = f"./detectron2/AHE_dataset/{dataset_dir}" + f"/valid_results_instance({model_name})"  # Replace this with the path to your desired output directory
os.makedirs(output_directory, exist_ok=True)

# Loop over the images in the input folder (sorted for a reproducible order)
for image_filename in sorted(os.listdir(input_images_directory)):
    # NOTE: only .png files are processed.
    image_stem, image_ext = os.path.splitext(image_filename)
    if image_ext != ".png":
        continue

    image_path = os.path.join(input_images_directory, image_filename)
    new_im = cv2.imread(image_path)
    if new_im is None:
        # cv2.imread returns None for unreadable files; skip them.
        continue

    # Perform prediction on the new image
    outputs = predictor(new_im)  # Format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    # Work on CPU NumPy arrays so the cell does not depend on a "cuda:0" device
    # being available.
    instances = outputs["instances"].to("cpu")
    pred_masks = instances.pred_masks.numpy().astype(bool)
    pred_classes = instances.pred_classes.numpy()

    # One zero-filled label image per class. The size comes from the Instances
    # object rather than from the first mask, so an image with no detections
    # still yields (empty) outputs instead of raising IndexError.
    # NOTE(review): assumes instances.image_size is the (height, width) of the masks — confirm.
    mask_shape = tuple(instances.image_size)
    class_masks = {mask_class_name: np.zeros(mask_shape, dtype=np.uint8)
                   for mask_class_name in train_metadata.thing_classes}

    # Paint each instance into its class image with a unique label 1, 2, 3, ...
    # as described in the text above this cell; later instances overwrite earlier
    # ones where masks overlap.
    # NOTE(review): labels are stored as uint8, so more than 255 instances in one image would wrap.
    for i, (instance_mask, pred_class) in enumerate(zip(pred_masks, pred_classes)):
        # Named differently from the notebook-level `class_name`, which other
        # cells use to build dataset paths.
        mask_class_name = train_metadata.thing_classes[int(pred_class)]
        class_masks[mask_class_name][instance_mask] = i + 1

    # Save one label image per class
    for mask_class_name, class_mask in class_masks.items():
        # Output file name: <original stem>_<class name>_result.png
        class_filename = image_stem + f"_{mask_class_name}_result.png"
        class_output_path = os.path.join(output_directory, class_filename)

        # Save the image with unique integer labels
        cv2.imwrite(class_output_path, class_mask)

print("Segmentation of all images completed.")
