In [None]:
# Fetch the AAPI_code repository; later cells import from it and read sample data under AAPI_code/data.
!git clone https://github.com/Auto-annotation-of-Pathology-Images/AAPI_code

In [None]:
# install dependencies: 
!pip install pyyaml==5.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

In [None]:
# install detectron2: (Colab has CUDA 10.1 + torch 1.7)
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
import torch
assert torch.__version__.startswith("1.7")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html
# exit(0)  # After installation, you need to "restart runtime" in Colab. This line can also restart runtime

In [None]:
from AAPI_code.Collage_generator import collage_generator
from AAPI_code.format_converter import format_converter
from AAPI_code.Collage_generator.utils import *
from PIL import Image
from tqdm.notebook import tqdm
import json

In [None]:
# Optional step: mount Google Drive into the Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# collage_generator setting

In [None]:
# Build the collage generator. The settings are gathered in one mapping so they
# can be scanned and tweaked in a single place.
collage_settings = dict(
    label_list=[],
    canvas_size=(3000, 3000),
    example_image="/content/AAPI_code/data/vignettes/background/11000_16000_0_background.png",
    gaussian_noise_constant=5,
    cluster_size=(2200, 2200),
)
col_gen = collage_generator(**collage_settings)

In [None]:
# Point the generator at the training vignettes.
# NOTE(review): the method name suggests images keep their original size — confirm in AAPI_code.
TRAIN_VIGNETTE_DIR = "/content/AAPI_code/data/vignettes/train"
col_gen.import_images_from_directory_original_size(root_path=TRAIN_VIGNETTE_DIR)

# Generation Example, please have format = "COCO"

In [None]:
# Generate one example collage. The call yields four values: the collage, its
# mask, and two dictionaries (color_dict and label_dict).
generation_options = {
    "item_num": 5,
    "ratio_dict": {"cluster": 0.2, "artery": 0.5, 'arteriole': 0.3},
    "background_color": False,
    "format": 'COCO',
}
collage, mask, color_dict, label_dict = col_gen.generate(**generation_options)

# Keep the mask as image

Keeping the mask as an image makes further cutting possible. Only when we need to run Detectron2 do we parse all the data into the required format.

Original saving:
1. Collage
2. Mask (each instance is saved in a different color, so individual instances can be distinguished)
3. Dict (maps each individual color to its corresponding class)

In [None]:
# Converter object; the cells below call its save_coco_raw, read_coco_raw,
# save_sliding_window and parse_detectron methods.
fc = format_converter()

In [None]:
# Save the raw collage, its mask and the color dictionary, using root_path
# "output" and the name 'train'.
raw_export = {
    "collage": collage,
    "mask": mask,
    "color_dict": color_dict,
    "root_path": "output",
    "name": 'train',
}
fc.save_coco_raw(**raw_export)

In [None]:
# Read the collage, mask and color dictionary back, using the same root_path and name as the save call.
collage, mask, color_dict = fc.read_coco_raw(root_path = "output", name = 'train')

Save the collage and mask as patches, using a rolling (sliding) window.

The saving path then contains:
1. each individual patch together with its mask
2. a copy of the color dict

This method can be used for both training and testing data.

In [None]:
# Window size and offset are given in the everyday (width, height) order, not as pixel size.
# Feel free to use a non-square window such as np.array([1024,768]).
window_wh = np.array([1024,1024])
offset_wh = np.array([256,256])
fc.save_sliding_window(
    collage = collage,
    mask = mask,
    color_dict = color_dict,
    image_name = "image_2",
    saving_path = "window_slide",
    window_size = window_wh,
    offset = offset_wh,
)

Each time you train with Detectron2, parse the saved files first. It is fine to save the files of several {image_name}s into one path; parse_detectron can handle that.

In [None]:
# Parse the saved patches into the records used for training (later cells sample
# from these and read each record's "file_name").
training_set = fc.parse_detectron(path = "window_slide")
# NOTE(review): no cell visible in this notebook writes to "window_slide_1" — confirm
# the test patches exist there before running this line.
test_set = fc.parse_detectron(path = "window_slide_1")

# Detectron2

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

Detectron2 identifies datasets by a string name. You need to:
1. Register the dataset with `DatasetCatalog.register(name, func)`; `func` must be a function that returns the dataset as a list of JSON-style records.
2. Look the dataset up by name in `MetadataCatalog` and set its classes. Note that our labels are 1-5, so a `background` entry is needed at the front of the class list.
3. Prepare the metadata.

In [None]:
def _load_aapi_train(p="window_slide"):
    """Return the already-parsed training records; the `p` argument is not used."""
    return training_set


# Register the loader under the dataset name, then attach the class names to the
# metadata entry of the same name.
DatasetCatalog.register("AAPI_train", _load_aapi_train)
AAPI_metadata = MetadataCatalog.get("AAPI_train")
AAPI_metadata.set(
    thing_classes=[
        'background',
        'arteriole',
        'artery',
        'distal_tubule',
        'glomerulus',
        'proximal_tubule',
    ]
)

Preview some input

In [None]:
def preview(dataset_dicts, metadata, preview = 3):
    """Display a few randomly chosen dataset records with their annotations drawn on top.

    Args:
        dataset_dicts: Sequence of dataset records. Each record must provide a
            "file_name" entry that cv2.imread can load.
        metadata: Metadata object handed to the Visualizer for drawing.
        preview: Maximum number of records to show. When fewer records are
            available, all of them are shown.

    Raises:
        FileNotFoundError: If the image of a sampled record cannot be read.
    """
    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(preview, len(dataset_dicts))
    for d in random.sample(dataset_dicts, sample_size):
        img = cv2.imread(d["file_name"])
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"could not read image: {d['file_name']}")
        # The channel axis is reversed before drawing and reversed back for display.
        visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.5)
        out = visualizer.draw_dataset_dict(d)
        cv2_imshow(out.get_image()[:, :, ::-1])

# Show three random training records with their annotations drawn on top.
preview(training_set, AAPI_metadata, preview=3)

settings and training

In [None]:
from detectron2.engine import DefaultTrainer

# Model-zoo entry used for both the architecture config and the pretrained weights.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_CONFIG))
cfg.DATASETS.TRAIN = ("AAPI_train",)
cfg.DATASETS.TEST = ()  # no test dataset is configured for the training run
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 1000    # 1000 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 32   # faster, and good enough for this toy dataset (default: 512)
# Derive the class count from the registered class list so the two cannot drift apart
# (currently 'background' plus five tissue classes = 6).
# See https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(AAPI_metadata.thing_classes)

# The trainer writes into cfg.OUTPUT_DIR, so make sure it exists first.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # start from cfg.MODEL.WEIGHTS rather than resuming a previous run
trainer.train()

In [None]:
# Look at training curves in tensorboard:
# NOTE(review): "output" assumes cfg.OUTPUT_DIR is still its default value — confirm if the config changes.
%load_ext tensorboard
%tensorboard --logdir output

Save the model: model is automatically saved at the following path

In [None]:
# Inference has to run with the same parameters as training, so cfg is reused
# as-is; only the weights are redirected to the checkpoint we just trained.
trained_weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = trained_weights_path
print(cfg.MODEL.WEIGHTS)

Run prediction

In [None]:
# Register the test split under its own name. The loader returns `test_set`,
# the records parsed from the test patches.
DatasetCatalog.register("AAPI_test", lambda: test_set)
MetadataCatalog.get("AAPI_test").set(thing_classes=['background','arteriole', 'artery', 'distal_tubule', 'glomerulus', 'proximal_tubule'])
# Train and test metadata carry the same class list, so either works for drawing.
AAPI_metadata = MetadataCatalog.get("AAPI_test")

In [None]:
# Custom score threshold applied at test time; set it before the predictor is built from cfg.
SCORE_THRESHOLD = 0.3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = SCORE_THRESHOLD
predictor = DefaultPredictor(cfg)

In [None]:
from detectron2.utils.visualizer import ColorMode

# Run the predictor on three random test records and display the drawn predictions.
# `im` remains bound to the last image after the loop; a later cell reuses it.
for record in random.sample(test_set, 3):
    im = cv2.imread(record["file_name"])
    # Output format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    outputs = predictor(im)
    viz = Visualizer(
        im[:, :, ::-1],
        metadata=AAPI_metadata,
        scale=0.5,
        # IMAGE_BW removes the colors of unsegmented pixels; only available for segmentation models.
        instance_mode=ColorMode.IMAGE_BW,
    )
    cpu_instances = outputs["instances"].to("cpu")
    drawn = viz.draw_instance_predictions(cpu_instances)
    cv2_imshow(drawn.get_image()[:, :, ::-1])

Predict on any image

In [None]:
def predict(image):
    """Run the global `predictor` on one image and return its predicted instances.

    Args:
        image: Either an image array that `predictor` accepts directly, or a
            path (str or os.PathLike) that is first loaded with cv2.imread.

    Returns:
        The "instances" entry of the predictor output.

    Raises:
        FileNotFoundError: If a path is given but the image cannot be read.
    """
    if isinstance(image, (str, os.PathLike)):
        image_path = os.fspath(image)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"could not read image: {image_path}")
    return predictor(image)["instances"]

In [None]:
# `im` is the last image loaded by the visualisation loop in the earlier cell.
pred_instance = predict(im)

In [None]:
# Two independent selections on the predicted instances: one keeps class id 3,
# the other keeps scores above 0.9.
is_category_3 = pred_instance.pred_classes == 3
category_3_detections = pred_instance[is_category_3]

is_confident = pred_instance.scores > 0.9
confident_detections = pred_instance[is_confident]

In [None]:
# Display the predicted masks of the class-id-3 detections as the cell output.
category_3_detections.pred_masks

In [None]:
# Merge the class-id-3 masks into a single 2-D mask and show it as a black/white image.
# Assumes pred_masks is a boolean (N, H, W) tensor, per the Detectron2 model output
# format — TODO confirm. With N == 0 the merged mask is all False.
category_3_union = category_3_detections.pred_masks.any(dim=0)
cv2_imshow(category_3_union.cpu().numpy().astype(np.uint8) * 255)