In [1]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
from PIL import Image
from datetime import datetime
import matplotlib.pyplot as plt
from skimage import measure
import os, json, cv2, random, pathlib, shutil

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import ColorMode

In [2]:
# Seed the global RNG so that colours drawn by random_color() repeat between runs.
random.seed(0)

# Display colour per stomata state (colour names assume RGB channel order).
# The insertion order of this table also defines the order of CATEGORIES.
COLORS = dict(
    [
        ("Open", (0, 255, 0)),     # Green
        ("close", (255, 0, 0)),    # Red
        ("Unknown", (0, 0, 255)),  # Blue
    ]
)
CATEGORIES = list(COLORS)

# Single class used for instance segmentation.
INST_CATEGORIES = ["stomata"]

def random_color():
    """Return a random colour as a list of three integers in ``[0, 255]``.

    The values come from the module-level ``random`` generator, so the
    result depends on its current seed/state.
    """
    return [random.randint(0, 255) for _channel in range(3)]


def get_detectron2_dicts(img_dir, json_filename, delta=5):
    """Build Detectron2 dataset records from Label Studio annotations.

    Args:
        img_dir: Image directory, forwarded to ``filter_annotations``.
        json_filename: Annotation JSON file, forwarded to ``filter_annotations``.
        delta: Forwarded to ``gen_polygon_w_boundingbox_from_annotation``.

    Returns:
        A list with one dict per annotated image, holding ``file_name``,
        ``image_id``, ``height``, ``width`` and ``annotations`` (each with
        an XYXY_ABS ``bbox``, a flat polygon ``segmentation`` and
        ``category_id`` 0). An empty list is returned, after printing an
        error, when an annotation's recorded size disagrees with the image
        on disk or when polygon generation fails.

    Raises:
        FileNotFoundError: If an annotated image cannot be read.
    """
    labelstudio_annotations = filter_annotations(img_dir, json_filename)

    dataset_dicts = []
    for idx, v in enumerate(labelstudio_annotations):
        img_filename = v["image"]
        img = cv2.imread(img_filename)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError("Cannot read image: {}".format(img_filename))
        img_h, img_w = img.shape[:2]

        record = {
            "file_name": img_filename,
            "image_id": idx,
            "height": img_h,
            "width": img_w,
        }

        objs = []
        for anno in v["annotations"]:
            if anno["original_width"] != img_w or anno["original_height"] != img_h:
                print("Generate record error!")
                return []

            success, poly, _, _category = gen_polygon_w_boundingbox_from_annotation(anno, delta=delta)
            # Check the status flag before touching `poly`: its content is
            # only meaningful when the conversion succeeded.
            if not success:
                print("Generate record error!")
                return []

            # Skip degenerate shapes with fewer than three vertices.
            if len(poly) <= 2:
                continue

            px = [float(x) for x, _ in poly]
            py = [float(y) for _, y in poly]
            objs.append({
                "bbox": [min(px), min(py), max(px), max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                # Convert from [[x1, y1], [x2, y2], ...] to [x1, y1, x2, y2, ...]
                "segmentation": [[coord for point in zip(px, py) for coord in point]],
                "category_id": 0,  # single category
            })
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts


def get_inference_dicts(img_dir, extensions=None):
    """List the images in a directory as minimal Detectron2-style records.

    Args:
        img_dir: Directory to scan (not recursive).
        extensions: Iterable of file-name suffixes to accept, matched
            case-insensitively. Defaults to ``[".png", ".tif", ".jpg"]``.

    Returns:
        A list of dicts with ``file_name``, ``image_id``, ``height`` and
        ``width``, ordered by file name. Each file appears at most once.
        Files that OpenCV cannot decode are skipped with a printed notice.
    """
    if extensions is None:
        extensions = [".png", ".tif", ".jpg"]
    # str.endswith accepts a tuple, so one pass over the directory is enough
    # and a file can never be listed twice.
    suffixes = tuple(ext.lower() for ext in extensions)

    dataset_dicts = []
    # Sort so that image_id assignment does not depend on os.listdir order.
    for fname in sorted(os.listdir(img_dir)):
        if not fname.lower().endswith(suffixes):
            continue
        img_filepath = os.path.join(img_dir, fname)
        img = cv2.imread(img_filepath)
        if img is None:
            # cv2.imread returns None for unreadable/corrupt files.
            print("Skip unreadable image: {}".format(img_filepath))
            continue
        img_h, img_w = img.shape[:2]
        dataset_dicts.append({
            "file_name": img_filepath,
            "image_id": len(dataset_dicts),
            "height": img_h,
            "width": img_w,
        })
    return dataset_dicts

def binary_mask_to_polygon(binary_mask, tolerance=0):
    r""" Converts a binary mask to COCO polygon representation
    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated polygonal chain. If tolerance is 0, the original coordinate array is returned.

    Returns:
        A list of polygons, one per contour found in the mask. Each polygon
        is a flat list ``[x0, y0, x1, y1, ...]`` expressed in the coordinate
        frame of ``binary_mask``.
    """
    def close_contour(contour):
        # Repeat the first vertex at the end when the contour is not closed.
        if not np.array_equal(contour[0], contour[-1]):
            contour = np.vstack((contour, contour[0]))
        return contour

    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
    for contour in measure.find_contours(padded_binary_mask, 0.5):
        # Undo the one-pixel shift introduced by the padding so that the
        # polygon lines up with the original (unpadded) mask.
        contour = close_contour(contour - 1)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # find_contours yields (row, col) pairs; flip them to (x, y).
        contour = np.flip(contour, axis=1)
        # Contours touching the mask border can sit at -0.5 after the shift;
        # they are deliberately left unclamped.
        polygons.append(contour.ravel().tolist())
    return polygons

def fit_polygens_to_rotated_bboxes(polygons):
    """Fit a minimum-area rotated rectangle to each polygon.

    Args:
        polygons: Iterable of flat coordinate lists ``[x0, y0, x1, y1, ...]``.

    Returns:
        A list with one ``cv2.minAreaRect`` result per polygon, in input order.
    """
    def _min_area_rect(flat):
        # Pair up the interleaved x/y values into an (n, 2) float32 array.
        vertices = np.array([[x, y] for x, y in zip(flat[::2], flat[1::2])], np.float32)
        return cv2.minAreaRect(vertices)  # ((cx, cy), (w, h), a)

    return [_min_area_rect(flat) for flat in polygons]

def draw_polygons(img_filename, polygons, texts, thickness=1):
    """Draw polygons, their upright bounding boxes and labels on an image.

    Args:
        img_filename: Path of the image to load with OpenCV.
        polygons: Iterable of flat coordinate lists ``[x0, y0, x1, y1, ...]``.
        texts: Label strings, paired with ``polygons`` by position.
        thickness: Line thickness; it also scales the label font.

    Returns:
        The loaded image, converted from BGR to RGB, with the drawings applied.
        Each polygon gets its own colour from ``random_color()``.
    """
    canvas = cv2.cvtColor(cv2.imread(img_filename), cv2.COLOR_BGR2RGB)
    font_scale = thickness / 3
    text_thickness = max(thickness - 1, 1)

    for flat, label in zip(polygons, texts):
        colour = random_color()
        vertices = np.array([[x, y] for x, y in zip(flat[::2], flat[1::2])], np.int32)
        cv2.polylines(canvas, [vertices], isClosed=True, thickness=thickness, color=colour)

        # bounding box format: (tlx, tly, w, h)
        left, top, box_w, box_h = cv2.boundingRect(vertices)
        cv2.rectangle(canvas, pt1=(left, top), pt2=(left + box_w, top + box_h),
                      color=colour, thickness=thickness, lineType=cv2.LINE_AA)

        # Filled label background just above the box's top-left corner.
        text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=thickness)[0]
        label_anchor = (left, top)
        label_corner = (label_anchor[0] + text_size[0], label_anchor[1] - text_size[1] - 3)
        cv2.rectangle(canvas, label_anchor, label_corner, color=colour, thickness=-1, lineType=cv2.LINE_AA)  # filled
        cv2.putText(canvas, label, (label_anchor[0], label_anchor[1] - 2), 0, font_scale,
                    [255, 255, 255], thickness=text_thickness, lineType=cv2.LINE_AA)
    return canvas

def draw_rotated_bboxes(img_filename, rboxes, texts, thickness=1, color=None):
    """Load an image from disk and draw labelled rotated boxes on it.

    The image is read with OpenCV, converted from BGR to RGB, and handed to
    ``draw_rotated_bboxes_on_image`` together with the remaining arguments.
    The annotated RGB image is returned.
    """
    rgb_image = cv2.cvtColor(cv2.imread(img_filename), cv2.COLOR_BGR2RGB)
    return draw_rotated_bboxes_on_image(rgb_image, rboxes, texts, thickness, color)

def draw_rotated_bboxes_on_image(img, rboxes, texts, thickness=1, color=None):
    """Draw labelled rotated rectangles on a copy of an image.

    Args:
        img: Image array; it is copied, never modified in place.
        rboxes: Iterable of rotated rectangles in the form accepted by
            ``cv2.boxPoints`` (e.g. the output of ``cv2.minAreaRect``).
        texts: Label strings, paired with ``rboxes`` by position.
        thickness: Outline thickness; it also scales the label font.
        color: Colour used for every box and label background. When
            ``None``, each box gets its own colour from ``random_color()``.

    Returns:
        The annotated copy of ``img``.
    """
    img_draw = img.copy()
    font_scale = thickness / 3
    text_thickness = max(thickness - 1, 1)
    for rb, text in zip(rboxes, texts):
        c = random_color() if color is None else color
        # np.int0 was removed in NumPy 2.0; int32 is what OpenCV draws with.
        box = cv2.boxPoints(rb).astype(np.int32)
        cv2.drawContours(img_draw, [box], 0, color=c, thickness=thickness)

        t_size = cv2.getTextSize(text, 0, fontScale=font_scale, thickness=thickness)[0]
        # Anchor the label at the top-left of the box's axis-aligned extent;
        # plain ints keep the OpenCV point arguments unambiguous.
        x_min, y_min = (int(value) for value in box.min(axis=0))
        c1 = (x_min, y_min)
        c2 = (x_min + t_size[0], y_min - t_size[1] - 3)
        # Fill the label background with the box's own colour `c`, not the
        # `color` argument, which is None when random colours are in use.
        cv2.rectangle(img_draw, c1, c2, color=c, thickness=-1, lineType=cv2.LINE_AA)  # filled
        cv2.putText(img_draw, text, (x_min, y_min - 2), 0, font_scale, [255, 255, 255],
                    thickness=text_thickness, lineType=cv2.LINE_AA)
    return img_draw


In [None]:
prj_path = "../../google-drive/"
dataset_name = "stomata100"

json_filename = os.path.join(prj_path, "{}/labels/labels.json".format(dataset_name))

# Get Detectron2 ground truth
for d in ["train", "val"]:
    catelog_name = "{}_{}".format(dataset_name, d)
    # Drop any previous registration so this cell can be re-run safely.
    if catelog_name in DatasetCatalog:
        DatasetCatalog.remove(catelog_name)
    if catelog_name in MetadataCatalog:
        MetadataCatalog.remove(catelog_name)

    # NOTE(review): img_dir is not used below and points at
    # <prj_path>/<dataset>_<split>, not the directory the loader reads -
    # confirm whether it can be dropped.
    img_dir = os.path.join(prj_path, catelog_name)

    # Resolve the split directory now and bind it (and the label file) as
    # lambda defaults. The loader runs lazily, and prj_path / dataset_name are
    # reassigned by later cells, so reading the globals at call time would
    # load from the wrong place.
    split_img_dir = os.path.join(prj_path, "{}/{}".format(dataset_name, d))
    DatasetCatalog.register(
        catelog_name,
        lambda split_img_dir=split_img_dir, json_filename=json_filename:
            get_detectron2_dicts(split_img_dir, json_filename))
    MetadataCatalog.get(catelog_name).set(thing_classes=INST_CATEGORIES)

stomata_metadata = MetadataCatalog.get("{}_train".format(dataset_name))

In [None]:
# Inference should use the config with parameters that were used in training.
# The config is rebuilt here the same way, then pointed at the trained checkpoint.

dataset_name = "stomata100"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("{}_train".format(dataset_name),)
cfg.DATASETS.TEST = ("{}_val".format(dataset_name), )
cfg.DATALOADER.NUM_WORKERS = 2
# Solver / RoI sampling settings below mirror the training setup.
# NOTE(review): presumably they have no effect on DefaultPredictor - confirm.
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 1000    # you may need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (stomata). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.
cfg.MODEL.DEVICE = "cpu"    # run the model on CPU


# NOTE(review): the folder name mentions "ep8000" while MAX_ITER above is 1000 -
# confirm which one reflects the run that produced this checkpoint.
model_folder_name = "stomata100_output_ep8000_2022_10_06_04_36_29"
cfg.OUTPUT_DIR = "./{}".format(model_folder_name)
# Load the trained checkpoint. The model-zoo checkpoint URL that used to be
# assigned first was overwritten here anyway, so that dead store is gone.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained


In [None]:
# dataset_name = "2021_Aono_PlosOne_Maize"
# dataset_name = "2021_Zhu_FPS_Wheat_10x"
dataset_name = "2021_Zhu_FPS_Wheat_20x"
# dataset_name = "2022_Li_PC_LeafNet"

# Line thickness used when drawing on the images of each dataset.
thickness = {
    "2021_Aono_PlosOne_Maize": 5,
    "2021_Zhu_FPS_Wheat_10x": 2,
    "2021_Zhu_FPS_Wheat_20x": 2,
    "2022_Li_PC_LeafNet": 2
}
line_thickness = thickness[dataset_name]


cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3   # set a custom testing threshold
score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST

# NOTE(review): save_dir is built from the prj_path value set by an earlier
# cell, while the input images below are located through the prj_path that is
# rebound after the predictor is created - confirm that the two roots are
# meant to differ.
save_dir = os.path.join(
    prj_path,
    "{}/{}_inference/thres_{}".format(dataset_name, model_folder_name, score_thresh))
# Start from an empty output directory on every run.
shutil.rmtree(save_dir, ignore_errors=True)
os.makedirs(save_dir)

predictor = DefaultPredictor(cfg)

prj_path = pathlib.Path().absolute().parent
full_img_dir = os.path.join(prj_path, "{}/images".format(dataset_name))

# Get inference dicts
infer_dicts = get_inference_dicts(img_dir=full_img_dir)

# All outputs are written as JPEG regardless of the input format.
file_extension = ".jpg"

for d in infer_dicts:
    img_filename = d["file_name"]
    img_basename = os.path.basename(img_filename)
    filename = os.path.splitext(img_basename)[0]

    im = cv2.imread(img_filename)
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    # Move the predictions to the CPU once; everything below (visualiser,
    # mask -> polygon conversion) works on host memory.
    instances = outputs["instances"].to("cpu")

    # The visualiser expects RGB input, OpenCV loads BGR.
    v = Visualizer(im[:, :, ::-1], metadata=stomata_metadata, scale=1)
    pred_vis = v.draw_instance_predictions(instances).get_image()

    # Show predictions
    fig = plt.figure(figsize=(12, 8), dpi=600)
    fig.add_subplot(1, 2, 1)
    plt.imshow(pred_vis)
    plt.axis('off')
    plt.title("Pred: {}".format(img_basename))

    save_filename = os.path.join(
        save_dir,
        "{}_thres_{}_inst_seg{}".format(filename, score_thresh, file_extension))
    # pred_vis is RGB; cv2.imwrite expects BGR.
    cv2.imwrite(save_filename, cv2.cvtColor(pred_vis, cv2.COLOR_RGB2BGR))

    # Convert every predicted mask into polygons; a mask may yield several
    # contours, each of which gets the same "<class> <score>" label.
    polygons, categories = [], []
    for mask, category, score in zip(instances.pred_masks.numpy(),
                                     instances.pred_classes.tolist(),
                                     instances.scores.tolist()):
        po = binary_mask_to_polygon(mask)
        polygons += po
        categories += ["{} {:.0%}".format(INST_CATEGORIES[category], score)] * len(po)

    # Draw fitted rotated bounding boxes
    fitted_rbboxs = fit_polygens_to_rotated_bboxes(polygons)
    img_draw = draw_rotated_bboxes(img_filename, fitted_rbboxs, categories, thickness=line_thickness, color=None)
    fig.add_subplot(1, 2, 2)
    plt.imshow(img_draw)
    plt.axis('off')
    plt.title("Pred_rotated_bboxes: {}".format(img_basename))
    save_filename = os.path.join(
        save_dir,
        "{}_thres_{}_rotated_bbox{}".format(filename, score_thresh, file_extension))
    # img_draw is RGB; cv2.imwrite expects BGR.
    cv2.imwrite(save_filename, cv2.cvtColor(img_draw, cv2.COLOR_RGB2BGR))

    # Save the side-by-side comparison figure.
    save_filename = os.path.join(
        save_dir,
        "benchmark_{}_thres_{}{}".format(filename, score_thresh, file_extension))
    fig.savefig(save_filename, bbox_inches='tight')

    # Display the figure, then release it: one 600-dpi figure per image would
    # otherwise stay alive until the whole loop has finished.
    plt.show()
    plt.close(fig)