In [24]:
# Library imports
import json
import os
import random
import time

import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
from imutils import paths
from scipy.optimize import linear_sum_assignment

from mrcnn import model as modellib
from mrcnn import utils
from mrcnn import visualize
from mrcnn.config import Config

In [25]:
# enlarge the default figure size so inline plots are easier to read
import matplotlib
matplotlib.rcParams.update({"figure.figsize": (7, 7)})

In [26]:
# locations of the trained weights and of the validation image list
model_path = "neoexp_logs/neoplast20210228T1521/mask_rcnn_neoplast_0049.h5"
test_img_path = "datset/val_img_paths.txt"

In [27]:
# dataset locations
img_path = "datset/images"
mask_path = "datset/masks"

# class ID -> label, and class ID -> RGB colour
CLASS_NAMES = {1: "NeoPlast"}
CLASS_COLORS = {1: (1.0, 0.0, 0.0)}

# COCO weights file and the directory holding logs / model checkpoints
COCO_PATH = "mask_rcnn_coco.h5"
LOGS_AND_MODEL_DIR = "neoexp_logs"

In [28]:
# seed Python's random module so the sampling and shuffling below repeat
random.seed(37)

# fraction of the image list used for the experiment, and the train share of
# the images that are not held out for testing
exp_split = 1.0
data_split = 0.8

In [29]:
# every image file found under img_path, in sorted order
img_path_list = sorted(paths.list_images(img_path))

In [30]:
# number of images taking part in the experiment: the exp_split fraction of
# the full image list, truncated to an integer
exp_img_num= int(len(img_path_list)*exp_split)
# draw exp_img_num indices into img_path_list, one independent randint per slot
# NOTE(review): independent draws sample WITH replacement, so the same image
# can be picked more than once while other images are never picked; duplicated
# paths can then end up in more than one of the train/val/test splits built
# from this list -- confirm this is intended. The drawn values depend on the
# random.seed(...) call made earlier, so changing this line changes every split.
exp_ids= [random.randint(0, len(img_path_list)-1) for i in range(exp_img_num)]
# index the path list (as a NumPy array) with the drawn indices
exp_img_pathlist= np.array(img_path_list)[exp_ids]

In [31]:
# number of images held out for testing; they are taken from the end of the
# shuffled index order
NUM_TEST_IMAGES = 500

idx = list(range(len(exp_img_pathlist)))
if len(idx) < NUM_TEST_IMAGES:
    # with fewer images the slice bounds below would go negative and the
    # splits would silently overlap / be mis-sized
    raise ValueError(
        "Need at least {} images to build the test split, found {}".format(
            NUM_TEST_IMAGES, len(idx)))
random.shuffle(idx)

# everything before test_start is divided between training and validation
# according to data_split; the remainder is the test split
test_start = len(idx) - NUM_TEST_IMAGES
num_train = int(test_start * data_split)
trainIdxs = idx[:num_train]
valIdxs = idx[num_train:test_start]
testIdxs = idx[test_start:]

In [32]:
# training-time configuration derived from the base Mask R-CNN Config class
class NeoPlastConfig(Config):
    """Mask R-CNN settings for the NeoPlast experiment."""

    # name of this configuration
    NAME = "NeoPlast"

    # one GPU with one image per GPU; their product acts as the batch size
    # when the step counts are derived below
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # remaining hyperparameter overrides
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    RPN_NMS_THRESHOLD = 0.8
    USE_MINI_MASK = False

    # one class per CLASS_NAMES entry, plus one for the background
    NUM_CLASSES = 1 + len(CLASS_NAMES)

    # steps per training epoch and per validation cycle
    STEPS_PER_EPOCH = len(trainIdxs) // (GPU_COUNT * IMAGES_PER_GPU)
    VALIDATION_STEPS = len(valIdxs) // (GPU_COUNT * IMAGES_PER_GPU)

In [33]:
# inference-time configuration layered on top of the training configuration
class NeoPlastInferenceConfig(NeoPlastConfig):
    """Settings used when running the trained NeoPlast model for detection."""

    # detections scoring below this confidence are discarded, which prunes
    # false positives
    DETECTION_MIN_CONFIDENCE = 0.8

    # run on a single GPU, one image at a time
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

In [34]:
# primary data loader class
class NeoPlastDataset(utils.Dataset):
    """Dataset that serves NeoPlast images and their per-instance masks.

    imagePaths: indexable collection of image file paths.
    classNames: dict mapping class ID -> class label.
    width:      width every image and mask is resized to (aspect ratio kept).
    """

    def __init__(self, imagePaths, classNames, width=256):
        # call the parent constructor; `self` is bound automatically, so it
        # must not be passed a second time (it would land in the parent's
        # first positional parameter)
        super().__init__()

        # store the image paths and class names along with the width details
        # for further image processing
        self.imagePaths = imagePaths
        self.classNames = classNames
        self.width = width

    def load_cells(self, idxs):
        """Register every class and the images selected by `idxs`."""
        # loop over all class names and add each to the dataset
        for (classID, label) in self.classNames.items():
            self.add_class("NeoPlast", classID, label)

        # loop over the image path indices
        for i in idxs:
            # the image filename serves as the unique ID
            imagePath = self.imagePaths[i]
            filename = os.path.basename(imagePath)
            # add the image to the dataset
            self.add_image("NeoPlast", image_id=filename,
                path=imagePath)

    def load_image(self, imageID):
        """Return the image as an RGB array resized to `self.width`.

        Raises FileNotFoundError when the image file cannot be read.
        """
        # grab the image path and load it; cv2.imread signals failure by
        # returning None instead of raising
        p = self.image_info[imageID]["path"]
        image = cv2.imread(p)
        if image is None:
            raise FileNotFoundError("Unable to read image: {}".format(p))

        # convert from BGR to RGB channel ordering because of OpenCV
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # resize the image, preserving the aspect ratio
        return imutils.resize(image, width=self.width)

    def load_mask(self, imageID):
        """Return (masks, class_ids) for the image.

        masks is a boolean array of shape (height, width, num_instances) with
        one channel per distinct non-zero value in the annotation mask;
        class_ids is an int32 array of ones, one entry per instance.

        Raises FileNotFoundError when the mask file cannot be read.
        """
        # from image filename extract mask filename and path
        filename = self.image_info[imageID]["id"].split(".")[0]
        filenum = filename.split("_")[1]
        annotPath = os.path.join(mask_path, "mask_" + filenum + ".png")

        # load the mask, keep its first channel and resize to width using
        # nearest-neighbour interpolation so label values are not blended
        annotMask = cv2.imread(annotPath)
        if annotMask is None:
            raise FileNotFoundError("Unable to read mask: {}".format(annotPath))
        annotMask = cv2.split(annotMask)[0]
        annotMask = imutils.resize(annotMask, width=self.width,
                                   inter=cv2.INTER_NEAREST)

        # every distinct non-zero value is one instance; filter background by
        # value rather than by position so a mask without any background
        # pixel does not lose its first instance
        classIDs = np.unique(annotMask)
        classIDs = classIDs[classIDs != 0]

        # one boolean channel per instance, built by broadcasting the label
        # image against the instance values
        masks = annotMask[:, :, np.newaxis] == classIDs[np.newaxis, np.newaxis, :]

        # return the mask array and class IDs as respective data-types
        return (masks.astype("bool"), np.ones((masks.shape[-1],), dtype="int32"))

In [35]:
# build the test dataset from the held-out test indices
testDataset = NeoPlastDataset(imagePaths=exp_img_pathlist, classNames=CLASS_NAMES)
testDataset.load_cells(idxs=testIdxs)
testDataset.prepare()

In [36]:
# build the network in inference mode and restore the trained weights
inference_config = NeoPlastInferenceConfig()
model = modellib.MaskRCNN(
    mode="inference",
    model_dir=LOGS_AND_MODEL_DIR,
    config=inference_config,
)
model.load_weights(model_path, by_name=True)

Re-starting from epoch 49


## Intersection over Union metric explanation
Intersection over Union (IoU) is the ratio between the area of intersection and the area of union of the bounding box predicted by the model and the ground-truth bounding box of an object in an image.
<br>
<img src="ref_images/iou_equation.png" width="500px">
<br>
So depending upon this score, a bounding-box prediction can be classified as either bad or good as shown below
<br>
<img src="ref_images/IOU_classes.png" width="500px">

### Mean Average Precision metric explanation
<br>
This metric is computed in 3 steps

- Compute the precision, which is the fraction of predicted objects that are correct
- Compute the recall, which is the fraction of ground-truth objects that the model found
- Average the maximum precision values across all recall levels, in steps of size s

In [37]:
num_images = 400  # number of test images sampled for the mAP estimate

In [38]:
# Compute VOC-Style mAP @ IoU=0.5
start_time = time.time()

# draw distinct test images from a seeded generator so the reported mAP is
# reproducible and no image is counted more than once; the sample size is
# capped at the number of available test images
ap_rng = np.random.RandomState(37)
num_eval_images = min(num_images, len(testDataset.image_ids))
image_ids = ap_rng.choice(testDataset.image_ids, num_eval_images, replace=False)

APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
        testDataset, inference_config, image_id)
    # Run object detection
    r = model.detect([image], verbose=0)[0]
    # Compute AP for this image
    AP, precisions, recalls, overlaps = utils.compute_ap(
        gt_bbox, gt_class_id, gt_mask,
        r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)

print("mAP: {}\n".format(np.mean(APs)))
print("Operated on {} images in {:.3f}s".format(len(image_ids), time.time()-start_time))

mAP: 0.752067281211984

Operated on 400 images in 558.712s


### Panoptic Quality metrics
<br>
Panoptic Quality (PQ) is similar to mAP, but the IoU of the matched instances enters the metric itself. It is the product of a detection-quality term (the F1-score of the instance matching) and a segmentation-quality term (the mean IoU of the matched instances), so it measures both the model's precision/recall performance and the quality of its segmentations.
<br>
<img src='ref_images/pan_metrics.png' width='500px'>

In [40]:
def remap_label(pred, by_size=False):
    """Renumber the instances of a label map to contiguous ids 1..N.

    Args:
        pred: integer label map where 0 is background and every other value
            identifies one instance.
        by_size: when True, ids are assigned in descending order of instance
            pixel count (largest instance becomes 1); otherwise in ascending
            order of the original label values.

    Returns:
        A new int32 array of the same shape with contiguous ids, or `pred`
        itself (unchanged) when it contains no instance.
    """
    # collect the instance ids; filtering by value instead of list.remove(0)
    # keeps this working for label maps that contain no background pixel
    pred_id = [inst_id for inst_id in np.unique(pred) if inst_id != 0]
    if len(pred_id) == 0:
        return pred  # no label

    if by_size:
        pred_size = [(pred == inst_id).sum() for inst_id in pred_id]
        # sort the ids by size in descending order (stable for equal sizes)
        pair_list = sorted(zip(pred_id, pred_size),
                           key=lambda x: x[1], reverse=True)
        pred_id = [inst_id for inst_id, _ in pair_list]

    new_pred = np.zeros(pred.shape, np.int32)
    for idx, inst_id in enumerate(pred_id):
        new_pred[pred == inst_id] = idx + 1
    return new_pred

In [41]:
def get_fast_pq(true, pred, match_iou=0.5):
    """Compute detection quality, segmentation quality and panoptic quality.

    Args:
        true: ground-truth label map; 0 is background, every other value
            identifies one instance. Ids do not need to be contiguous.
        pred: predicted label map with the same shape and convention.
        match_iou: IoU threshold above which a (true, pred) pair counts as a
            match. For values >= 0.5 every pair above the threshold is taken
            (such pairs are necessarily unique); for lower values a unique
            pairing is found with the Hungarian algorithm
            (scipy.optimize.linear_sum_assignment).

    Returns:
        ([dq, sq, pq], [paired_true, paired_pred, unpaired_true, unpaired_pred])
        where dq is the F1-score of the matching, sq the mean IoU of the
        matched pairs and pq = dq * sq. The second list holds the instance
        ids of matched and unmatched instances. When neither map contains an
        instance, dq and pq are NaN (the score is undefined), which lets
        callers skip such images with np.nanmean.
    """
    assert match_iou >= 0.0, "Cant' be negative"

    true = np.asarray(true)
    pred = np.asarray(pred)

    # instance ids, background excluded by value (not by position)
    true_ids = np.unique(true)
    true_ids = true_ids[true_ids != 0]
    pred_ids = np.unique(pred)
    pred_ids = pred_ids[pred_ids != 0]

    # masks are stored by position and looked up through pred_col, so label
    # values may have gaps without causing out-of-range or shifted lookups
    true_masks = [true == t for t in true_ids]
    pred_masks = [pred == p for p in pred_ids]
    pred_col = {p: col for col, p in enumerate(pred_ids)}

    # pairwise IoU, rows = true instances, columns = predicted instances;
    # only predictions that actually overlap a true instance are visited
    pairwise_iou = np.zeros([len(true_ids), len(pred_ids)], dtype=np.float64)
    for row, t_mask in enumerate(true_masks):
        for p in np.unique(pred[t_mask]):
            if p == 0:  # overlapping background
                continue
            col = pred_col[p]
            p_mask = pred_masks[col]
            inter = np.logical_and(t_mask, p_mask).sum()
            union = np.logical_or(t_mask, p_mask).sum()
            pairwise_iou[row, col] = inter / union

    if match_iou >= 0.5:
        # above 0.5 a true instance can overlap at most one prediction that
        # strongly, so thresholding alone yields a unique pairing
        pairwise_iou[pairwise_iou <= match_iou] = 0.0
        rows, cols = np.nonzero(pairwise_iou)
        paired_iou = pairwise_iou[rows, cols]
        paired_true = true_ids[rows]
        paired_pred = pred_ids[cols]
    else:
        # exhaustive maximal unique pairing: negate the IoU so the
        # minimum-cost assignment maximises the total IoU
        if pairwise_iou.size:
            rows, cols = linear_sum_assignment(-pairwise_iou)
        else:
            rows = cols = np.zeros(0, dtype=np.intp)
        paired_iou = pairwise_iou[rows, cols]

        # keep only pairs above the threshold; the rest are FP / FN
        keep = paired_iou > match_iou
        paired_true = list(true_ids[rows[keep]])
        paired_pred = list(pred_ids[cols[keep]])
        paired_iou = paired_iou[keep]

    # the actual FN and FP
    unpaired_true = [idx for idx in true_ids if idx not in paired_true]
    unpaired_pred = [idx for idx in pred_ids if idx not in paired_pred]

    tp = len(paired_true)
    fp = len(unpaired_pred)
    fn = len(unpaired_true)

    # F1-score i.e. DQ; undefined (NaN) when there is nothing to detect and
    # nothing was detected
    denom = tp + 0.5 * fp + 0.5 * fn
    dq = tp / denom if denom > 0 else float("nan")
    # SQ: mean IoU of matched pairs; the epsilon guards the tp == 0 case
    sq = paired_iou.sum() / (tp + 1.0e-6)

    return [dq, sq, dq * sq], [paired_true, paired_pred, unpaired_true, unpaired_pred]

In [42]:
# read the validation image paths and the per-image tissue types, one entry
# per line; line terminators are kept here and stripped where the entries are
# used. The `with` blocks close the files, so no explicit close() is needed.
with open(test_img_path, "r") as f:
    arr = f.readlines()

with open("datset/val_img_types.txt", "r") as f:
    arr_types = f.readlines()

In [43]:
num_chosen_img = 400  # number of listed validation images scored with PQ

In [44]:
# tissue type of each chosen image: first line-segment of every entry
# (drops the line terminator), limited to the first num_chosen_img entries
img_type_list = [raw_line.splitlines()[0] for raw_line in arr_types][:num_chosen_img]

In [45]:
# tissue names for which a per-tissue PQ is reported
tissue_types = [
    'gland', 'Bile-duct', 'Bladder', 'Breast', 'Cervix',
    'Colon', 'Esophagus', 'HeadNeck', 'Kidney', 'Liver',
    'Lung', 'Ovarian', 'Pancreatic', 'Prostate', 'Skin',
    'Stomach', 'Testis', 'Thyroid', 'Uterus',
]

In [46]:
# per-image binary panoptic quality; each entry is a one-element list [pq]
bPQ = []

for img_idx in range(num_chosen_img):
    # load the image the same way the dataset loader does: RGB, width 256
    image_path = arr[img_idx].splitlines()[0]
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError("Unable to read image: {}".format(image_path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = imutils.resize(image, width=256)

    # run detection
    r = model.detect([image], verbose=0)[0]

    # the mask file shares the trailing "_<number>.<ext>" part of the image
    # file name; the image paths in the list use backslash separators
    mask_suffix = image_path.split('\\')[-1].split('_')[-1]
    mask_inf_path = os.path.join(mask_path, "mask_" + mask_suffix)
    img_mask = cv2.imread(mask_inf_path)
    if img_mask is None:
        raise FileNotFoundError("Unable to read mask: {}".format(mask_inf_path))
    # resize the ground truth exactly like the image (nearest neighbour keeps
    # label values intact) so both label maps have the same shape
    img_mask = imutils.resize(img_mask[:, :, 0], width=256,
                              inter=cv2.INTER_NEAREST)
    remaped_gt = remap_label(img_mask)

    # paint the predicted instances into one label map sized from the
    # prediction itself; a later instance overwrites earlier ones where
    # they overlap
    pred_masks = r['masks']
    pred_labels = np.zeros(pred_masks.shape[:2], dtype='int32')
    for inst_idx in range(pred_masks.shape[2]):
        pred_labels[pred_masks[:, :, inst_idx] > 0] = inst_idx + 1
    # an instance that was completely overwritten leaves a gap in the ids;
    # renumber so the ids are contiguous again before scoring
    pred_labels = remap_label(pred_labels)

    [_, _, pq_bin], _ = get_fast_pq(remaped_gt, pred_labels)
    bPQ.append([pq_bin])

# overall score: mean of the per-image PQ values, ignoring NaN entries
bPQ_final = np.nanmean([entry[0] for entry in bPQ])
print("Binary Panoptic Quality-> {}\n".format(bPQ_final))
print("Panoptic Quality per Tissue Type:-\n")

# per-tissue score: average the PQ of the images whose type equals the tissue name
for tissue_name in tissue_types:
    indices = [pos for pos, img_type in enumerate(img_type_list)
               if img_type == tissue_name]
    tissue_PQ = [bPQ[pos] for pos in indices]
    print('{} PQ: {} '.format(tissue_name, np.nanmean(tissue_PQ)))

Binary Panoptic Quality-> 0.6708943908513941

Panoptic Quality per Tissue Type:-

gland PQ: 0.7282411973028374 
Bile-duct PQ: 0.6732049788341709 
Bladder PQ: 0.7943104529246043 
Breast PQ: 0.6682740679074229 
Cervix PQ: 0.6574118006836814 
Colon PQ: 0.5697431448381328 
Esophagus PQ: 0.7280822814734743 
HeadNeck PQ: 0.632452340651145 
Kidney PQ: 0.6469073520634757 
Liver PQ: 0.7257249424863959 
Lung PQ: 0.605158889030525 
Ovarian PQ: 0.7436484045044601 
Pancreatic PQ: 0.5914464468713281 
Prostate PQ: 0.7470174022050943 
Skin PQ: 0.6955592990718643 
Stomach PQ: 0.7751847258972315 
Testis PQ: 0.6626548017565101 
Thyroid PQ: 0.6497397746335498 
Uterus PQ: 0.6156768926627144 
