In [1]:
import os
import sys
import random
import json
import datetime
import cv2
import numpy as np
import skimage.io
import SimpleITK as sitk
import pandas as pd
from imgaug import augmenters as iaa
from sklearn.model_selection import train_test_split
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils

from mrcnn import model as modellib
from mrcnn import visualize
%matplotlib inline 

# Path to trained weights file
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

# Results directory
# Save submission files here
RESULTS_DIR = os.path.join(ROOT_DIR, "results/breast/")

# The dataset doesn't have a standard train/val split, so I picked
# a variety of images to surve as a validation set.
# VAL_IMAGE_IDS = 

Using TensorFlow backend.


In [2]:
class BreastConfig(Config):
    """Configuration for training on the breast segmentation dataset."""
    # Give the configuration a recognizable name
    NAME = "breast"

    # Adjust depending on your GPU memory
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # Background + nucleus

    # Number of training and validation steps per epoch
    STEPS_PER_EPOCH = (1231 - 369) // IMAGES_PER_GPU
    VALIDATION_STEPS = max(1, 369 // IMAGES_PER_GPU)

    # Don't exclude based on confidence. Since we have two classes
    # then 0.5 is the minimum anyway as it picks between nucleus and BG
    DETECTION_MIN_CONFIDENCE = 0.5

    # Backbone network architecture
    # Supported values are: resnet50, resnet101
    BACKBONE = "resnet101"

    # Input image resizing
    # Random crops of size 512x512
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    IMAGE_CHANNEL_COUNT = 3
    IMAGE_MIN_SCALE = 0

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    # ROIs kept after non-maximum supression (training and inference)
    POST_NMS_ROIS_TRAINING = 1000 
    POST_NMS_ROIS_INFERENCE = 2000

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    # RPN_NMS_THRESHOLD = 0.9

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 64
    
    # Grayscale images
    # IMAGE_CHANNEL_COUNT = 1
    # Image mean (Grayscale)
    MEAN_PIXEL = np.array([32768.0,32786.0,32768.0])

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 128

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 200

    # Max number of final detections per image
    DETECTION_MAX_INSTANCES = 10


class BreastInferenceConfig(BreastConfig):
    # Set batch size to 1 to run one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Don't resize imager for inferencing
    #IMAGE_RESIZE_MODE = "pad64"
    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.7
    DETECTION_MIN_CONFIDENCE = 0.5

############################################################
#  Dataset
############################################################

In [3]:
class BreastDataset(utils.Dataset):

    def load_breast(self, dataset_dir, subset):
        """Load a subset of the breast dataset.

        dataset_dir: Root directory of the dataset
        subset: Subset to load. Either the name of the sub-directory,
                such as stage1_train, stage1_test, ...etc. or, one of:
                * train: stage1_train excluding validation images
                * val: validation images from VAL_IMAGE_IDS
        """
        # Add classes. We have one class.
        # Naming the dataset nucleus, and the class nucleus
        self.add_class("breast", 1, "breast")
        df = pd.read_csv(CSV_DIR)
        # Which subset?
        # "val": use hard-coded list above
        # "train": use data from stage1_train minus the hard-coded list above
        # else: use the data from the specified sub-directory
        assert subset in ["train", "val", "test"]
        subset_dir = "train" if subset in ["train", "val"] else subset
        dataset_dir = os.path.join(dataset_dir, subset_dir)
        if subset == "test":
            image_ids = []
            for d in os.listdir(dataset_dir):
                _,p, p_id, rl, iv = d.split("_")
                for f in os.listdir(os.path.join(dataset_dir,d,"masks")):
                    # x = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                    #        (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['pathology'].values[0]
                    bd = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                            (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['breast_density'].values[0]
                    if bd==2 or bd==1:
                        image_ids.append(d)
            #image_ids = os.listdir(dataset_dir)
        else:
            x=[]
            for d in os.listdir(dataset_dir):
                _, p, p_id, rl, iv=d.split("_")
                for f in os.listdir(os.path.join(dataset_dir,d,"masks")):
                   # x = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                    #        (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['pathology'].values[0]
                    bd = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                            (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['breast_density'].values[0]
                    if bd==2 or bd==1:
                        x.append(d)
            #x = os.listdir(dataset_dir)
            y = np.ones(len(x))
            train_x, val_x, _, _, = train_test_split(x, y, test_size=0.3, random_state=7)
            if subset == "val":
                image_ids = val_x
            else:
                # Get image ids from directory names
                image_ids = train_x

        # Add images
        for image_id in image_ids:
            self.add_image(
                "breast",
                image_id=image_id,
                path=os.path.join(dataset_dir, image_id, "images/000000.png"))

    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        # Get mask directory from image path
        mask_dir = os.path.join(os.path.dirname(os.path.dirname(info['path'])), "masks")

        # Read mask files from .png image
        mask = []
        for f in os.listdir(mask_dir):
            if f.endswith(".png"):
                #ds = sitk.ReadImage(os.path.join(mask_dir, f))
                #m = sitk.GetArrayFromImage(ds)
                #m = np.squeeze(m)
                #m = m.astype(np.bool)
                m = skimage.io.imread(os.path.join(mask_dir, f)).astype(np.bool)
                mask.append(m)
        mask = np.stack(mask, axis=-1)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID, we return an array of ones
        return mask, np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "breast":
            return info["id"]
        else:
            super(self.__class__, self).image_reference(image_id)

    

In [4]:
class InferenceConfig(BreastInferenceConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=DEFAULT_LOGS_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
#model_path = '/backup/yuxin/Mask_RCNN/logs/breast20190313T1258/mask_rcnn_breast_0100.h5'
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
Loading weights from  /backup/yuxin/Mask_RCNN/logs/breast20190412T1216/mask_rcnn_breast_0068.h5


OSError: Unable to open file (addr overflow, addr = 800, size = 8336, eoa = 2048)

In [4]:
class InferenceConfig(BreastInferenceConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    IMAGE_RESIZE_MODE = "none"

In [5]:
dataset='/backup/yuxin/CBIS-MASS-PNG/'
CSV_DIR = "/backup/yuxin/mass_case_description_train_set.csv"
subset='train'#test

breast_ori = BreastDataset()

breast_ori.load_breast(dataset,subset)

breast_ori.prepare()

In [None]:
a=0
b=2048
for i in breast_ori.image_ids:
    image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(breast_ori, InferenceConfig, i, use_mini_mask=False)
    mask, class_id = breast_ori.load_mask(i)
    #print(gt_bbox.shape)
    #print(gt_bbox[0],image.shape,mask.shape)
    if image.shape[:2]!=mask.shape[:2]: print(breast_ori.image_info[i]['id'])
    for l in range(gt_bbox.shape[0]):
        if min(abs(gt_bbox[l][2]-gt_bbox[l][0]),abs(gt_bbox[l][3]-gt_bbox[l][1])) < b:
            b=min(abs(gt_bbox[l][2]-gt_bbox[l][0]),abs(gt_bbox[l][3]-gt_bbox[l][1]))
            print(i,"b:",b)
        if max(abs(gt_bbox[l][2]-gt_bbox[l][0]),abs(gt_bbox[l][3]-gt_bbox[l][1])) > a:
            a=max(abs(gt_bbox[l][2]-gt_bbox[l][0]),abs(gt_bbox[l][3]-gt_bbox[l][1]))
            print(i,"a:",a)
print(a,b)
    

0 b: 212
0 a: 303
1 a: 382
1 a: 419
2 b: 87
17 a: 458
18 a: 655
Mass-Training_P_00703_LEFT_CC
Mass-Training_P_00765_RIGHT_CC
Mass-Training_P_01423_RIGHT_CC
Mass-Training_P_00108_LEFT_CC
Mass-Training_P_00059_LEFT_MLO
Mass-Training_P_00765_RIGHT_MLO
71 a: 1365
Mass-Training_P_01182_LEFT_MLO
81 b: 84
Mass-Training_P_02033_RIGHT_CC
Mass-Training_P_02092_LEFT_MLO
Mass-Training_P_00715_RIGHT_MLO
Mass-Training_P_01686_RIGHT_CC
Mass-Training_P_01048_RIGHT_CC
Mass-Training_P_00694_RIGHT_MLO
Mass-Training_P_01981_RIGHT_CC
Mass-Training_P_00927_LEFT_MLO
Mass-Training_P_02092_LEFT_CC
Mass-Training_P_01908_LEFT_CC
Mass-Training_P_01831_RIGHT_CC
Mass-Training_P_01182_LEFT_CC
Mass-Training_P_01115_RIGHT_CC
Mass-Training_P_01048_RIGHT_MLO
Mass-Training_P_00859_LEFT_CC
Mass-Training_P_00453_LEFT_MLO
Mass-Training_P_01946_RIGHT_MLO
Mass-Training_P_01983_LEFT_MLO


In [20]:
print(breast_ori.image_info[218]['id'])

Mass-Test_P_00813_RIGHT_MLO


In [None]:
datasetdir='/backup/yuxin/CBIS-Mass-Patches1024-New/'

CSV_DIR = "/backup/yuxin/mass_case_description_test_set.csv"

subset='test'#test

breast_test = BreastDataset()

breast_test.load_breast(datasetdir,subset)

breast_test.prepare()

In [None]:
breast_test.image_info[200]['id']

In [None]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
image_ids =breast_test.image_ids
print(len(image_ids))
APs = []
recallss = []
overlapss = []

for image_id in image_ids:
   # try:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
            modellib.load_image_gt(breast_test, inference_config,
                                   image_id, use_mini_mask=False)
    #print(gt_class_id)
    #molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)
    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]
    print(r['scores'])
    #print(image_id,breast_test.image_info[image_id]['id'])
    #print(image.shape,gt_mask.shape,r['masks'].shape)
    # Compute AP
    AP, precisions, recalls, overlaps =\
            utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                             r["rois"], r["class_ids"], r["scores"], r['masks'])
    #active_class_ids=parse_image_meta(image_meta)['active_class_ids']
    #l_cls=modellib.mrcnn_class_loss_graph(gt_class_id, r['class_ids'], active_class_ids)
    #l_bbx=modelib.mrcnn_bbox_loss_graph(gt_bbox,gt_class_id,r['rois'])
    #l_mask=modelib.mrcnn_mask_loss_graph(gt_mask, gt_class_id, r['masks'])
    APs.append(AP)
   # df = pd.data
   # except:
    #    continue
    print(AP)
    #print(breast_test.image_info[image_id]['id'])
    #recallss.append(recalls)
 #   overlapss.append(overlaps)
#print(APs)
print("mAP: ", np.mean(APs))

#print("recall: ", recallss)
#print("overlap: ", np.mean(overlapss))