In [1]:
import numpy as np
np.__version__

'1.14.6'

In [1]:
import os
import sys
import random
import json
import datetime
import cv2
import numpy as np
import skimage.io
import SimpleITK as sitk
from imgaug import augmenters as iaa
from sklearn.model_selection import train_test_split
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils

from mrcnn import model as modellib
from mrcnn import visualize
%matplotlib inline 

# Path to trained weights file
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

# Results directory
# Save submission files here
RESULTS_DIR = os.path.join(ROOT_DIR, "results/breast/")

# The dataset doesn't have a standard train/val split, so I picked
# a variety of images to surve as a validation set.
# VAL_IMAGE_IDS = 

Using TensorFlow backend.


In [2]:
############################################################
#  Configurations
############################################################

class BreastConfig(Config):
    """Configuration for training on the breast segmentation dataset."""
    # Give the configuration a recognizable name
    NAME = "breast"

    # Adjust depending on your GPU memory
    IMAGES_PER_GPU = 2

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # Background + nucleus

    # Number of training and validation steps per epoch
    STEPS_PER_EPOCH = (2470 - 737) // IMAGES_PER_GPU
    VALIDATION_STEPS = max(1, 737 // IMAGES_PER_GPU)

    # Don't exclude based on confidence. Since we have two classes
    # then 0.5 is the minimum anyway as it picks between nucleus and BG
    DETECTION_MIN_CONFIDENCE = 0

    # Backbone network architecture
    # Supported values are: resnet50, resnet101
    BACKBONE = "resnet50"

    # Input image resizing
    # Random crops of size 512x512
    IMAGE_RESIZE_MODE = "square"

    IMAGE_CHANNEL_COUNT = 1
    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    # ROIs kept after non-maximum supression (training and inference)
    POST_NMS_ROIS_TRAINING = 1000
    POST_NMS_ROIS_INFERENCE = 2000

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.9

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 64

    # Image mean (RGB)
    MEAN_PIXEL = 43.5

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 128

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 200

    # Max number of final detections per image
    DETECTION_MAX_INSTANCES = 400


class BreastInferenceConfig(BreastConfig):
    # Set batch size to 1 to run one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Don't resize imager for inferencing
    IMAGE_RESIZE_MODE = "none"
    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.7

In [3]:
############################################################
#  Dataset
############################################################

class BreastDataset(utils.Dataset):

    def load_breast(self, dataset_dir, subset):
        """Load a subset of the breast dataset.

        dataset_dir: Root directory of the dataset
        subset: Subset to load. Either the name of the sub-directory,
                such as stage1_train, stage1_test, ...etc. or, one of:
                * train: stage1_train excluding validation images
                * val: validation images from VAL_IMAGE_IDS
        """
        # Add classes. We have one class.
        # Naming the dataset nucleus, and the class nucleus
        self.add_class("breast", 1, "breast")

        # Which subset?
        # "val": use hard-coded list above
        # "train": use data from stage1_train minus the hard-coded list above
        # else: use the data from the specified sub-directory
        assert subset in ["train", "val", "test"]
        subset_dir = "train" if subset in ["train", "val"] else subset
        dataset_dir = os.path.join(dataset_dir, subset_dir)
 #       if subset == "test":
        image_ids = os.listdir(dataset_dir)
   #     else:
   #         x = os.listdir(dataset_dir)
     #       y = np.ones(len(x))
       #     train_x, val_x, _, _, = train_test_split(x, y, test_size=0.3, random_state=7)
         #   if subset == "val":
           #     image_ids = val_x
           # else:
                # Get image ids from directory names
             #   image_ids = train_x

        # Add images
        for image_id in image_ids:
            self.add_image(
                "breast",
                image_id=image_id,
                path=os.path.join(dataset_dir, image_id, "images/000000.png"))

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,3] Numpy array.
        """
        # Load image
        #ds = sitk.ReadImage(self.image_info[image_id]['path'])
        #image = sitk.GetArrayFromImage(ds)
        #image = np.squeeze(image)
        image = cv2.imread(self.image_info[image_id]['path'],cv2.IMREAD_ANYDEPTH)
        # image = img_as_ubyte(image)
        # If grayscale. Convert to RGB for consistency.
       # print(image.ndim)
        #if image.ndim != 3:
         #   image = skimage.color.gray2rgb(image)
        # If has an alpha channel, remove it for consistency
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image
    
    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        # Get mask directory from image path
        mask_dir = os.path.join(os.path.dirname(os.path.dirname(info['path'])), "masks")

        # Read mask files from .png image
        mask = []
        mask_name = []
        for f in os.listdir(mask_dir):
            if f.endswith(".png"):
                #ds = sitk.ReadImage(os.path.join(mask_dir, f))
                #m = sitk.GetArrayFromImage(ds).astype(np.bool)
                #m = np.squeeze(m)
                m = cv2.imread(os.path.join(mask_dir,f),cv2.IMREAD_ANYDEPTH)
                mask.append(m)
                mask_name.append(os.path.splitext(f)[0])
        mask = np.stack(mask, axis=-1)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID, we return an array of ones
        return mask, mask_name

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "breast":
            return info["id"]
        else:
            super(self.__class__, self).image_reference(image_id)

In [4]:
from PIL import Image

In [5]:
def normalize(img):
    normalized_img = ((img - np.min(img))/(np.max(img) - np.min(img)))*255
    return normalized_img

In [6]:
def get_patches(dataset, config, image_id, num):
    
    image, _, mask_name, bbox, mask =\
    modellib.load_image_gt(dataset, config, image_id, augment=False, augmentation=None, use_mini_mask=False)
#    image = dataset.load_image(image_id)
 #   mask, class_ids = dataset.load_mask(image_id)
 #   _idx = np.sum(mask, axis=(0, 1)) > 0
 #   mask = mask[:, :, _idx]
 #   bbox = utils.extract_bboxes(mask)
    h, w = image.shape
    patches = np.zeros([mask.shape[-1], 4], dtype=np.int32)
    mask_p = []
    for i in range(mask.shape[-1]):
        y1, x1, y2, x2 = bbox[i]
        
        if y2-y1<num and x2-x1<num:
            y = random.randint(0, num-(y2-y1))
            x = random.randint(0, num-(x2-x1))
            patches[i] = np.array([max(y1-y, 0), max(x1-x, 0), min(y1+num-y, h), min(x1+num-x, w)])
        elif y2-y1<num and x2-x2>=num:
            y = random.randint(0, num-(y2-y1))
            patches[i] = np.array([max(y1-y, 0), x1, min(y1+num-y, h), x2])
        elif y2-y1>=num and x2-x1<num:
            x = random.randint(0, num-(x2-x1))
            patches[i] = np.array([y1, max(x1-x, 0), y2, min(x1+num-x, w)])
        else:
            patches[i] = np.array([y1, x1, y2, x2])
        
    
    image = image[np.amin(patches, axis=0)[0]:np.amax(patches, axis=0)[2], np.amin(patches, axis=0)[1]:np.amax(patches, axis=0)[3]]
    for i in range(mask.shape[-1]):
            mask_p.append(mask[:,:,i][np.amin(patches, axis=0)[0]:np.amax(patches, axis=0)[2], np.amin(patches, axis=0)[1]:np.amax(patches, axis=0)[3]])
    print(image.shape,mask_p[0].shape)
    return image, mask_p, mask_name



In [7]:
def save_patches(dataset, config, image_id, image_info, out_path, num):
    image, mask, mask_name = get_patches(dataset, config, image_id, num)
    #image = normalize(image)

    out_img = out_path + image_info[image_id]['id'] + '/'+ 'images'
    folder = os.path.exists(out_img)
    if not folder:  #判断是否存在文件夹如果不存在则创建为文件夹
        os.makedirs(out_img) 
     
    out_img = os.path.join(out_img,'000000.png')
  #  image = image.astype(np.uint16)
    
    print(np.max(image))
    #img = Image.fromarray(image)
    #print(np.max(img))
    #img.save(out_img)
    cv2.imwrite(out_img, image, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])
    print(out_img)
    #skimage.io.imshow(np.squeeze(mask))
    for i in range(len(mask)):
        mask[i] = np.where(mask[i], 255, 0)
        
        out_mask = out_path + image_info[image_id]['id'] + '/'+ 'masks'
        folder1 = os.path.exists(out_mask)
        if not folder1:  #判断是否存在文件夹如果不存在则创建为文件夹
            os.makedirs(out_mask)
        out_mask = os.path.join(out_mask, mask_name[i]+'.png')
        cv2.imwrite(out_mask, mask[i],[int(cv2.IMWRITE_PNG_COMPRESSION), 0])
        print(out_mask)
    #print(out_img.shape,out_mask.shape)
    

In [4]:
datasetdir='/Extended/backup1/yuxin/CBIS-MASS-PNG/'

subset='train'#test

breast = BreastDataset()

breast.load_breast(datasetdir,subset)

breast.prepare()

In [5]:
cfg = BreastInferenceConfig()
image_info=breast.image_info
image_ids=breast.image_ids

In [12]:
_, _, _, bbox, _ =\
    modellib.load_image_gt(breast, cfg, 67, augment=False, augmentation=None, use_mini_mask=False)
print(bbox)
print(image_info[67]['id'])


[[   0 2366  399 2936]
 [1383 1587 1860 1952]]
Mass-Training_P_01600_RIGHT_MLO


In [16]:
print(np.amax(bbox,axis=0))

[1383 2366 1860 2936]


In [6]:
min_y=7000
max_y=0
min_x=7000
max_x=0
ht=0
wt=0
for i in image_ids:
    image=breast.load_image(i)
    h,w=image.shape
    if h>ht:ht=h
    if w>wt:wt=w
    mask, _ = breast.load_mask(i)
    bbox = utils.extract_bboxes(mask)
    if np.amin(bbox,axis=0)[0]<min_y:min_y=np.amin(bbox,axis=0)[0]
    if np.amax(bbox,axis=0)[2]>max_y:max_y=np.amax(bbox,axis=0)[2]
    if np.amin(bbox,axis=0)[1]<min_x:min_x=np.amin(bbox,axis=0)[1]
    if np.amax(bbox,axis=0)[3]>max_x:max_x=np.amax(bbox,axis=0)[3]
print(min_y,min_x,max_y,max_x,ht,wt)    

0 0 5962 4088 6931 5386


In [43]:
breast.image_info[109]['id']

'Mass-Training_P_00044_RIGHT_CC'

In [44]:
mage = breast.load_image(109)
mask, mask_name = breast.load_mask(109)
print(mage.shape,mask.shape,mask_name)

(4504, 2888) (4504, 2888, 4) ['1000001', '3000000', '4000000', '2000000']


In [45]:
out_path = '/Extended/backup1/yuxin/CBIS-Mass-Patches2048/'
save_patches(breast,cfg,109,image_info,out_path,1024)
#info = image_info[109]
#print()

TypeError: only integer scalar arrays can be converted to a scalar index

In [14]:
print(info['path'])
#os.path.getsize(os.path.join(os.path.dirname(os.path.dirname(info['path'])),'masks/1000001.dcm'))

NameError: name 'info' is not defined

In [22]:
for i in image_ids:
    try:    
        save_patches(breast, cfg, i, image_info, out_path, 1024)
        print(i)
    except ValueError:
        print("!!!!")
    

45623
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00587_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00587_RIGHT_CC/masks/1000001.png
0
34111
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00099_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00099_LEFT_MLO/masks/1000001.png
1
61449
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00296_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00296_LEFT_MLO/masks/1000001.png
2
65535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01716_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01716_RIGHT_CC/masks/1000001.png
3
61273
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00969_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00969_LEFT_CC/masks/3000001.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00969_LEFT_CC/masks/1000001.png
4
65535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01815_

55505
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00947_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00947_RIGHT_MLO/masks/1000001.png
47
56738
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00928_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00928_RIGHT_CC/masks/1000001.png
48
46676
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01787_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01787_LEFT_MLO/masks/1000001.png
49
56055
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00212_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00212_RIGHT_CC/masks/1000001.png
50
54778
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00766_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00766_LEFT_MLO/masks/1000001.png
51
61296
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00358_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Tes

52661
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00758_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00758_LEFT_CC/masks/1000001.png
93
54272
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01331_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01331_LEFT_MLO/masks/1000001.png
94
55560
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01365_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01365_LEFT_CC/masks/2000001.png
95
49195
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01251_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01251_LEFT_MLO/masks/1000001.png
96
59512
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00623_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00623_LEFT_CC/masks/1000001.png
97
47535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00369_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_0036

65535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00209_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00209_LEFT_MLO/masks/1000001.png
140
54360
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00194_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00194_RIGHT_MLO/masks/1000001.png
141
45906
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01331_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01331_LEFT_CC/masks/1000001.png
142
38060
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00278_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00278_RIGHT_CC/masks/1000001.png
143
65535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00652_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00652_LEFT_CC/masks/1000001.png
144
39653
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00677_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-T

65535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01257_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01257_RIGHT_MLO/masks/1000001.png
187
53501
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00820_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00820_LEFT_CC/masks/1000001.png
188
58903
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00962_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00962_RIGHT_CC/masks/1000001.png
189
41876
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01666_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01666_RIGHT_MLO/masks/1000001.png
190
65535
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00656_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00656_RIGHT_MLO/masks/1000001.png
191
48543
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01796_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/M

/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01090_LEFT_CC/masks/3000001.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01090_LEFT_CC/masks/2000001.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01090_LEFT_CC/masks/1000001.png
233
51630
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01825_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01825_RIGHT_CC/masks/1000001.png
234
42514
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00118_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00118_RIGHT_CC/masks/1000001.png
235
60613
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00591_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00591_RIGHT_MLO/masks/3000001.png
236
60327
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00429_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00429_LEFT_CC/masks/1000001.png
237
54321
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01

52092
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00147_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00147_RIGHT_CC/masks/1000001.png
280
59776
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00156_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00156_RIGHT_MLO/masks/1000001.png
281
62073
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01347_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01347_RIGHT_MLO/masks/1000001.png
282
55527
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01697_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01697_LEFT_CC/masks/1000001.png
283
51564
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01477_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01477_LEFT_CC/masks/1000001.png
284
54668
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00173_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-T

54778
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00296_LEFT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00296_LEFT_CC/masks/1000001.png
327
54118
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01566_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01566_RIGHT_MLO/masks/3000001.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01566_RIGHT_MLO/masks/2000001.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_01566_RIGHT_MLO/masks/1000001.png
328
46275
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00500_RIGHT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00500_RIGHT_MLO/masks/1000001.png
329
50518
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00494_RIGHT_CC/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00494_RIGHT_CC/masks/1000001.png
330
51894
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P_00056_LEFT_MLO/images/000000.png
/home/xyu/CBIS-Mass-Patches1024/test/Mass-Test_P

In [44]:
for i in image_ids:
    path=os.path.join("/home/xyu/CBIS-Mass-Patches1024/train/", image_info[i]["id"], "images/000000.png")
    img = cv2.imread(path, cv2.IMREAD_ANYDEPTH)
    print(np.max(img))

55307
55932
54867
59402
51036
57871
55518
39256
65535
47513
65535
56870
44909
62682
55791
59402
58962
59594
55197
54636
56231
49825
56716
65535
55844
54153
46558
43943
48585
59732
58918
37666
56254
54788
56672
51762
57453
65535
61956
53634
56672
49132
45553
52136
42949
61141
56099
52973
51036
60040
58653
59820
58455
61141
56659
56394
56143
65535
28081
51630
46338
65535
57241
51663
55153
35056
58668
62286
55175
60987
55725
59488
50089
61626
58039
42955
65535
52357
43585
56981
45369
51043
53700
56570
57553
55549
50277
52711
52885
65535
54006
44511
56320
65535
54646
47514
53410
51938
61380
52842
54294
65535
51113
51069
65535
50133
52290
57773
61494
58697
57090
53467
56231
49374
53171
54096
42003
60018
61339
57850
55980
61170
55153
57178
56842
61119
58947
55351
49239
57420
59468
60261
57266
65535
49762
58565
47755
58411
46022
65535
61229
60151
54844
57766
56848
50140
65535
52467
45149
59035
56800
55316
46052
49374
65535
57993
65535
59776
41062
57244
65535
51128
59864
65535
65535
49132
4890

# Evaluation

In [1]:
import os
import sys
import random
import json
import datetime
import cv2
import numpy as np
import skimage.io
import SimpleITK as sitk
import pandas as pd
from imgaug import augmenters as iaa
from sklearn.model_selection import train_test_split
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils

from mrcnn import model as modellib
from mrcnn import visualize
%matplotlib inline 

# Path to trained weights file
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

# Results directory
# Save submission files here
RESULTS_DIR = os.path.join(ROOT_DIR, "results/breast/")

# The dataset doesn't have a standard train/val split, so I picked
# a variety of images to surve as a validation set.
# VAL_IMAGE_IDS = 

Using TensorFlow backend.


In [2]:
class BreastConfig(Config):
    """Configuration for training on the breast segmentation dataset."""
    # Give the configuration a recognizable name
    NAME = "breast"

    # Adjust depending on your GPU memory
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # Background + nucleus

    # Number of training and validation steps per epoch
    STEPS_PER_EPOCH = (1231 - 369) // IMAGES_PER_GPU
    VALIDATION_STEPS = max(1, 369 // IMAGES_PER_GPU)

    # Don't exclude based on confidence. Since we have two classes
    # then 0.5 is the minimum anyway as it picks between nucleus and BG
    DETECTION_MIN_CONFIDENCE = 0.5

    # Backbone network architecture
    # Supported values are: resnet50, resnet101
    BACKBONE = "resnet101"

    # Input image resizing
    # Random crops of size 512x512
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    IMAGE_CHANNEL_COUNT = 3
    IMAGE_MIN_SCALE = 0

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    # ROIs kept after non-maximum supression (training and inference)
    POST_NMS_ROIS_TRAINING = 1000 
    POST_NMS_ROIS_INFERENCE = 2000

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    # RPN_NMS_THRESHOLD = 0.9

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 64
    
    # Grayscale images
    # IMAGE_CHANNEL_COUNT = 1
    # Image mean (Grayscale)
    MEAN_PIXEL = np.array([32768.0,32786.0,32768.0])

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 128

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 200

    # Max number of final detections per image
    DETECTION_MAX_INSTANCES = 10


class BreastInferenceConfig(BreastConfig):
    # Set batch size to 1 to run one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Don't resize imager for inferencing
    #IMAGE_RESIZE_MODE = "pad64"
    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.7
    DETECTION_MIN_CONFIDENCE = 0.5

############################################################
#  Dataset
############################################################

In [3]:
class BreastDataset(utils.Dataset):

    def load_breast(self, dataset_dir, subset):
        """Load a subset of the breast dataset.

        dataset_dir: Root directory of the dataset
        subset: Subset to load. Either the name of the sub-directory,
                such as stage1_train, stage1_test, ...etc. or, one of:
                * train: stage1_train excluding validation images
                * val: validation images from VAL_IMAGE_IDS
        """
        # Add classes. We have one class.
        # Naming the dataset nucleus, and the class nucleus
        self.add_class("breast", 1, "breast")
        df = pd.read_csv(CSV_DIR)
        # Which subset?
        # "val": use hard-coded list above
        # "train": use data from stage1_train minus the hard-coded list above
        # else: use the data from the specified sub-directory
        assert subset in ["train", "val", "test"]
        subset_dir = "train" if subset in ["train", "val"] else subset
        dataset_dir = os.path.join(dataset_dir, subset_dir)
        if subset == "test":
            image_ids = []
            for d in os.listdir(dataset_dir):
                _,p, p_id, rl, iv = d.split("_")
                for f in os.listdir(os.path.join(dataset_dir,d,"masks")):
                    # x = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                    #        (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['pathology'].values[0]
                    bd = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                            (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['breast_density'].values[0]
                    if bd==2 or bd==1:
                        image_ids.append(d)
            #image_ids = os.listdir(dataset_dir)
        else:
            x=[]
            for d in os.listdir(dataset_dir):
                _, p, p_id, rl, iv=d.split("_")
                for f in os.listdir(os.path.join(dataset_dir,d,"masks")):
                   # x = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                    #        (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['pathology'].values[0]
                    bd = df[(df['patient_id']=="P_"+p_id)&(df['left or right breast']==rl)&
                            (df['image view']==iv)&(df['abnormality id']==int(f[0]))]['breast_density'].values[0]
                    if bd==2 or bd==1:
                        x.append(d)
            #x = os.listdir(dataset_dir)
            y = np.ones(len(x))
            train_x, val_x, _, _, = train_test_split(x, y, test_size=0.3, random_state=7)
            if subset == "val":
                image_ids = val_x
            else:
                # Get image ids from directory names
                image_ids = train_x

        # Add images
        for image_id in image_ids:
            self.add_image(
                "breast",
                image_id=image_id,
                path=os.path.join(dataset_dir, image_id, "images/000000.png"))

    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        # Get mask directory from image path
        mask_dir = os.path.join(os.path.dirname(os.path.dirname(info['path'])), "masks")

        # Read mask files from .png image
        mask = []
        for f in os.listdir(mask_dir):
            if f.endswith(".png"):
                #ds = sitk.ReadImage(os.path.join(mask_dir, f))
                #m = sitk.GetArrayFromImage(ds)
                #m = np.squeeze(m)
                #m = m.astype(np.bool)
                m = skimage.io.imread(os.path.join(mask_dir, f)).astype(np.bool)
                mask.append(m)
        mask = np.stack(mask, axis=-1)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID, we return an array of ones
        return mask, np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "breast":
            return info["id"]
        else:
            super(self.__class__, self).image_reference(image_id)


In [4]:
class InferenceConfig(BreastInferenceConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=DEFAULT_LOGS_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = '/backup/yuxin/Mask_RCNN/logs/breast20190313T1258/mask_rcnn_breast_0100.h5'
#model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
Loading weights from  /backup/yuxin/Mask_RCNN/logs/breast20190313T1258/mask_rcnn_breast_0100.h5
Re-starting from epoch 100


In [5]:
datasetdir='/backup/yuxin/CBIS-Mass-Patches1024/'

CSV_DIR = "/backup/yuxin/mass_case_description_test_set.csv"

subset='test'#test

breast_test = BreastDataset()

breast_test.load_breast(datasetdir,subset)

breast_test.prepare()

In [8]:
breast_test.image_info[200]['id']

'Mass-Test_P_00671_LEFT_MLO'

In [9]:
def parse_image_meta(meta):
    """Parses an array that contains image attributes to its components.
    See compose_image_meta() for more details.

    meta: [batch, meta length] where meta length depends on NUM_CLASSES

    Returns a dict of the parsed values.
    """
    image_id = meta[0]
    original_image_shape = meta[1:4]
    image_shape = meta[4:7]
    window = meta[7:11]  # (y1, x1, y2, x2) window of image in in pixels
    scale = meta[11]
    active_class_ids = meta[12:]
    return {
        "image_id": image_id.astype(np.int32),
        "original_image_shape": original_image_shape.astype(np.int32),
        "image_shape": image_shape.astype(np.int32),
        "window": window.astype(np.int32),
        "scale": scale.astype(np.float32),
        "active_class_ids": active_class_ids.astype(np.int32),
    }

In [8]:
def softmax(X):
    exps = np.exp(X)
    return exps / np.sum(exps)


In [9]:
def stable_softmax(X):
    exps = np.exp(X - np.max(X))
    return exps / np.sum(exps)

In [10]:
def cross_entropy(X,y):
    """
    X is the output from fully connected layer (num_examples x num_classes)
    y is labels (num_examples x 1)
    	Note that y is not one-hot encoded vector. 
    	It can be computed as y.argmax(axis=1) from one-hot encoded vectors of labels if required.
    """
    m = y.shape[0]
    p = softmax(X)
    # We use multidimensional array indexing to extract 
    # softmax probability of the correct label for each sample.
    # Refer to https://docs.scipy.org/doc/numpy/user/basics.indexing.html#indexing-multi-dimensional-arrays for understanding multidimensional array indexing.
    log_likelihood = -np.log(p[range(m),y])
    loss = np.sum(log_likelihood) / m
    return loss

In [12]:
import pandas as pd

In [11]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
image_ids =breast_test.image_ids
print(len(image_ids))
APs = []
recallss = []
overlapss = []

for image_id in image_ids:
   # try:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
            modellib.load_image_gt(breast_test, inference_config,
                                   image_id, use_mini_mask=False)
    #print(gt_class_id)
    #molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)
    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]
    #print(r['class_ids'])
  #  print(image_id,breast_test.image_info[image_id]['id'])
   # print(image.shape,gt_mask.shape,r['masks'].shape)
    # Compute AP
    AP, precisions, recalls, overlaps =\
            utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                             r["rois"], r["class_ids"], r["scores"], r['masks'])
    #active_class_ids=parse_image_meta(image_meta)['active_class_ids']
    #l_cls=modellib.mrcnn_class_loss_graph(gt_class_id, r['class_ids'], active_class_ids)
    #l_bbx=modelib.mrcnn_bbox_loss_graph(gt_bbox,gt_class_id,r['rois'])
    #l_mask=modelib.mrcnn_mask_loss_graph(gt_mask, gt_class_id, r['masks'])
    APs.append(AP)
   # df = pd.data
   # except:
    #    continue
    print(AP)
    #print(breast_test.image_info[image_id]['id'])
    #recallss.append(recalls)
 #   overlapss.append(overlaps)
print(APs)
print("mAP: ", np.mean(APs))

#print("recall: ", recallss)
#print("overlap: ", np.mean(overlapss))

222
1.0
1.0
1.0
1.0
0.0
1.0
1.0
0.0
1.0
1.0
1.0
0.5
1.0
1.0
0.0
1.0
1.0
1.0
1.0
0.5
0.0
0.0
1.0
1.0
0.0
1.0
0.0
1.0
0.0
0.0
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.5
0.0
0.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.5
0.0
1.0
0.16666666666666666
0.16666666666666666
1.0
1.0
0.0
1.0
1.0
0.5
1.0
0.0
0.5
1.0
0.3333333333333333
0.0
1.0
1.0
1.0
1.0
0.0
0.0
1.0
1.0
1.0
0.6666666666666666
0.6666666666666666
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.1111111111111111
1.0
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.5
0.5
1.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
0.5
0.5
0.5
0.0
0.0
1.0
1.0
0.0
1.0
0.0
0.5
0.0
1.0
1.0
0.0
1.0
0.0
1.0
0.0
0.75
0.75
0.75
0.75
1.0
1.0
0.0
0.5
1.0
0.1
0.1
1.0
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
0.0
0.0
0.0
0.0
1.0
1.0
0.0
1.0
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
0.2
1.0
0.0
1.0
[1.0, 1.0, 1