# Developing Brain Atlas through Deep Learning 

## A. Iqbal, R. Khan, T. Karayannis
# .
# .
# .

In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
from config import Config
import utils
import glob #for selecting png files in training images folder
from natsort import natsorted, ns #for sorting filenames in a directory
import skimage
import pandas
import tensorflow as tf

import model as modellib
import visualize
from model import log

Using TensorFlow backend.


In [2]:
def resetDataDir(data_dir='C:\\Users\\dal4019\\Documents\\Bst_Reg'):
    """Make the project data directory the current working directory.

    Args:
        data_dir: Path of the data directory. Replace the default with the
            entire path to your data, or pass the path explicitly.

    Raises:
        OSError: If ``data_dir`` does not exist or cannot be entered
            (raised by ``os.chdir``).
    """
    # An absolute path can be entered directly from any location, so there is
    # no need to climb to the drive root first. The former loop that walked up
    # until the cwd equalled the C: root never terminated when the notebook
    # was started from another drive or from a non-Windows system.
    os.chdir(data_dir)

# Root directory of the project: enter the data directory first, then record
# whatever the working directory is at that point as an absolute path.
resetDataDir()
ROOT_DIR = os.getcwd()
print(ROOT_DIR)

# Directory to save logs and trained model (the "weights" folder below ROOT_DIR)
MODEL_DIR = os.path.join(ROOT_DIR, "weights")

# Local path to trained weights file (expected directly inside ROOT_DIR)
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")


C:\Users\dal4019\Documents\Bst_Reg


In [3]:
# CSV file (resolved relative to the data directory) that maps every region
# label to an RGB colour. Despite the "_DIR" suffix this is a file name.
RGB_MAPPINGS_DIR = 'rgb_mappings_medulla_v2.csv'

resetDataDir()

# Read only the columns that are needed and discard incomplete rows
RGB_MAPPINGS = pandas.read_csv(
    RGB_MAPPINGS_DIR, usecols=['Label', 'R', 'G', 'B']
).dropna()

# label -> (R, G, B); if a label occurs more than once its last row wins
RGB_MAPPINGS_MAP = {
    row["Label"]: (row["R"], row["G"], row["B"])
    for _, row in RGB_MAPPINGS.iterrows()
}

RGB_MAPPINGS_LABELS = list(RGB_MAPPINGS_MAP.keys())
RGB_MAPPINGS_INDEX = RGB_MAPPINGS.index.values
# Number of table rows that survived dropna()
NUM_LABELS = len(RGB_MAPPINGS_INDEX)
print(NUM_LABELS)

25


## Configurations

In [4]:
class BrainConfig(Config):
    """Mask R-CNN settings used for training on the brain-section dataset.

    Subclasses the base ``Config`` and lists only the values that are
    specific to this dataset.
    """
    # Recognizable name for this configuration
    NAME = "brain"

    # One GPU with one image per GPU, i.e. a batch size of 1
    # (GPUs * images/GPU). IMAGES_PER_GPU used to be 8 and was reduced to
    # avoid running out of memory when the image size was increased.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes: background + one class per row of the RGB mappings
    NUM_CLASSES = NUM_LABELS + 1

    # Limits for the small and the large image side; together they determine
    # the image shape. (Was 128 before the image size was increased.)
    IMAGE_MIN_DIM = 384
    IMAGE_MAX_DIM = 384

    # Anchor side lengths in pixels.
    # NOTE(review): the largest scale (512) exceeds the 384-pixel image side;
    # confirm that this is intended.
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Number of ROIs sampled per image during training. The original note
    # says the aim is to let ROI sampling pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Batches per epoch (previously 100).
    # steps_per_epoch = TotalTrainingSamples / TrainingBatchSize uses the
    # entire training data once per epoch; modify if required.
    STEPS_PER_EPOCH = 2000

    # Validation batches run at the end of each epoch (previously 5).
    # validation_steps = TotalValidationSamples / ValidationBatchSize covers
    # the whole validation set. Using only part of it measures a different
    # subset each time, which can make the model look better or worse when it
    # actually did not change.
    # https://stackoverflow.com/questions/45943675/meaning-of-validation-steps-in-keras-sequential-fit-generator-parameter-list
    VALIDATION_STEPS = 100

    ###### Further changes (experimentation):

    # Maximum number of ground truth instances to use in one image.
    # (Lowering this was tried to avoid duplicate instances of each brain
    # region; the value is currently 100.)
    MAX_GT_INSTANCES = 100

    # Max number of final detections (same remark as above; currently 100)
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance;
    # ROIs below this threshold are skipped (previously 0.7)
    DETECTION_MIN_CONFIDENCE = 0.9

    # Non-maximum suppression threshold for detection: if the overlap ratio
    # is greater than this threshold, the object is suppressed
    # (https://www.pyimagesearch.com/2014/11/17/non-maximum-suppression-object-detection-python)
    DETECTION_NMS_THRESHOLD = 0.3


config = BrainConfig()
config.display()


Configurations:
BACKBONE_SHAPES                [[96 96]
 [48 48]
 [24 24]
 [12 12]
 [ 6  6]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
GPU_COUNT                      1
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  384
IMAGE_MIN_DIM                  384
IMAGE_PADDING                  True
IMAGE_SHAPE                    [384 384   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               100
MEAN_PIXEL                     [123.7 116.8 103.9]
MINI_MASK_SHAPE                (56, 56)
NAME                           brain
NUM_CLASSES                    26
POOL_SIZE                      7
POST_NMS_ROIS_INFERENCE        1000
POST_NMS_ROIS_TRAINING         2000
ROI_

## Notebook Preferences

In [5]:
def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib Axes (or array of Axes) used by the
    visualizations in this notebook.

    Acts as the single place that controls figure sizes: every grid cell is
    ``size`` x ``size`` inches, so the figure measures ``size*cols`` by
    ``size*rows``. Change the default ``size`` to change how large images
    are rendered.
    """
    figure_size = (size*cols, size*rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

In [6]:
# Gets the class ID from file name
def get_class_id_from_image_name(filename):
    """Extract the numeric class ID from a mask file name.

    Mask files are expected to end in ``_m<class_id>`` followed by the file
    extension, e.g. ``section_mask_85_NTB_m4.png`` -> ``4``.

    Args:
        filename: File name (or path) of the mask image.

    Returns:
        The class ID as an ``int``.

    Raises:
        ValueError: If the name does not end in ``_m<digits>``.
    """
    # Work on the bare file name so directory components cannot interfere
    base_name = os.path.basename(filename)
    stem = base_name.split(".", 1)[0]

    # Anchor on the trailing "_m<digits>" token instead of a fixed underscore
    # position, so region labels that contain "_" or "m" still parse
    match = re.search(r"_m(\d+)$", stem)
    if match is None:
        raise ValueError(
            "Cannot find a class ID of the form '_m<id>' in file name: %r"
            % (filename,))
    return int(match.group(1))

print(get_class_id_from_image_name("section_mask_85_NTB_m4.png"))


4


## Dataset

Load training dataset

Extend the `Dataset` class, add a method `load_brain()` that registers the brain section images, and override the following methods as needed:

* `load_image()` – not overridden here; the default implementation in `utils.py` is used
* `load_mask()`
* `image_reference()` – not needed for now

In [7]:
########### Shared implementation for the training and validation datasets:

class _BrainDatasetBase(utils.Dataset):
    """Brain section dataset backed by locally stored PNG files.

    Subclasses only select the sub-folders (relative to ``ROOT_DIR``) that
    hold the section images and the per-image mask folders.

    Expected layout::

        <ROOT_DIR>/<IMAGES_SUBDIR>/*.png
        <ROOT_DIR>/<MASKS_SUBDIR>/<anything>_<image_id>/<anything>_m<class_id>.png

    Image ids are assigned 0, 1, 2, ... following the natural,
    case-insensitive sort order of the image file names; the mask folder of
    image ``k`` must therefore be named ``*_k``.
    """

    #see utils.py for default def load_image() function; modify according to your dataset

    # Folders relative to ROOT_DIR ('/'-separated); set by the subclasses
    IMAGES_SUBDIR = None
    MASKS_SUBDIR = None

    def _data_dir(self, subdir):
        """Return the absolute path of ``subdir`` below ``ROOT_DIR``."""
        return os.path.join(ROOT_DIR, *subdir.split('/'))

    def load_brain(self):
        """Register all classes and all PNG images found in ``IMAGES_SUBDIR``.

        Classes get the ids 1..N in the order of ``RGB_MAPPINGS_LABELS``.
        The working directory is not changed; absolute paths are stored.
        """
        for index, label in enumerate(RGB_MAPPINGS_LABELS):
            self.add_class('brain', index+1, label)

        images_dir = self._data_dir(self.IMAGES_SUBDIR)
        img_paths = glob.glob(os.path.join(glob.escape(images_dir), '*.png'))
        # Sort on the lower-cased file name only, as before
        img_paths = natsorted(img_paths,
                              key=lambda p: os.path.basename(p).lower())

        for im_id, img_path in enumerate(img_paths):
            # Only the first two dimensions are needed, so images with a
            # channel axis no longer fail on a two-value unpack
            height, width = np.shape(skimage.io.imread(img_path))[:2]
            self.add_image("brain", image_id=im_id, path=img_path,
                           height=height, width=width)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Every ``*.png`` file inside the mask folder of ``image_id`` is one
        instance; its class id is parsed from the file name.

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D int32 array of class IDs of the instance masks;
            ``class_ids[i]`` belongs to ``masks[:, :, i]``.

        Raises:
        FileNotFoundError: if no mask folder ``*_<image_id>`` exists.
        """
        info = self.image_info[image_id]

        masks_root = self._data_dir(self.MASKS_SUBDIR)
        pattern = os.path.join(glob.escape(masks_root), '*_' + str(image_id))
        folders = natsorted(d for d in glob.glob(pattern) if os.path.isdir(d))
        if not folders:
            raise FileNotFoundError(
                "No mask folder matching '*_%s' in %s" % (image_id, masks_root))
        mask_dir = folders[0]

        mask_paths = natsorted(
            glob.glob(os.path.join(glob.escape(mask_dir), '*.png')))

        # One channel per mask FILE so that channel i and class_ids[i] always
        # describe the same instance. (Indexing the channel by class id while
        # filling class_ids in file order paired masks with the wrong class.)
        mask = np.zeros([info['height'], info['width'], len(mask_paths)],
                        dtype=bool)
        class_ids = np.zeros(len(mask_paths), dtype=np.int32)

        for ind, mask_path in enumerate(mask_paths):
            bin_mask = skimage.io.imread(mask_path, as_gray=True) # grayscale=0
            # as_gray may yield floats in [0, 1]; threshold instead of
            # truncating them into an integer array
            mask[:, :, ind] = bin_mask > 0
            # Map the file name to its class ID
            class_ids[ind] = get_class_id_from_image_name(
                os.path.basename(mask_path))

        return mask, class_ids


########### Create training dataset:

class BrainDataset_Train(_BrainDatasetBase):
    """Training split of the brain section dataset. The dataset consists of
    locally stored brain section images, to which file access is required.
    """
    IMAGES_SUBDIR = 'images/TRAINING'
    MASKS_SUBDIR = 'masks/TRAINING'


########### Create validation dataset:

class BrainDataset_Val(_BrainDatasetBase):
    """Validation split of the brain section dataset. The dataset consists of
    locally stored brain section images, to which file access is required.
    """
    IMAGES_SUBDIR = 'images/VALIDATION'
    MASKS_SUBDIR = 'masks/VALIDATION'




In [None]:
# Training dataset: start from the data directory, register all classes and
# training images, then finalize the dataset.
resetDataDir()
print(os.getcwd())
dataset_train = BrainDataset_Train()
dataset_train.load_brain()
# NOTE(review): an earlier remark here said prepare() "does nothing for now";
# later cells read dataset_train.image_ids and dataset_train.class_names,
# which presumably are set up by prepare() -- confirm in utils.py.
dataset_train.prepare()
print("Done processing training data.")

C:\Users\dal4019\Documents\Bst_Reg


In [None]:
# Validation dataset: same steps as for the training dataset, but reading
# the validation folders.
resetDataDir()
dataset_val = BrainDataset_Val()
dataset_val.load_brain()
# NOTE(review): an earlier remark here said prepare() "does nothing for now";
# later cells read dataset_val.image_ids and dataset_val.class_names, which
# presumably are set up by prepare() -- confirm in utils.py.
dataset_val.prepare()
print("Done processing validation data.")

In [None]:
# Load and display random samples
# (two training images are drawn at random; the same image may be drawn twice)
resetDataDir()
image_ids = np.random.choice(dataset_train.image_ids, 2)
for image_id in image_ids:
    print("IMAGE ID: " + str(image_id))
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)

    # The former per-class "mask_area" list was removed: it summed the first
    # len(unique class ids) mask channels rather than the channels of those
    # classes, and its result was never used.

    # Show the image together with its top masks (limit=4 panels)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names, 4)

In [None]:
# # Verify that all images and masks are the correct size
# train_errors = []
# for info in dataset_train.image_info:
#     # Check training image sizes
#     image_id = info["id"]
#     info_height = info["height"]
#     info_width = info["width"]
#     try: 
#         mask, class_ids = dataset_train.load_mask(image_id)
#         [mask_s1, mask_s2, mask_s3] = np.shape(mask)
#         if (info_height != mask_s1 or info_width != mask_s2):
#             train_errors.append("Training Images. Image and mask shape differ for image id: " + str(image_id) )
#     except: 
#         train_errors.append("Training Images. Image and mask shape differ for image id: " + str(image_id) )
#         continue
        
    
#     # Check images not empty
#     image = dataset_train.load_image(image_id)
#     image_total = np.sum(image)
#     if (image_total < 0):
#         train_errors.append("Training image empty: " + image_id)
    
#     # Check masks not empty
#     mask = dataset_train.load_mask(image_id)
#     mask_total = np.sum(mask)
#     if (image_total < 0):
#         train_errors.append("Training mask empty: " + image_id)
        
        
# val_errors = []
# for info in dataset_val.image_info:
#     image_id = info["id"]
#     info_height = info["height"]
#     info_width = info["width"]
#     try:
#         mask, class_ids = dataset_val.load_mask(image_id)
#         [mask_s1, mask_s2, mask_s3] = np.shape(mask)
#         if (info_height != mask_s1 or info_width != mask_s2):
#             val_errors.append("Validation Images. Image and mask shape differ for image id: " + str(image_id))
#     except: 
#         val_errors.append("Validation Images. Image and mask shape differ for image id: "+ str(image_id))
#         continue
        
#     # Check images not empty
#     image = dataset_val.load_image(image_id)
#     image_total = np.sum(image)
#     if (image_total < 0):
#         val_errors.append("Validation image empty: " + image_id)

#     # Check masks not empty
#     mask = dataset_val.load_mask(image_id)
#     mask_total = np.sum(mask)
#     if (image_total < 0):
#         val_errors.append("Validation mask empty: " + image_id)

# print(train_errors)
# print(val_errors)

## Create Model

In [None]:
# Create model in training mode; MODEL_DIR is passed as the model directory
# (see the note at its definition: logs and trained weights).
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

In [None]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last()[1], by_name=True)
else:
    # Fail loudly: a misspelled option would otherwise silently leave the
    # model without any pre-trained weights
    raise ValueError(
        'Unknown init_with value %r; expected "imagenet", "coco" or "last"'
        % (init_with,))

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

In [None]:
# Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
# This first stage uses the configured learning rate and epochs=1.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs=1, 
            layers='heads')

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# resetDataDir()
# model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes_A4_Ch01_E=5_fine.h5")
# model.keras_model.save_weights(model_path)

In [None]:
# Fine tune all layers
# Passing layers="all" trains all layers. You can also
# pass a regular expression to select which layers to
# train by name pattern.
# `epochs` is the TOTAL epoch count to reach, not the number of additional
# epochs: the heads stage above already reached epoch 1, so this stage must
# ask for epoch 2 to train at all (epochs=1 would run zero epochs).
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE / 10,
            epochs=2, 
            layers="all")

In [None]:
# Save the trained weights. The path is relative, so the working directory is
# first reset to the data directory; the file ends up in its "weights" folder.
resetDataDir()
print(os.getcwd())
model_path = os.path.join("weights", "mask_rcnn_shapes.h5")
model.keras_model.save_weights(model_path)

## Detection and Validation

In [None]:
# Detection needs its own configuration and a model built in "inference"
# mode; the model created earlier is in "training" mode and
# `inference_config` was not defined anywhere before this cell.
class InferenceConfig(BrainConfig):
    """BrainConfig variant for detection: one image per batch."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode and load the weights saved after
# training (same file as written by the save cell: <MODEL_DIR>/mask_rcnn_shapes.h5)
model = modellib.MaskRCNN(mode="inference", config=inference_config,
                          model_dir=MODEL_DIR)
model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
model.load_weights(model_path, by_name=True)

# Validation image to inspect (must be a valid id of dataset_val)
image_id = 10
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_val, inference_config, 
                           image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# Show the ground truth boxes and masks for this image
visualize.display_instances (original_image, gt_bbox, gt_mask, gt_class_id, 
                    dataset_val.class_names, figsize=(15, 15))

In [None]:
# Run detection on the image loaded above
results = model.detect([original_image], verbose=1)

# Only one image was passed in, so there is exactly one result entry
r = results[0]
# Quick sanity check: both sums are 0 when nothing was detected
print(np.sum(r['rois']))
print(np.sum(r['masks']))

# display_instances opens its own figure of the requested size, so no
# separate (and otherwise empty) plt.figure() is created beforehand
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], figsize=(15, 15))#ax=get_ax()

## Evaluation

In [None]:
# Compute VOC-Style mAP @ IoU=0.5
# Evaluated on up to NUM_EVAL_IMAGES distinct validation images.
# Increase for better accuracy.
NUM_EVAL_IMAGES = 30

# Sample WITHOUT replacement so that no image is counted twice, and never ask
# for more images than the validation set contains
image_ids = np.random.choice(dataset_val.image_ids,
                             min(NUM_EVAL_IMAGES, len(dataset_val.image_ids)),
                             replace=False)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    # Run object detection (detect() receives the raw image; the unused
    # pre-molded copy of the image was dropped). verbose=0 keeps the loop
    # from flooding the cell output.
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)
    print(precisions)
    
print("mAP: ", np.mean(APs))


# plotting APs
# .
# .

In [None]:
# Mean of the per-image AP values collected in the evaluation loop
np.mean(APs)