## Milestone - 2 : Training and Tuning models for Pneumonia Detection ##

In [1]:
!pip install pydicom

zsh:1: command not found: pip


In [3]:

import numpy as np
import pandas as pd
import os
import pydicom
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import cv2
import warnings
warnings.filterwarnings("ignore")
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
import seaborn as sns


from matplotlib import pyplot as plt

In [4]:
# Loading and splitting data
data_dir = 'Dataset/stage_2_train_images'
labels_df = pd.read_csv('Dataset/stage_2_train_labels.csv')

In [6]:
#Using a datagenerator to optimize memory usage and keep the image size at 512*512

import tensorflow as tf
import numpy as np
import os
import cv2
import pydicom

class DicomDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, df, data_dir, batch_size=4, img_size=512, shuffle=True, subset='training'):
        self.df = df
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.subset = subset
        self.indices = np.arange(len(self.df))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_df = self.df.iloc[batch_indices]
        return self.__data_generation(batch_df)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, batch_df):
        # Vectorized processing for faster performance
        file_paths = batch_df['patientId'].apply(lambda x: os.path.join(self.data_dir, f"{x}.dcm")).values
        labels = batch_df['Target'].values

        X = np.array([self._process_dicom(file_path) for file_path in file_paths])
        y = np.array(labels, dtype=int)

        return X, y

    def _process_dicom(self, file_path):
        # Efficient single file processing
        ds = pydicom.dcmread(file_path)
        img = ds.pixel_array
        img = cv2.resize(img, (self.img_size, self.img_size))  # Resize
        img = img / 255.0  # Normalize
        img = np.expand_dims(img, axis=-1)  # Add channel dimension
        return img

# Loading and splitting data
data_dir = 'Dataset/stage_2_train_images'
labels_df = pd.read_csv('Dataset/stage_2_train_labels.csv')
# data_dir = 'https://drive.google.com/file/d/1-2AEqHphM2n-M9L6i9SYG4E3hGkRawth/view?usp=drive_link'

# Splitting the dataset
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(labels_df, test_size=0.2, random_state=42)

# Instantiate generators
train_generator = DicomDataGenerator(train_df, data_dir, batch_size=8, img_size=512, shuffle=True, subset='training')
val_generator = DicomDataGenerator(val_df, data_dir, batch_size=8, img_size=512, shuffle=False, subset='validation')


In [7]:
from sklearn.model_selection import train_test_split


# Split into train, validation, and test sets
train_df, test_df = train_test_split(labels_df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)

In [9]:
# Parameters
batch_size = 4
img_size = 512

# Data directories
data_dir = 'Dataset/stage_2_train_images'

# Initialize generators
train_generator = DicomDataGenerator(train_df, data_dir, batch_size=batch_size, img_size=img_size, subset='training')
val_generator = DicomDataGenerator(val_df, data_dir, batch_size=batch_size, img_size=img_size, subset='validation')
test_generator = DicomDataGenerator(test_df, data_dir, batch_size=batch_size, img_size=img_size, subset='validation')


In [10]:
#Basic CNN Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Define input image size
input_shape = (img_size, img_size, 1)  # Grayscale images

# Initialize the model
model = Sequential([
    # First Convolutional Block
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, name='Conv1'),
    MaxPooling2D((2, 2), name='Pool1'),

    # Second Convolutional Block
    Conv2D(64, (3, 3), activation='relu', name='Conv2'),
    MaxPooling2D((2, 2), name='Pool2'),

    # Third Convolutional Block
    Conv2D(128, (3, 3), activation='relu', name='Conv3'),
    MaxPooling2D((2, 2), name='Pool3'),

    # Fully Connected Layers
    Flatten(name='Flatten'),
    Dense(512, activation='relu', name='FC1'),
    Dense(1, activation='sigmoid', name='Output')  # For binary classification
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Display the model summary
model.summary()


In [11]:
# Train the model
model.fit(train_generator, epochs=5, validation_data=val_generator)

Epoch 1/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3392s[0m 623ms/step - accuracy: 0.7448 - loss: 0.5303 - val_accuracy: 0.7437 - val_loss: 0.5180
Epoch 2/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18169s[0m 3s/step - accuracy: 0.7676 - loss: 0.4818 - val_accuracy: 0.7735 - val_loss: 0.4730
Epoch 3/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24410s[0m 4s/step - accuracy: 0.7877 - loss: 0.4539 - val_accuracy: 0.7896 - val_loss: 0.4613
Epoch 4/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4790s[0m 880ms/step - accuracy: 0.8182 - loss: 0.4040 - val_accuracy: 0.7813 - val_loss: 0.4859
Epoch 5/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4328s[0m 795ms/step - accuracy: 0.8523 - loss: 0.3390 - val_accuracy: 0.7929 - val_loss: 0.4972


<keras.src.callbacks.history.History at 0x32d439e20>

In [12]:
# Evaluate the model on training, validation, and testing data
train_loss, train_acc = model.evaluate(train_generator)
val_loss, val_acc = model.evaluate(val_generator)
test_loss, test_acc = model.evaluate(test_generator)

[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m364s[0m 67ms/step - accuracy: 0.8781 - loss: 0.2934
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 67ms/step - accuracy: 0.7829 - loss: 0.5453
[1m1512/1512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 67ms/step - accuracy: 0.7827 - loss: 0.4984


In [13]:
# Creating a dataframe with evaluation metrics of each model

import pandas as pd

metrics_df = pd.DataFrame(columns=[
    'Model',
    'Training Loss', 'Training Accuracy',
    'Validation Loss', 'Validation Accuracy',
    'Testing Loss', 'Testing Accuracy'
])

print(metrics_df)

# Create a dictionary with the metrics
model_metrics = {
    'Model': "CNN Base Model",
    'Training Loss': train_loss, 'Training Accuracy': train_acc,
    'Validation Loss': val_loss, 'Validation Accuracy': val_acc,
    'Testing Loss': test_loss, 'Testing Accuracy': test_acc,

}

# Append the metrics to the DataFrame
model_metrics_df = pd.DataFrame([model_metrics])
metrics_df = pd.concat([metrics_df,model_metrics_df], ignore_index=True)

# Print the DataFrame to check the added metrics
metrics_df

Empty DataFrame
Columns: [Model, Training Loss, Training Accuracy, Validation Loss, Validation Accuracy, Testing Loss, Testing Accuracy]
Index: []


Unnamed: 0,Model,Training Loss,Training Accuracy,Validation Loss,Validation Accuracy,Testing Loss,Testing Accuracy
0,CNN Base Model,0.291211,0.880066,0.497216,0.79289,0.504358,0.78432


In [14]:
metrics_df.to_csv('accuracy_metrics.csv', index=False)

In [15]:
# Save the model
model.save('cnn_base_model.keras')

Above basic CNN model gives us a testing accuracy of about 80%.To improve the testing accuracy of your CNN model, we can enhance the architecture and introduce regularization techniques

In [16]:
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint


def create_model(input_shape=(512, 512, 1)):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Conv2D(256, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Conv2D(512, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),


        Flatten(),
        Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [17]:
# Initialize the model
model = create_model()

In [18]:
# Callbacks for early stopping and model checkpointing
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True)

In [19]:
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Train the model
history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=5,
        callbacks=[early_stopping, model_checkpoint]
    )
return history

Epoch 1/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4097s[0m 753ms/step - accuracy: 0.6668 - loss: 16.9127 - val_accuracy: 0.6763 - val_loss: 1.6313
Epoch 2/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3933s[0m 723ms/step - accuracy: 0.6751 - loss: 1.6812 - val_accuracy: 0.6763 - val_loss: 0.9176
Epoch 3/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3999s[0m 735ms/step - accuracy: 0.7326 - loss: 0.7163 - val_accuracy: 0.7586 - val_loss: 0.5818
Epoch 4/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4052s[0m 745ms/step - accuracy: 0.7458 - loss: 0.6089 - val_accuracy: 0.7565 - val_loss: 0.5596
Epoch 5/5
[1m5441/5441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4256s[0m 782ms/step - accuracy: 0.7546 - loss: 0.5908 - val_accuracy: 0.7764 - val_loss: 0.5414


SyntaxError: 'return' outside function (1248997019.py, line 11)

In [None]:
# Evaluate the model on training, validation, and testing data
train_loss, train_acc = model.evaluate(train_generator)
val_loss, val_acc = model.evaluate(val_generator)
test_loss, test_acc = model.evaluate(test_generator)

In [None]:
new_metrics = {
    'Model': "Regularized CNN Model",
    'Training Loss': train_loss, 'Training Accuracy': train_acc,
    'Validation Loss': val_loss, 'Validation Accuracy': val_acc,
    'Testing Loss': test_loss, 'Testing Accuracy': test_acc
}
new_metrics_df = pd.DataFrame([new_metrics])

# Load the existing CSV file
csv_file = 'accuracy_metrics.csv'

existing_metrics = pd.read_csv(csv_file)

# Append the new metrics to the existing DataFrame
updated_metrics = pd.concat([existing_metrics, new_metrics_df], ignore_index=True)

# Save the updated DataFrame back to the CSV file
updated_metrics.to_csv(csv_file, index=False)

# Save the model
model.save('regularised_cnn_model.keras')

In [None]:
print(updated_metrics)

### Transfer Learning - VGG16 ###

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam


def create_vgg16_model(input_shape=(512, 512, 1)):
    # Load the VGG16 model pre-trained on ImageNet, without the top (fully connected) layers
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(512, 512, 3))

    # Convert grayscale to 3 channels by duplicating the single channel
    input_layer = Input(shape=input_shape)
    x = Conv2D(3, (3, 3), padding='same', activation='relu')(input_layer)

    # Use the output of the base model as the new input
    x = base_model(x)

    # Add new fully connected layers
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    output_layer = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input_layer, outputs=output_layer)

    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Initialize the model
model = create_vgg16_model()

## Callbacks for early stopping and model checkpointing
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('vgg16_best_model.keras', monitor='val_loss', save_best_only=True)

# Train the model
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[early_stopping, model_checkpoint]
)

In [None]:
new_metrics = {
    'Model': "VGG16 Model",
    'Training Loss': train_loss, 'Training Accuracy': train_acc,
    'Validation Loss': val_loss, 'Validation Accuracy': val_acc,
    'Testing Loss': test_loss, 'Testing Accuracy': test_acc
}
new_metrics_df = pd.DataFrame([new_metrics])

# Load the existing CSV file
csv_file = 'accuracy_metrics.csv'

existing_metrics = pd.read_csv(csv_file)

# Append the new metrics to the existing DataFrame
updated_metrics = pd.concat([existing_metrics, new_metrics_df], ignore_index=True)

# Save the updated DataFrame back to the CSV file
updated_metrics.to_csv(csv_file, index=False)

# Save the model
model.save('VGG16_model.keras')

In [None]:
print(updated_metrics)

### Transfer Learning - ResNet50 Model ###

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

def create_resnet50_model(input_shape=(512, 512, 1)):
    # Load the resnet50 model pre-trained on ImageNet, without the top (fully connected) layers
    base_model = ResNet50(include_top=False, weights='imagenet',input_shape =(512, 512, 3))

    # Convert grayscale to 3 channels by duplicating the single channel
    input_layer = Input(shape=input_shape)
    x = Conv2D(3, (3, 3), padding='same', activation='relu')(input_layer)

    # Use the output of the base model as the new input
    x = base_model(x)

    # Add new fully connected layers
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    output_layer = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input_layer, outputs=output_layer)

    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Initialize the model
model = create_resnet50_model()

## Callbacks for early stopping and model checkpointing
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('resnet50_best_model.keras', monitor='val_loss', save_best_only=True)

# Train the model
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[early_stopping, model_checkpoint]
)

In [None]:
# Evaluate the model on training, validation, and testing data
train_loss, train_acc = model.evaluate(train_generator)
val_loss, val_acc = model.evaluate(val_generator)
test_loss, test_acc = model.evaluate(test_generator)

In [None]:
new_metrics = {
    'Model': "ResNet50 Model",
    'Training Loss': train_loss, 'Training Accuracy': train_acc,
    'Validation Loss': val_loss, 'Validation Accuracy': val_acc,
    'Testing Loss': test_loss, 'Testing Accuracy': test_acc
}
new_metrics_df = pd.DataFrame([new_metrics])

# Load the existing CSV file
csv_file = 'accuracy_metrics.csv'

existing_metrics = pd.read_csv(csv_file)

# Append the new metrics to the existing DataFrame
updated_metrics = pd.concat([existing_metrics, new_metrics_df], ignore_index=True)

# Save the updated DataFrame back to the CSV file
updated_metrics.to_csv(csv_file, index=False)

# Save the model
model.save('ResNet50_model.keras')

In [None]:
import pandas as pd
pd.read_csv('accuracy_metrics.csv')

### Object Detection with Mask RCNN ###

In [None]:
# setting up the input directories and working directory to save models
# and variables

DATA_DIR = 'input'
ROOT_DIR = 'working'

In [None]:
# Import Mask RCNN from the above implementation

sys.path.append(os.path.join(ROOT_DIR, 'Mask_RCNN'))  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

In [None]:
# training and testing directory of images

train_dicom_dir = os.path.join(DATA_DIR, 'stage_2_train_images')
test_dicom_dir = os.path.join(DATA_DIR, 'stage_2_test_images')

In [None]:
# list of dicom image paths and filenames

def get_dicom_fps(dicom_dir):
    dicom_fps = glob.glob(dicom_dir+'/'+'*.dcm')
    return list(set(dicom_fps))


# list of image filenames and annotations dictionary
def parse_dataset(dicom_dir, anns):
    image_fps = get_dicom_fps(dicom_dir)
    image_annotations = {fp: [] for fp in image_fps}
    for index, row in anns.iterrows():
        fp = os.path.join(dicom_dir, row['patientId']+'.dcm')
        image_annotations[fp].append(row)
    return image_fps, image_annotations

In [None]:
# Configuring the matterport Mask RCNN framework for Pneumonia Detection
# Customising parameters in the base 'config' class

class DetectorConfig(Config):
    NAME = 'pneumonia'

    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    BACKBONE = 'resnet50'

    NUM_CLASSES = 2  # background + 1 pneumonia classes

    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256
    RPN_ANCHOR_SCALES = (32, 64, 128, 256)
    TRAIN_ROIS_PER_IMAGE = 32
    MAX_GT_INSTANCES = 3
    DETECTION_MAX_INSTANCES = 3
    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.1

    STEPS_PER_EPOCH = 200

config = DetectorConfig()
config.display()

In [None]:
# Loading and processing the dataset

class DetectorDataset(utils.Dataset):
    """Dataset class for training pneumonia detection on the RSNA pneumonia dataset.
    """

    def __init__(self, image_fps, image_annotations, orig_height, orig_width):
        super().__init__(self)

        # Add classes
        self.add_class('pneumonia', 1, 'Lung Opacity')

        # add images
        for i, fp in enumerate(image_fps):
            annotations = image_annotations[fp]
            self.add_image('pneumonia', image_id=i, path=fp,
                           annotations=annotations, orig_height=orig_height, orig_width=orig_width)

    def image_reference(self, image_id):
        info = self.image_info[image_id]
        return info['path']

    def load_image(self, image_id):
        info = self.image_info[image_id]
        fp = info['path']
        ds = pydicom.read_file(fp)
        image = ds.pixel_array
        # If grayscale. Convert to RGB for consistency.
        if len(image.shape) != 3 or image.shape[2] != 3:
            image = np.stack((image,) * 3, -1)
        return image

    def load_mask(self, image_id):
        info = self.image_info[image_id]
        annotations = info['annotations']
        count = len(annotations)
        if count == 0:
            mask = np.zeros((info['orig_height'], info['orig_width'], 1), dtype=np.uint8)
            class_ids = np.zeros((1,), dtype=np.int32)
        else:
            mask = np.zeros((info['orig_height'], info['orig_width'], count), dtype=np.uint8)
            class_ids = np.zeros((count,), dtype=np.int32)
            for i, a in enumerate(annotations):
                if a['Target'] == 1:
                    x = int(a['x'])
                    y = int(a['y'])
                    w = int(a['width'])
                    h = int(a['height'])
                    mask_instance = mask[:, :, i].copy()
                    cv2.rectangle(mask_instance, (x, y), (x+w, y+h), 255, -1)
                    mask[:, :, i] = mask_instance
                    class_ids[i] = 1
        return mask.astype(np.bool), class_ids.astype(np.int32)


In [None]:
# training dataset
anns = pd.read_csv(os.path.join(DATA_DIR, 'stage_2_train_labels.csv'))
anns.head()

In [None]:
image_fps, image_annotations = parse_dataset(train_dicom_dir, anns=anns)

In [None]:
ds = pydicom.read_file(image_fps[0]) # read dicom image from filepath
image = ds.pixel_array # get image array

In [None]:
# show dicom fields
ds

In [None]:
# Original image size

ORIG_SIZE =

In [None]:
# splitting dataset into training and validation

image_fps_list = list(image_fps)
random.seed(42)
random.shuffle(image_fps_list)

val_size = 1500
image_fps_val = image_fps_list[:val_size]
image_fps_train = image_fps_list[val_size:]

print(len(image_fps_train), len(image_fps_val))

In [None]:
# prepare the training dataset

dataset_train = DetectorDataset(image_fps_train, image_annotations, ORIG_SIZE, ORIG_SIZE)
dataset_train.prepare()

In [None]:
# Show annotation for an image

test_fp = random.choice(image_fps_train)
image_annotations[test_fp]

In [None]:
# prepare the validation dataset, same as above

dataset_val = DetectorDataset(image_fps_val, image_annotations, ORIG_SIZE, ORIG_SIZE)
dataset_val.prepare()

In [None]:
# Load and display random sample and their bounding boxes

class_ids = [0]
while class_ids[0] == 0:  ## look for a mask
    image_id = random.choice(dataset_train.image_ids)
    image_fp = dataset_train.image_reference(image_id)
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)

print(image.shape)

plt.figure(figsize=(10, 10))
plt.subplot(1, 2, 1)
plt.imshow(image)
plt.axis('off')

plt.subplot(1, 2, 2)
masked = np.zeros(image.shape[:2])
for i in range(mask.shape[2]):
    masked += image[:, :, 0] * mask[:, :, i]
plt.imshow(masked, cmap='gray')
plt.axis('off')

print(image_fp)
print(class_ids)

In [None]:
# Image augmentation using the imgaug library

augmentation = iaa.Sequential([
    iaa.OneOf([ ## geometric transform
        iaa.Affine(
            scale={"x": (0.98, 1.04), "y": (0.98, 1.04)},
            translate_percent={"x": (-0.03, 0.03), "y": (-0.05, 0.05)},
            rotate=(-5, 5),
            shear=(-3, 3),
        ),
        iaa.PiecewiseAffine(scale=(0.002, 0.03)),
    ]),
    iaa.OneOf([ ## brightness or contrast
        iaa.Multiply((0.85, 1.15)),
        iaa.ContrastNormalization((0.85, 1.15)),
    ]),
    iaa.OneOf([ ## blur or sharpen
        iaa.GaussianBlur(sigma=(0.0, 0.12)),
        iaa.Sharpen(alpha=(0.0, 0.12)),
    ]),
])

# test augmentation on the same sample image as above
imggrid = augmentation.draw_grid(image[:, :, 0], cols=5, rows=2)
plt.figure(figsize=(30, 12))
_ = plt.imshow(imggrid[:, :, 0], cmap='gray')

In [None]:
# training the mask RCNN model using the config specified previously

model = modellib.MaskRCNN(mode='training', config=config, model_dir=ROOT_DIR)

In [None]:
%%time
# first epochs with higher lr to speedup the learning

model.train(dataset_train, dataset_val,
            learning_rate=LEARNING_RATE*2,
            epochs=2,
            layers='all',
            augmentation=None)

In [None]:
# Including image augmentation to improve performance
# Reducing the learning rate to improve on performance

%%time
model.train(dataset_train, dataset_val,
            learning_rate=LEARNING_RATE,
            epochs=NUM_EPOCHS,
            layers='all',
            augmentation=augmentation)

### Model Evaluation ###

In [None]:
# Visualising a few examples of ground truth vs. predictions
# on the validation dataset

dataset = dataset_val
fig = plt.figure(figsize=(10, 30))

for i in range(6):
    image_id = random.choice(dataset.image_ids)

    original_image, image_meta, gt_class_id, gt_bbox, gt_mask = \
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)

    print(original_image.shape)
    plt.subplot(6, 2, 2*i + 1)
    visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                                dataset.class_names,
                                colors=get_colors_for_class_ids(gt_class_id), ax=fig.axes[-1])

    plt.subplot(6, 2, 2*i + 2)
    results = model.detect([original_image]) #, verbose=1)
    r = results[0]
    visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'],
                                dataset.class_names, r['scores'],
                                colors=get_colors_for_class_ids(r['class_ids']), ax=fig.axes[-1])

In [None]:
# Get filenames of test dataset DICOM images
test_image_fps = get_dicom_fps(test_dicom_dir)

In [None]:
# Visualising a few images from the test data directory and the
# the predicted bounding boxes for them

def visualize():
    image_id = random.choice(test_image_fps)
    ds = pydicom.read_file(image_id)

    # original image
    image = ds.pixel_array

    # assume square image
    resize_factor = ORIG_SIZE / config.IMAGE_SHAPE[0]

    # If grayscale. Convert to RGB for consistency.
    if len(image.shape) != 3 or image.shape[2] != 3:
        image = np.stack((image,) * 3, -1)
    resized_image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE)

    patient_id = os.path.splitext(os.path.basename(image_id))[0]
    print(patient_id)

    results = model.detect([resized_image])
    r = results[0]
    for bbox in r['rois']:
        print(bbox)
        x1 = int(bbox[1] * resize_factor)
        y1 = int(bbox[0] * resize_factor)
        x2 = int(bbox[3] * resize_factor)
        y2 = int(bbox[2]  * resize_factor)
        cv2.rectangle(image, (x1,y1), (x2,y2), (77, 255, 9), 3, 1)
        width = x2 - x1
        height = y2 - y1
        print("x {} y {} h {} w {}".format(x1, y1, width, height))
    plt.figure()
    plt.imshow(image, cmap=plt.cm.gist_gray)

visualize()
visualize()
visualize()
visualize()