In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
import matplotlib.pyplot as plt
from keras_unet.models import custom_unet
import keras.backend as K

import awscli
from google.colab import drive
# Mount Google Drive at /content/drive; the AWS credentials file read in the
# next cell lives under this mount point.
# NOTE(review): force_remount=True presumably replaces an already-active mount — confirm.
drive.mount("/content/drive", force_remount=True)

In [None]:
!cat /content/drive/My\ Drive/config/awscli.ini
!export AWS_SHARED_CREDENTIALS_FILE=/content/drive/My\ Drive/config/awscli.ini
path = "/content/drive/My Drive/config/awscli.ini"
os.environ['AWS_SHARED_CREDENTIALS_FILE'] = path

!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20/train.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20/val.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20/test.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20/train-output.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20/val-output.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20/test-output.zip .

!unzip train.zip
!unzip val.zip
!unzip test.zip
!unzip train-output.zip
!unzip val-output.zip
!unzip test-output.zip

In [None]:
# Seed handed to every directory generator.
SEED = 909

# Batch size used for each split.
BATCH_SIZE_TRAIN = BATCH_SIZE_VAL = BATCH_SIZE_TEST = 6

# Target size (height, width) that the generators resize images to.
IMAGE_HEIGHT = IMAGE_WIDTH = 512
IMG_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)

# Number of samples in each split; used to derive the steps per epoch.
NUM_TRAIN, NUM_VAL, NUM_TEST = 6651, 932, 1950

def create_train(img_path, mask_path, batch_size=4):
    """
    Creates a generator of (image batch, mask batch) pairs
    Args:
        img_path (str): path to folder containing images
        mask_path (str): path to folder containing masks
        batch_size (num): the generator batch size
    Returns:
        an iterator that pairs each image batch with a mask batch
    """
    def _flow(directory):
        # Grayscale, rescaled to [0, 1], resized to IMG_SIZE, no class labels.
        # Both directories are read with the same SEED (presumably so that
        # images and masks are drawn in the same order - TODO confirm).
        return ImageDataGenerator(rescale=1./255).flow_from_directory(
            directory, target_size=IMG_SIZE, class_mode=None,
            color_mode='grayscale', batch_size=batch_size, seed=SEED)

    return zip(_flow(img_path), _flow(mask_path))

# Image / mask folders for each split (created by unzipping the *-output archives).
TRAIN_IMG_PATH = os.path.join('train-output', 'images')
TRAIN_MASK_PATH = os.path.join('train-output', 'masks')

VAL_IMG_PATH = os.path.join('val-output', 'images')
VAL_MASK_PATH = os.path.join('val-output', 'masks')

TEST_IMG_PATH = os.path.join('test-output', 'images')
TEST_MASK_PATH = os.path.join('test-output', 'masks')

# Each split uses its own batch-size constant so that the generators stay
# consistent with the EPOCH_STEP_* values, which are derived from
# BATCH_SIZE_TRAIN / BATCH_SIZE_VAL / BATCH_SIZE_TEST respectively.
TRAIN_GENERATOR = create_train(TRAIN_IMG_PATH, TRAIN_MASK_PATH, BATCH_SIZE_TRAIN)
VAL_GENERATOR = create_train(VAL_IMG_PATH, VAL_MASK_PATH, BATCH_SIZE_VAL)
TEST_GENERATOR = create_train(TEST_IMG_PATH, TEST_MASK_PATH, BATCH_SIZE_TEST)

# NOTE(review): the training cell passes epochs=20 rather than this constant.
NUM_OF_EPOCHS = 100

In [None]:
def display(display_list):
    """
    Plots the given arrays side by side in a single row
    Args:
        display_list (list): image array, mask array, and prediction array,
                             in that order
    Returns:
        None
    """
    titles = ['Input', 'True Mask', 'Predicted Mask']
    n_panels = len(display_list)

    plt.figure(figsize=(15, 15))
    for position, array in enumerate(display_list, start=1):
        plt.subplot(1, n_panels, position)
        # subplot positions are 1-based, the titles list is 0-based
        plt.title(titles[position - 1])
        picture = tf.keras.preprocessing.image.array_to_img(array)
        plt.imshow(picture, cmap='gray')
    plt.show()

def show_prediction(datagen, num=1):
    """
    Draws batches from the generator and displays the first image of each
    batch together with its mask and the thresholded model prediction
    Args:
        datagen (generator): yields (image batch, mask batch) pairs
        num (num): the number of image/mask/pred sets to be displayed
    Returns:
        None
    """
    for _ in range(num):
        images, masks = next(datagen)
        # uses the notebook-level MODEL; only the first sample of the batch is shown
        first_prediction = MODEL.predict(images)[0]
        binary_prediction = first_prediction > 0.5
        display([images[0], masks[0], binary_prediction])

In [None]:
# Preview predictions for 10 training batches. MODEL is only created in a
# later cell, so on a fresh "Restart & Run All" this cell would otherwise stop
# the run with a NameError; skip with a hint instead.
if "MODEL" in globals():
    show_prediction(TRAIN_GENERATOR, 10)
else:
    print("MODEL is not defined yet: build or load the model, then re-run this cell.")

In [None]:
def dice_loss(targets, inputs, smooth=1e-6):
    """
    Dice loss (1 - dice coefficient) between ground truth and prediction
    Args:
        targets (array): the ground truth masks
        inputs (array): the predicted masks
        smooth (num): constant added to both numerator and denominator
    Returns:
        the dice loss value
    """
    flat_true = K.flatten(targets)
    flat_pred = K.flatten(inputs)

    overlap = K.sum(flat_true * flat_pred)
    total = K.sum(flat_true) + K.sum(flat_pred)

    dice = (2*overlap + smooth) / (total + smooth)
    return 1 - dice

In [None]:
# Single-channel U-Net with a sigmoid output. The input shape is derived from
# the image-size constants so it cannot drift from the size the generators
# resize to (IMG_SIZE).
MODEL = custom_unet(
    input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 1),
    use_batch_norm=True,
    num_classes=1,
    filters=64,
    dropout=0.25,
    output_activation='sigmoid')

In [None]:
# Metrics reported during training/evaluation (the dice loss is also tracked as a metric).
STATS = [dice_loss, tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]

# Number of full batches per split; floor division drops any final partial batch.
EPOCH_STEP_TRAIN = NUM_TRAIN // BATCH_SIZE_TRAIN
EPOCH_STEP_VAL = NUM_VAL // BATCH_SIZE_VAL
EPOCH_STEP_TEST = NUM_TEST // BATCH_SIZE_TEST

# The model object is named MODEL (upper case); lower-case `model` is never
# defined in this notebook and raised NameError here.
MODEL.compile(optimizer='adam', loss=dice_loss, metrics=STATS, run_eagerly=True)

In [None]:
# Model.fit accepts Python generators directly; Model.fit_generator is
# deprecated and no longer available in recent Keras releases.
# steps_per_epoch / validation_steps are required because the generators
# passed here are endless iterators with no length.
MODEL.fit(TRAIN_GENERATOR,
          steps_per_epoch=EPOCH_STEP_TRAIN,
          validation_data=VAL_GENERATOR,
          validation_steps=EPOCH_STEP_VAL,
          epochs=20)

In [None]:
# Save the trained model as an .h5 file; the file name embeds the image size.
# The path is relative (presumably resolved against /content so it lands on
# the mounted Drive - TODO confirm).
# NOTE(review): this writes a UNET5-... file while the following cell loads a
# UNET4-... file - confirm that loading the older checkpoint is intended.
MODEL.save(f'drive/MyDrive/UNET5-A99-P93-R88-{IMAGE_HEIGHT}_{IMAGE_WIDTH}.h5')

In [None]:
# Custom functions are looked up by the name they were serialized under. The
# loss/metric compiled into the model in this notebook is the function
# `dice_loss`, so that name must be registered. The "DiceLoss" key is kept so
# that files saved with that name (if any) still load.
MODEL = keras.models.load_model(
    f'drive/MyDrive/UNET4-A99-P93-R88-{IMAGE_HEIGHT}_{IMAGE_WIDTH}.h5',
    custom_objects={"dice_loss": dice_loss, "DiceLoss": dice_loss})

In [None]:
def dice_gen(img_path, mask_path, batch_size=4):
    """
    Builds unshuffled directory generators for images and for masks
    Args:
        img_path (str): path to folder containing images
        mask_path (str): path to folder containing masks
        batch_size (num): the generator batch size
    Returns:
        tuple of (image generator, mask generator)
    """
    rescale_args = dict(rescale=1./255)
    # identical flow settings for both folders; shuffle is switched off
    flow_args = dict(target_size=IMG_SIZE, class_mode=None, color_mode='grayscale',
                     batch_size=batch_size, seed=SEED, shuffle=False)

    img_generator = ImageDataGenerator(**rescale_args).flow_from_directory(img_path, **flow_args)
    mask_generator = ImageDataGenerator(**rescale_args).flow_from_directory(mask_path, **flow_args)
    return img_generator, mask_generator

In [None]:
from pydicom import dcmread
import statistics
from os import listdir
from os.path import join

# make sure metrics.py and lookup_tables.py are uploaded into the runtime
from metrics import compute_surface_distances, compute_surface_dice_at_tolerance

def get_distances(path):
    """
    Gets the pixel spacing values for images
    Args:
        path (str): path to a folder whose sub-folders each contain an
                    "images" folder with files in dicom format
    Returns:
        dictionary mapping "lung_l/<subdir>-<image>.png" to the list of
        PixelSpacing values read from the corresponding dicom file
    """
    distances = {}
    for subdir in listdir(path):
        images_dir = join(path, subdir, "images")
        for image in listdir(images_dir):
            dimensions = dcmread(join(images_dir, image)).PixelSpacing
            # Key format "lung_l/<subdir>-<image>.png"; presumably this mirrors
            # the generator's `filenames` entries used for the lookup - TODO confirm.
            png_name = "lung_l/" + subdir + "-" + image + ".png"
            distances[png_name] = list(dimensions)
    return distances

def get_surface_dice_values(img_path, mask_path, batch_size, iterations, pixel_distances,
                            tolerance=1.9):
    """
    Computes the surface dice scores for each image
    Args:
        img_path (str): path to folder containing images
        mask_path (str): path to folder containing masks
        batch_size (num): batch size of generator
        iterations (num): number of batches within dataset
        pixel_distances (dict): image name (key) and pixel spacing (value) pairs
        tolerance (num): tolerance passed to compute_surface_dice_at_tolerance
                         (presumably in the same units as the pixel spacing)
    Returns:
        a list of surface dice scores, one per processed image
    """
    dice_list = []
    image_gen, mask_gen = dice_gen(img_path, mask_path, batch_size)
    # The generators are unshuffled, so the running `index` into `filenames`
    # identifies the file that each processed sample came from.
    files = image_gen.filenames
    index = 0

    for _ in range(iterations):
        image, mask = next(image_gen), next(mask_gen)
        # the notebook-level model is named MODEL (lower-case `model` is undefined)
        pred_masks = MODEL.predict(image)

        # Pairing the arrays (instead of indexing range(batch_size)) also
        # copes with a batch that holds fewer than batch_size samples.
        for true_mask, pred in zip(mask, pred_masks):
            # any non-zero mask pixel counts as foreground; predictions are cut at 0.5
            mask_gt = true_mask.astype(bool).reshape(IMG_SIZE)
            predicted_mask = (pred > 0.5).reshape(IMG_SIZE)

            surface_distances = compute_surface_distances(
                mask_gt, predicted_mask, pixel_distances[files[index]])
            surface_dice = compute_surface_dice_at_tolerance(surface_distances, tolerance)

            dice_list.append(surface_dice)
            index += 1
    return dice_list
  
def compute_surface_dice_stats(dice_list):
    """
    Computes summary statistics for surface dice scores
    Args:
        dice_list (list): list of surface_dice scores; NaN entries are ignored
    Returns:
        (mean, median, standard deviation) of the non-NaN scores. All three
        are NaN when no valid score is left; the standard deviation is NaN
        when only a single valid score is left.
    """
    # local import: `math` is not imported by the visible notebook cells
    import math

    valid = sorted(val for val in dice_list if not math.isnan(val))
    if not valid:
        nan = float("nan")
        return nan, nan, nan

    mean = sum(valid) / len(valid)
    # statistics.median averages the two middle values for even-length input
    median = statistics.median(valid)
    # the sample standard deviation needs at least two data points
    std_dev = statistics.stdev(valid) if len(valid) > 1 else float("nan")
    return mean, median, std_dev


In [None]:
# split can be either train, val or test

def surface_dice(split):
    """
    Surface dice summary statistics for one dataset split
    Args:
        split (str): train, test, or val
    Returns:
        (mean, median, standard deviation) of the surface dice scores for
        the images in split
    """
    # reject unknown splits before touching any data
    assert split in ("train", "val", "test"), "invalid split"

    # per-split settings: (image folder, mask folder, batch size, number of batches)
    settings = {
        "train": (TRAIN_IMG_PATH, TRAIN_MASK_PATH, BATCH_SIZE_TRAIN, EPOCH_STEP_TRAIN),
        "val": (VAL_IMG_PATH, VAL_MASK_PATH, BATCH_SIZE_VAL, EPOCH_STEP_VAL),
        "test": (TEST_IMG_PATH, TEST_MASK_PATH, BATCH_SIZE_TEST, EPOCH_STEP_TEST),
    }
    img_path, mask_path, batch_size, iterations = settings[split]

    # the split name doubles as the folder that holds the dicom files
    spacing_by_file = get_distances(split)
    scores = get_surface_dice_values(img_path, mask_path, batch_size, iterations, spacing_by_file)
    return compute_surface_dice_stats(scores)


In [None]:
# Surface dice (mean, median, standard deviation) over the validation split.
summary_stats = surface_dice("val")