In [2]:
# IMPORT LIBRARIES AND PACKAGES

#This is important when you have to use Keras backend
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__

import keras
from keras import backend as k
print("Backend: "+ k.backend()+ " || tensorflow version: " + tensorflow.__version__)



import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
from matplotlib.patches import Rectangle

!pip3 install pydicom
import pydicom
from skimage.transform import resize
import keras
import pickle
import cv2


import csv
import math



#from PIL import Image
from tensorflow.keras import Model
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Concatenate, Conv2D, UpSampling2D, Reshape
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy


#!pip install --ignore-installed --upgrade tensorflow-gpu

Backend: tensorflow || tensorflow version: 2.2.0


In [3]:
#Set project path

# Root folder holding the image folders and pickled dataframes (trailing slash is intentional).
project_path = '/content/drive/My Drive/Colab Notebooks/CAPSTONE/Data/'

# Training and validation images live in the same folder; the split is done by patient id.
train_img_path = os.path.join(project_path, 'stage_2_train_images')
valid_img_path = train_img_path
test_img_path = os.path.join(project_path, 'stage_2_test_images')

In [4]:
#Set pickle files
train_data_pickle = '/train_data_pickle'
valid_data_pickle = '/valid_data_pickle'

# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load files that were produced by this project.
# The context managers close each file handle even if unpickling raises.
with open(project_path+train_data_pickle, 'rb') as fileObject:
  train_data = pickle.load(fileObject)

with open(project_path+valid_data_pickle, 'rb') as fileObject1:
  valid_data = pickle.load(fileObject1)

In [5]:
#Find unique patient_ids in train, valid set

# A patient can appear on several rows of the dataframe, so de-duplicate the ids first.
train_patient_ids = train_data['patientId'].unique()
valid_patient_ids = valid_data['patientId'].unique()

n_train_patients = len(train_patient_ids)
n_valid_patients = len(valid_patient_ids)

print('Unique patient_id in training dataset: {}'. format(n_train_patients))
print('Unique patient_id in validation dataset: {}'. format(n_valid_patients))

Unique patient_id in training dataset: 21908
Unique patient_id in validation dataset: 5914


In [6]:
class Generator(Sequence):
  """Batch generator that reads DICOM images (and builds box masks) per patient id.

  The class derives from the ``tensorflow.keras`` ``Sequence`` imported at the
  top of the notebook, because the model in this notebook is a
  ``tensorflow.keras`` model; a ``Sequence`` from the standalone ``keras``
  package is a different class and is not recognised by tf.keras training code.

  Training mode (``predict=False``): ``gen[i]`` returns ``(imgs, masks)`` where
  both arrays have shape ``(batch, image_size, image_size, 1)``.
  Predict mode (``predict=True``): ``gen[i]`` returns ``(imgs, ids)`` where
  ``ids`` is the list of entries of ``unique_patient_ids`` for that batch.

  Args:
    unique_patient_ids: sequence (list or 1-D array) of ids. In training mode
      ``'.dcm'`` is appended to each id; in predict mode each entry is used
      verbatim as the file name inside ``folder``.
    folder: directory containing the DICOM files.
    dataframe: table with ``patientId``, ``Target``, ``x``, ``y``, ``width``
      and ``height`` columns; only read in training mode.
    batch_size: number of ids per batch.
    shuffle: when truthy, the id order is reshuffled at construction and at
      the end of every epoch.
    image_size: side length (pixels) images and masks are resized to.
    predict: selects predict mode (no masks, ids returned instead).
  """

  def __init__(self,unique_patient_ids, folder, dataframe, batch_size = 32, shuffle = False, image_size = 256, predict = False): #try batch size of 32
    self.unique_patient_ids = unique_patient_ids
    self.folder = folder
    self.dataframe = dataframe
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.image_size = image_size
    self.predict = predict
    # Builds self.indexes (and applies the initial shuffle when requested).
    self.on_epoch_end()

  def __load__(self, pat_id):
    """Return ``(img, mask)`` for one training patient, each shaped (image_size, image_size, 1)."""
    patIdWithExt = pat_id+'.dcm'
    #load pixel array from the dicom file
    img = pydicom.dcmread(os.path.join(self.folder, patIdWithExt)).pixel_array
    #start from an all-zero mask at the native image resolution
    mask = np.zeros(img.shape)

    # Filtering directly yields an empty frame for unknown ids, so no separate
    # membership scan over the whole column is needed.
    pat_info = self.dataframe[self.dataframe.patientId == pat_id]

    #paint every positive bounding box of this patient into the mask
    for _, row in pat_info.iterrows():
      if row.Target == 1:
        x = int(row.x)
        y = int(row.y)
        # rows span y..y+height, columns span x..x+width
        mask[y: y + int(row.height), x: x + int(row.width)] = 1

    img = cv2.resize(img, (self.image_size, self.image_size))
    mask = cv2.resize(mask, (self.image_size, self.image_size))
    #add trailing channel dimension
    img = np.expand_dims(img, -1)
    mask = np.expand_dims(mask, -1)
    return img, mask

  def __loadpredict__(self, pat_id):
    """Return the resized image for one prediction file, shaped (image_size, image_size, 1)."""
    patIdWithExt = pat_id
    #load pixel array from the dicom file
    img = pydicom.dcmread(os.path.join(self.folder, patIdWithExt)).pixel_array
    # Resize exactly like __load__ does, so prediction inputs keep the same
    # value range as the images the model is trained on.
    img = cv2.resize(img, (self.image_size, self.image_size))
    #add trailing channel dimension
    img = np.expand_dims(img, -1)
    return img


  def __getitem__(self, index): #called with batch_number as an argument to obtain a given batch of data
    """Return batch number ``index``; raises IndexError when the batch is empty."""
    # Go through self.indexes so that the order produced by on_epoch_end is honoured.
    batch_indexes = self.indexes[index*self.batch_size : (index+1)* self.batch_size]
    if len(batch_indexes) == 0:
      raise IndexError('batch index {} is out of range'.format(index))

    # Element-wise lookup works for both lists and numpy arrays of ids.
    batch_pat_ids = [self.unique_patient_ids[i] for i in batch_indexes]

    if self.predict:
      #load only the files belonging to this batch
      imgs = np.array([self.__loadpredict__(pat_id_row) for pat_id_row in batch_pat_ids])
      return imgs, batch_pat_ids

    #load image/mask pairs for this batch
    items = [self.__load__(pat_id_row) for pat_id_row in batch_pat_ids]
    imgs, masks = zip(*items)

    #create numpy batch
    return np.array(imgs), np.array(masks)

  def __len__(self): #returns no. of steps in the epoch using samples and batch_size
    """Number of batches per epoch."""
    n_ids = len(self.unique_patient_ids)
    if self.predict:
      # keep the final partial batch so that every file gets a prediction
      return int(np.ceil(n_ids / self.batch_size))
    # training drops the final partial batch
    return int(np.floor(n_ids / self.batch_size))

  def on_epoch_end(self): #This method is called after every epoch. Here we shuffle order of the dataset
    """Reset the id order and reshuffle it when ``shuffle`` is enabled."""
    self.indexes = np.arange(len(self.unique_patient_ids))
    if self.shuffle:
      np.random.shuffle(self.indexes)

In [9]:
# Both generators share the same batching options; only ids, image folder and labels differ.
# NOTE(review): with image_size=256 the generator yields single-channel 256x256 images and
# masks, while create_model() declares a (224, 224, 3) input and a (28, 28) output — confirm
# the shapes line up before training.
generator_options = dict(batch_size=32, shuffle=False, image_size=256)

train_gen = Generator(train_patient_ids, train_img_path, train_data, **generator_options)
valid_gen = Generator(valid_patient_ids, valid_img_path, valid_data, **generator_options)

In [10]:
# SETTINGS

# Width hyper parameter for MobileNet (0.25, 0.5, 0.75, 1.0).
# Higher width means more accurate but slower.
ALPHA = 1

# Input resolution (pixels) declared for the network.
IMAGE_HEIGHT = IMAGE_WIDTH = 224

# Resolution of the grid the network output is reshaped to.
HEIGHT_CELLS = WIDTH_CELLS = 28

# Input pixels covered by one grid cell along each axis.
CELL_HEIGHT = IMAGE_HEIGHT / HEIGHT_CELLS
CELL_WIDTH = IMAGE_WIDTH / WIDTH_CELLS

# NOTE(review): BATCH_SIZE is not used by the generator construction visible here
# (it passes batch_size=32) — confirm which value is intended.
BATCH_SIZE = 4
# Passed to EarlyStopping as its patience.
PATIENCE = 10


In [11]:
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input
def create_model(trainable=True):
    """Build a MobileNet backbone with a small upsampling head that outputs a grid of scores.

    Args:
        trainable: value assigned to ``layer.trainable`` for every layer of the
            ImageNet-initialised MobileNet backbone.

    Returns:
        A ``Model`` from the backbone input to a sigmoid output reshaped to
        ``(HEIGHT_CELLS, WIDTH_CELLS)``.
    """
    backbone = MobileNet(
        input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
        include_top=False,
        alpha=ALPHA,
        weights="imagenet",
    )

    for backbone_layer in backbone.layers:
        backbone_layer.trainable = trainable

    # Three intermediate activations, ordered from the earliest to the last layer tapped.
    early, middle, late = (
        backbone.get_layer(layer_name).output
        for layer_name in ("conv_pw_5_relu", "conv_pw_11_relu", "conv_pw_13_relu")
    )

    # Upsample the later features and merge them with the earlier ones, twice.
    merged = Concatenate()([UpSampling2D()(late), middle])
    merged = Concatenate()([UpSampling2D()(merged), early])

    # 1x1 convolution collapses the channels to one sigmoid score per location;
    # the reshape then drops that single-channel axis.
    cell_scores = Conv2D(1, kernel_size=1, activation="sigmoid")(merged)
    grid = Reshape((HEIGHT_CELLS, WIDTH_CELLS))(cell_scores)

    return Model(inputs=backbone.input, outputs=grid)

In [12]:
def dice_coefficient(y_true, y_pred):
    """Dice overlap between targets and predictions, summed over every element.

    Computes ``2 * sum(y_true * y_pred) / (sum(y_true + y_pred) + epsilon)``;
    the Keras backend epsilon keeps the division defined when both sums are zero.
    """
    eps = tensorflow.keras.backend.epsilon()
    overlap = tensorflow.reduce_sum(y_true * y_pred)
    total = tensorflow.reduce_sum(y_true + y_pred)

    return (2 * overlap) / (total + eps)

In [13]:
def loss(y_true, y_pred):
    """Binary cross-entropy minus the log of the Dice coefficient.

    Args:
        y_true: target tensor.
        y_pred: predicted tensor.

    Returns:
        ``binary_crossentropy(y_true, y_pred) - log(dice + epsilon)``; the
        epsilon keeps the logarithm finite when the Dice coefficient is zero.
    """
    dice = dice_coefficient(y_true, y_pred)
    # TensorFlow 2.x exposes the logarithm as tensorflow.math.log; the
    # top-level tensorflow.log alias only exists in 1.x.
    return binary_crossentropy(y_true, y_pred) - tensorflow.math.log(dice + tensorflow.keras.backend.epsilon())

In [14]:
# Build the network with the MobileNet backbone layers frozen (trainable=False).
model = create_model(False)
model.summary()


# `learning_rate` is the tf.keras 2.x spelling of the deprecated `lr` argument; the legacy
# `epsilon=None` / `decay=0.0` arguments are dropped so the optimizer uses its defaults.
optimizer = Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(loss=loss, optimizer=optimizer, metrics=[dice_coefficient])

# Keep only the weights of the best epoch by validation loss. The deprecated `period=1`
# argument is omitted; checkpoints are still evaluated once per epoch by default.
checkpoint = ModelCheckpoint("model-MobileNet{val_loss:.2f}.h5", monitor="val_loss", verbose=1, save_best_only=True,
                             save_weights_only=True, mode="auto")
# Stop once val_loss has not improved for PATIENCE epochs.
stop = EarlyStopping(monitor="val_loss", patience=PATIENCE, mode="auto")
# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement, down to 1e-6.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=5, min_lr=1e-6, verbose=1, mode="auto")

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 32) 864         conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 32) 128         conv1[0][0]                      
______________________________________________________________________________________________

In [16]:
# Model.fit accepts Sequence inputs directly in TF 2.x; fit_generator is deprecated there.
# NOTE(review): the generators are built with image_size=256 and yield single-channel
# images and masks, while the model declares a (224, 224, 3) input and a (28, 28) output —
# confirm the generator output shapes match the model before running this cell.
history = model.fit(train_gen,
                    validation_data = valid_gen,
                    callbacks = [checkpoint, reduce_lr, stop],
                    epochs = 4,
                    shuffle = True,
                    verbose = 1,
                    workers = 12,
                    use_multiprocessing = True)

ValueError: ignored