In [None]:
!pip install pydicom

In [None]:
# Import all required libraries
import pandas as pd
import numpy as np
import pydicom
import pylab
from skimage.transform import resize
import pathlib
import keras
from keras.applications.densenet import DenseNet121
from keras.layers import Input
from keras.models import Model
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau

In [13]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pylab as plt
import pydicom
import os
from os import listdir
from os.path import isfile, join

In [None]:
# Work from the dataset root folder for the rest of the notebook
import os

os.chdir('E:/rsna-pneumonia-detection-challenge/')

In [None]:
pwd

In [14]:
# Folders holding the DICOM files of the two challenge splits
train_images_dir = 'E:/rsna-pneumonia-detection-challenge/stage_2_train_images/'
test_images_dir = 'E:/rsna-pneumonia-detection-challenge/stage_2_test_images/'

# Keep regular files only (sub-directories are skipped); entries are bare file names
train_images = list(filter(lambda name: isfile(join(train_images_dir, name)), listdir(train_images_dir)))
test_images = list(filter(lambda name: isfile(join(test_images_dir, name)), listdir(test_images_dir)))

print('5 Training images', train_images[:5])  # preview of the first five names

5 Training images ['0004cfab-14fd-4e49-80ba-63a80b6bddd6.dcm', '000924cf-0f8d-42bd-9158-1af53881a557.dcm', '000db696-cf54-4385-b10b-6b16fbb3f985.dcm', '000fe35a-2649-43d4-b027-e67796d412e0.dcm', '001031d9-f904-4a23-b3e5-2c088acd19c6.dcm']


In [15]:
# Report how many files were found in each split folder
print('Number of train images: {}'.format(len(train_images)))
print('Number of test images: {}'.format(len(test_images)))

Number of train images: 26684
Number of test images: 3000


In [None]:
# Set the path for training images
#TRAIN_IMAGES ='E:/rsna-pneumonia-detection-challenge/stage_2_train_images/'
# Root folder of the challenge data (note the trailing slash); the labels CSV path is built from it.
Dataset = 'E:/rsna-pneumonia-detection-challenge/'
# A weights *file name*; other cells use it as a string prefix when building paths
# (e.g. the checkpoint file name).
weights = 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5'

In [None]:
# Read the training CSV file and keep only the patient id and the target column.
# Duplicates on patientId are NOT removed here: the drop_duplicates call below is commented out,
# so a patientId may appear on more than one row of Images_model_df.
filepath = (Dataset+'/stage_2_train_labels.csv')
Images_df = pd.read_csv(filepath)
Images_model_df = Images_df[['patientId','Target']]
#Images_model_df=Images_model_df.drop_duplicates(subset='patientId')

In [None]:
# Sample the training images for initial experimentation.
# frac=1.0 keeps every row (this is a seeded shuffle, not a subsample); lower frac to work on a subset.
Images_sample_df = Images_model_df.sample(frac=1.0,random_state=42)

In [None]:
# Number of label rows per Target value (computed on the full label frame, not the sample)
Images_model_df['Target'].value_counts()

In [None]:
# Replace missing values with 0. Rebinding the result (instead of inplace=True) keeps the cell
# idempotent and avoids mutating a frame that was derived from a slice of another frame.
Images_sample_df = Images_sample_df.fillna(0)

In [None]:
pwd

In [None]:
# Split into train and validation datasets.
# patientId may repeat in Images_model_df (the drop_duplicates step is disabled), so the split is
# done on one row per patient: splitting the raw rows could put the same patient - and therefore
# the same image - in both the training and the validation set.
unique_patients_df = Images_model_df.drop_duplicates(subset='patientId')
train_df, test_df = train_test_split(
    unique_patients_df,
    test_size=0.02,
    random_state=42,
    stratify=unique_patients_df['Target'],  # keep the class balance equal in both parts
)
# Convert to dictionary with patient-id as key and target as value
train_dict = train_df.set_index('patientId')['Target'].to_dict()
test_dict = test_df.set_index('patientId')['Target'].to_dict()

In [None]:
# Preview a few (patientId, Target) pairs and the total size instead of echoing the whole mapping
len(train_dict), list(train_dict.items())[:5]

In [None]:
# Column dtypes, non-null counts and memory usage of the label frame
Images_model_df.info()

In [None]:
# Define Custom Generator Class to be used in Model Generator
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that reads DICOM images from disk one batch at a time.

    Every sample is loaded from ``<path>/<ID>.dcm``, its pixel array is replicated
    across ``n_channels`` channels, divided by 255 and resized to ``dim``.

    Parameters
    ----------
    list_IDs : list of str
        Sample identifiers (DICOM file names without the ``.dcm`` suffix).
    labels : dict
        Maps every identifier in ``list_IDs`` to its target value.
    path : str
        Directory that contains the ``.dcm`` files (a single string, not a list of files).
    batch_size : int
        Number of samples per batch.
    dim : tuple of int
        Spatial size ``(height, width)`` every image is resized to.
    n_channels : int
        Number of identical channels the single-channel image is stacked into.
    n_classes : int
        Stored on the instance; not used by the generator itself.
    shuffle : bool
        Whether the sample order is reshuffled whenever ``on_epoch_end`` runs.
    """

    def __init__(self, list_IDs, labels, path,batch_size=128, dim=(224,224), n_channels=3,
                 n_classes=1, shuffle=True):
        """Store the configuration and build the initial sample order."""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.path = path
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as a tuple ``(X, y)``."""
        # Positions (into list_IDs) that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample identifiers
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the sample order; reshuffle it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given samples.

        Returns ``X`` with shape ``(n_samples, *dim, n_channels)`` and the integer
        label vector ``y`` with shape ``(n_samples,)``.
        """
        # Size the buffers by the IDs actually received so that no uninitialised
        # rows from np.empty can ever be returned.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Read the DICOM file of this sample
            dcm_file_sample = os.path.join(self.path, ID + ".dcm")
            dcm_data_sample = pydicom.filereader.dcmread(dcm_file_sample)
            image = dcm_data_sample.pixel_array

            # Replicate the single channel, then divide by 255
            # (assumes 8-bit pixel data - TODO confirm for this dataset)
            image_array = np.stack([image] * self.n_channels, axis=2)
            image_array = image_array / 255.

            # Resize to the configured size (previously hard-coded to 224x224, which
            # broke the assignment below for any other ``dim``)
            image_array = resize(image_array, self.dim, mode='constant', anti_aliasing=True)
            X[i,] = image_array

            # Store class
            y[i] = self.labels[ID]

        return X, y

In [None]:
# Disabled alternative generator (image + mask variant), kept for reference only.
# The code is wrapped in a single-quoted triple string because it contains double-quoted
# docstrings: the previous double-quoted wrapper was closed by the first docstring, which made
# the whole cell a SyntaxError. The trailing semicolon stops Jupyter from echoing the string.
'''

import numpy as np
import cv2
from tensorflow.keras.utils import Sequence


class DataGenerator(keras.utils.Sequence):
    """Generates data for Keras
    Sequence based data generator. Suitable for building data generator for training and prediction.
    """
    def __init__(self, list_IDs, labels, image_path, mask_path,
                 to_fit=True, batch_size=32, dim=(224,224),
                 n_channels=3, n_classes=1, shuffle=True):
        """Initialization
        :param list_IDs: list of all 'label' ids to use in the generator
        :param labels: list of image labels (file names)
        :param image_path: path to images location
        :param mask_path: path to masks location
        :param to_fit: True to return X and y, False to return X only
        :param batch_size: batch size at each iteration
        :param dim: tuple indicating image dimension
        :param n_channels: number of image channels
        :param n_classes: number of output masks
        :param shuffle: True to shuffle label indexes after every epoch
        """
        self.list_IDs = list_IDs
        self.labels = labels
        self.image_path = image_path
        self.mask_path = mask_path
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch
        :return: number of batches per epoch
        """
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data
        :param index: index of the batch
        :return: X and y when fitting. X only when predicting
        """
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X = self._generate_X(list_IDs_temp)

        if self.to_fit:
            y = self._generate_y(list_IDs_temp)
            return X, y
        else:
            return X



    def on_epoch_end(self):
        """Updates indexes after each epoch
        """
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def _generate_X(self, list_IDs_temp):
        """Generates data containing batch_size images
        :param list_IDs_temp: list of label ids to load
        :return: batch of images
        """
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = self._load_grayscale_image(self.image_path + self.labels[ID])

        return X

    def _generate_y(self, list_IDs_temp):
        """Generates data containing batch_size masks
        :param list_IDs_temp: list of label ids to load
        :return: batch if masks
        """
        y = np.empty((self.batch_size, *self.dim), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            y[i,] = self._load_grayscale_image(self.mask_path + self.labels[ID])

        return y

    def _load_grayscale_image(self, image_path):
        """Load grayscale image
        :param image_path: path to image to load
        :return: loaded image
        """
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255
        return img

''';

In [None]:
from keras.models import load_model

In [None]:
# Create Train and Test generator (disabled variant)
# `image_path` and `mask_path` are not defined in any earlier cell, so these calls raise NameError
# on a fresh top-to-bottom run. They follow the (image_path, mask_path) signature of the disabled
# mask-based generator, not the DataGenerator class that is actually defined; the following cell
# creates the generators used for training.
# train_generator = DataGenerator(list(train_dict.keys()), train_dict, image_path, mask_path)#, batch_size=32)
# validation_generator = DataGenerator(list(test_dict.keys()), test_dict, mask_path, mask_path)#, batch_size=1)

In [None]:
# Create Train and Test generator
# DataGenerator needs the image *directory* (it builds "<path>/<ID>.dcm" for every sample), not the
# list of file names held in train_images / test_images. The validation IDs are split off the
# training labels, so their DICOM files also live in the training image folder.
train_generator = DataGenerator(list(train_dict.keys()), train_dict, train_images_dir, batch_size=32)
validation_generator = DataGenerator(list(test_dict.keys()), test_dict, train_images_dir, batch_size=1)

In [None]:
# Build DenseNet121 on ImageNet weights, without its classification top and with
# global average pooling as the last backbone layer
input_shape = (224, 224, 3)
num_of_class = 1
img_in = Input(input_shape)
model = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_tensor=img_in,
    input_shape=input_shape,
    pooling='avg',
)

# The pre-trained model has a classification output for 14 categories,
# hence the Dense head is defined with 14 units
x = model.output
predictions = Dense(14, activation="sigmoid", name="predictions")(x)
model = Model(inputs=img_in, outputs=predictions)

In [None]:
# Load pre-trained weights on similar dataset
# The argument must be the path of the weights file itself. The previous expression glued the
# `weights` file name in front of the absolute path and appended a trailing "/", which cannot
# resolve to a file.
model.load_weights("E:/rsna-pneumonia-detection-challenge/brucechou1983_CheXNet_Keras_0.3.0_weights.h5")

# Replace the 14-way head with `num_of_class` sigmoid unit(s): the generators yield one binary
# label per image and the model is compiled with binary cross-entropy, so a 14-unit output does
# not match the targets. The layer is named "my_predictions" because that is the layer name the
# fine-tuning helper is called with further down.
pooled_features = model.layers[-2].output  # layer feeding the old "predictions" head
my_predictions = Dense(num_of_class, activation="sigmoid", name="my_predictions")(pooled_features)
model = Model(inputs=model.input, outputs=my_predictions)

In [None]:
# Print the model summary (layer list, output shapes and parameter counts)
model.summary()

In [None]:
#Define Custom Metrics Functions to be used in Keras Training
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall metric: rounded true positives over rounded actual positives.

    Both inputs are clipped to [0, 1] and rounded before being summed;
    ``K.epsilon()`` in the denominator avoids a division by zero when
    ``y_true`` holds no positives.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision metric: rounded true positives over rounded predicted positives.

    Both inputs are clipped to [0, 1] and rounded before being summed;
    ``K.epsilon()`` in the denominator avoids a division by zero when
    nothing is predicted positive.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 metric: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator avoids a division by zero when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half



In [None]:
# Training callbacks, all of them driven by the validation loss
callbacks_list = [
    # Stop training when val_loss has not improved for 5 epochs
    EarlyStopping(monitor='val_loss', patience=5),
    # Save the model only when val_loss improves.
    # NOTE(review): `weights` holds a file name, so the checkpoint is written to
    # "<weights file name>my_model.h5" - confirm that this name is intended.
    ModelCheckpoint(filepath=weights + 'my_model.h5', monitor='val_loss', save_best_only=True),
    # Multiply the learning rate by 0.1 after 2 epochs without improvement
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2),
]

In [None]:
# Freeze every layer before the named one; train the named layer and all layers after it
def model_train_layers(model, layer):
    """Make ``layer`` and every later layer trainable, and freeze all earlier layers.

    Parameters
    ----------
    model : object exposing an ordered ``layers`` list and a ``trainable`` attribute
        The model to modify in place.
    layer : str
        Name of the first layer that stays trainable.

    Returns
    -------
    The same ``model`` object, so the call can be chained.

    Notes
    -----
    If no layer is called ``layer`` every layer ends up frozen; a warning is
    emitted in that case because training would then update nothing.
    """
    import warnings

    model.trainable = True
    set_trainable = False
    # The loop variable must not reuse the name `layer`: doing so hid the requested
    # layer name, so the comparison below could never be true.
    for current_layer in model.layers:
        if current_layer.name == layer:
            set_trainable = True
        current_layer.trainable = set_trainable

    if not set_trainable:
        warnings.warn("No layer named %r was found; all layers have been frozen." % (layer,))

    # Return only after every layer has been visited (the early return inside the
    # loop stopped after the first layer and handed back the function object).
    return model

In [None]:
# Ask the helper to start fine-tuning at the layer named "my_predictions"
# NOTE(review): verify that the model really contains a layer with this name before training
model_train_layers(model,"my_predictions")

In [None]:
# Compile with binary cross entropy loss
# NOTE(review): newer Keras releases spell this argument `learning_rate`; confirm that `lr`
# is accepted by the installed version.
optimizer = Adam(lr=0.001)
# Track accuracy together with the custom F1 / precision / recall metric functions
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['acc',f1_m,precision_m, recall_m])

In [None]:
# Our vectorized labels
#y_train = np.asarray(train_labels).astype('float32').reshape((-1,1))
#y_test = np.asarray(test_labels).astype('float32').reshape((-1,1))

In [None]:
# Train for at most 7 epochs on the training generator and validate on the validation generator
# after every epoch; the callbacks in callbacks_list may stop training earlier.
history=model.fit_generator(generator=train_generator,
                    epochs=7,
                    validation_data=validation_generator,
                    callbacks=callbacks_list)

# Extra code

In [None]:
# Scratch cell: preview four images produced by the augmenting generator.
# NOTE(review): `datagen` is only created in a later cell, so on a fresh top-to-bottom run this
# cell raises NameError. It also rebinds `train_generator`, replacing the DataGenerator instance
# created earlier in the notebook.
train_generator = datagen.flow_from_directory(
                  directory='E:/rsna-pneumonia-detection-challenge/' + r'/stage_2_train_images',
                  target_size=(224, 224), # resize to this size
                  color_mode="rgb", # for coloured images
                  batch_size=1, # number of images to extract from folder for every batch
                  class_mode="binary", # classes to predict
                  seed=2020 # to make the result reproducible
                  )

fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(15,15))

for i in range(4):

  # take the image part of the next batch and convert to unsigned integers for plotting
  image = next(train_generator)[0].astype('uint8')

  # changing size from (1, 224, 224, 3) to (224, 224, 3) for plotting the image
  image = np.squeeze(image)

  # plot raw pixel data
  ax[i].imshow(image)
  ax[i].axis('off')

In [None]:
# Scratch cell: train on augmented batches.
# NOTE(review): `batch_size`, `epochs`, `x_train`, `x_test` and `y_test` are not defined in any
# earlier cell, and `datagen` is created in a later cell, so this cell cannot run as written.
# `train_images` holds file names rather than image arrays - confirm what should be passed to flow().
model.fit_generator(datagen.flow(train_images, batch_size=batch_size), 
                    epochs=epochs, # number of passes over the training data
                    steps_per_epoch=x_train.shape[0]//batch_size, # number of batches drawn per epoch
                    validation_data=(x_test, y_test), # data for validation
                    validation_steps=x_test.shape[0]//batch_size)

In [None]:
# ImageDataGenerator is not imported by any earlier cell, so bring it into scope here.
from keras.preprocessing.image import ImageDataGenerator

# Random augmentation settings.
# NOTE: earlier cells already reference `datagen`; this cell has to be executed before them.
datagen = ImageDataGenerator(
        rotation_range=10, # rotation
        width_shift_range=0.2, # horizontal shift
        height_shift_range=0.2, # vertical shift
        zoom_range=0.2, # zoom
        horizontal_flip=True, # horizontal flip
        brightness_range=[0.2,1.2]) # brightness