In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names_in_folder)
    for full_path in full_paths:
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf

# Detect and init the TPU when the session has one. Without this guard the cell
# raises on CPU/GPU sessions and "Restart & Run All" stops here.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
    # Raised by TPUClusterResolver when no TPU can be located for this session.
    tpu = None

if tpu is not None:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # instantiate a distribution strategy
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # No TPU: fall back to TensorFlow's default strategy so `tpu_strategy`
    # is always defined for later cells.
    tpu_strategy = tf.distribute.get_strategy()

# NOTE(review): no visible cell builds or compiles the model inside
# `tpu_strategy.scope()`, so the strategy looks unused - confirm whether
# TPU training was actually intended.




In [None]:
# Import all required libraries
import pandas as pd
import numpy as np
import pydicom
import pylab
from skimage.transform import resize
import pathlib
import keras
from keras.applications.densenet import DenseNet121
from keras.layers import Input
from keras.models import Model
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau

In [None]:
# Locations of the RSNA Pneumonia Detection Challenge data, relative to the notebook.
# TRAIN_IMAGES: folder holding the stage-2 training images (read as "<id>.dcm" files
# by DataGenerator); Dataset: challenge root that also contains the label CSV.
TRAIN_IMAGES ='../input/rsna-pneumonia-detection-challenge/stage_2_train_images'
Dataset = '../input/rsna-pneumonia-detection-challenge'
# Unused local path to ChexNet weights, kept for reference only:
#weights = 'E:/Machine Learning/Great Learning/Projects/GL Capstone Project/GL Capstone Project/Code Base/ChexNet/'

In [None]:
# Load the stage-2 label table and keep a single (patientId, Target) row per patient.
filepath = Dataset + '/stage_2_train_labels.csv'
Images_df = pd.read_csv(filepath)
Images_model_df = (
    Images_df
    .loc[:, ['patientId', 'Target']]
    .drop_duplicates(subset='patientId')
)

In [None]:
# Shuffle the de-duplicated label table. frac=1.0 keeps every row (lower it to
# experiment on a subset); random_state=42 makes the row order repeatable.
Images_sample_df = Images_model_df.sample(frac=1.0,random_state=42)

In [None]:
# Class balance: number of rows (one per patientId) for each Target value
Images_sample_df['Target'].value_counts()

# Don't Use this function

In [None]:
def train_test_dict(Images_sample_df,test_size,random_state=42):
    """Split a label table into stratified train/test ``{patientId: Target}`` dicts.

    Parameters
    ----------
    Images_sample_df : pandas.DataFrame
        Table with a ``patientId`` column and a ``Target`` column.
    test_size : float or int
        Forwarded to ``train_test_split`` as the size of the test split.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple of (dict, dict)
        ``(train_dict, test_dict)``, each mapping patientId to Target.
    """
    # Stratify on Target so both splits keep the same class ratio.
    train_df, test_df = train_test_split(
        Images_sample_df,
        test_size=test_size,
        random_state=random_state,
        stratify=Images_sample_df['Target'],
    )
    # Convert to dictionary with patient-id as key and target as value
    train_dict = train_df.set_index('patientId')['Target'].to_dict()
    test_dict = test_df.set_index('patientId')['Target'].to_dict()
    return train_dict, test_dict

In [None]:
# Hold out 2% of the rows as the validation/test split. The split is stratified
# on Target so both parts keep the same class ratio, and seeded for repeatability.
train_df, test_df = train_test_split(Images_sample_df, test_size=0.02, random_state=42, stratify=Images_sample_df[['Target']])

In [None]:
# Build {patientId: Target} lookup tables for the train and test splits.
train_dict, test_dict = (
    split.set_index('patientId')['Target'].to_dict()
    for split in (train_df, test_df)
)

In [None]:
# Preview a handful of entries instead of dumping the whole mapping into the output.
dict(list(train_dict.items())[:5])

In [None]:
# Define Custom Generator Class to be used in Model Generator
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that reads DICOM files from disk and yields ``(X, y)`` batches.

    Every sample is loaded from ``<path>/<ID>.dcm``. Its pixel array is repeated
    ``n_channels`` times along a new last axis, divided by 255 and resized to ``dim``.

    Parameters
    ----------
    list_IDs : list of str
        Sample identifiers; each one is used as the DICOM file stem.
    labels : mapping
        Looked up as ``labels[ID]`` to obtain the integer target of a sample.
    path : str
        Directory that contains the ``.dcm`` files.
    batch_size : int, default 128
        Number of samples per batch.
    dim : tuple of int, default (224, 224)
        Spatial size (rows, cols) every image is resized to.
    n_channels : int, default 3
        Number of copies of the pixel array stacked on the last axis.
    n_classes : int, default 1
        Stored on the instance; not used by the generator itself.
    shuffle : bool, default True
        Whether the sample order is reshuffled at the end of every epoch.
    """

    def __init__(self, list_IDs, labels, path,batch_size=128, dim=(224,224), n_channels=3,
                 n_classes=1, shuffle=True):
        """Store the configuration and build the initial sample order."""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.path = path
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        Samples that do not fill a complete final batch are not served.
        """
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        # Positions (into list_IDs) that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Load and preprocess the samples
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given IDs and return ``X`` (n_samples, *dim, n_channels) and ``y`` (n_samples,)."""
        # Size the buffers from the IDs actually requested so that no
        # uninitialised rows from np.empty can leak into a batch.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Read the DICOM file of this sample
            dcm_file_sample = self.path + "/" + ID + ".dcm"
            dcm_data_sample = pydicom.filereader.dcmread(dcm_file_sample)
            image = dcm_data_sample.pixel_array

            # Repeat the pixel array on a new last axis to get n_channels channels
            image_array = np.stack([image] * self.n_channels, axis=2)
            # NOTE(review): dividing by 255 assumes 8-bit pixel data - confirm for this data set
            image_array = image_array / 255.
            # Resize to the configured spatial size (previously hard-coded to 224x224)
            image_array = resize(image_array, self.dim, mode='constant', anti_aliasing=True)
            X[i,] = image_array

            # Store class
            y[i] = self.labels[ID]

        return X, y

In [None]:
from keras.models import load_model

In [None]:
# Build a DenseNet121 backbone initialised with ImageNet weights. include_top=False
# leaves out the stock ImageNet classifier and pooling='avg' makes the backbone
# output a globally average-pooled feature vector.
input_shape = (224, 224, 3)
num_of_class=1  # not referenced by any other visible cell
img_in = Input(input_shape)              
model = DenseNet121(include_top= False, 
                weights='imagenet',    
                input_tensor= img_in, 
                input_shape= input_shape,
                pooling ='avg') 

# Stack a 14-unit sigmoid layer named "predictions" on the pooled features.
# NOTE(review): the weights loaded above are 'imagenet' with include_top=False, so
# this 14-unit layer is freshly initialised rather than pre-trained. It looks like a
# leftover from loading a 14-class checkpoint (see the commented-out ChexNet weights
# path) - confirm whether such weights were meant to be loaded here.
x = model.output  
predictions = Dense(14, activation="sigmoid", name="predictions")(x)    
model = Model(inputs=img_in, outputs=predictions)



In [None]:
# Print the model summary (DenseNet121 backbone plus the 14-unit "predictions" layer)
model.summary()

In [None]:
# Remove the last dense layer of 14 classes and print the summary.
# `model.layers.pop()` is avoided: in Keras versions where `Model.layers` returns a
# fresh list it changes nothing. Rebuilding the model up to the input of the
# "predictions" layer removes it explicitly. The name check keeps the cell
# idempotent, so a second run does not strip a further layer.
if model.layers[-1].name == "predictions":
    model = Model(inputs=model.input, outputs=model.layers[-1].input)
model.summary()

In [None]:
# Add a new dense layer of 1 class on top of the pooled backbone features.
new_layer = Dense(1, activation="sigmoid", name="my_predictions")
inp = model.input
# If the 14-unit "predictions" layer is still the last layer (removing it via
# `model.layers.pop()` has no effect in some Keras versions), branch off its input
# so the new head is not stacked on the 14 sigmoid outputs. Otherwise the last
# layer already is the pooled feature layer, so use its output.
last_layer = model.layers[-1]
features = last_layer.input if last_layer.name == "predictions" else last_layer.output
out = new_layer(features)
model2 = Model(inp, out)

In [None]:
# Print the summary of the new model, whose final layer is the 1-unit "my_predictions"
model2.summary()

In [None]:
# Batch generators over the train and test dictionaries (dict keys are the sample IDs).
train_generator = DataGenerator(
    list_IDs=list(train_dict),
    labels=train_dict,
    path=TRAIN_IMAGES,
    batch_size=32,
)
validation_generator = DataGenerator(
    list_IDs=list(test_dict),
    labels=test_dict,
    path=TRAIN_IMAGES,
    batch_size=1,
)

In [None]:
#Define Custom Metrics Functions to be used in Keras Training
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall for one batch: rounded true positives over rounded actual positives.

    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision for one batch: rounded true positives over rounded predicted positives.

    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 for one batch: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [None]:
# Training callbacks, all driven by the validation loss.
callbacks_list = [
    # Stop once val_loss has gone 5 epochs without improving
    EarlyStopping(patience=5, monitor='val_loss'),
    # Save the model to disk only when val_loss improves
    ModelCheckpoint(monitor='val_loss', save_best_only=True, filepath='my_model.h5'),
    # Scale the learning rate by 0.1 after 2 epochs without val_loss improvement
    ReduceLROnPlateau(patience=2, factor=0.1, monitor='val_loss'),
]

In [None]:
# Freeze every layer before the named one; train the named layer and all later layers
def model_train_layers(model,layer):
    """Make ``model`` trainable from the layer called ``layer`` onwards.

    Layers are visited in ``model.layers`` order. Every layer before the first
    one whose ``name`` equals ``layer`` is frozen; that layer and all layers
    after it are set trainable.

    Parameters
    ----------
    model : object exposing ``layers`` and ``trainable``
        The model to modify in place (e.g. a Keras ``Model``).
    layer : str
        Name of the first layer that should be trainable.

    Raises
    ------
    ValueError
        If no layer of ``model`` is called ``layer``. Without this check a
        mistyped name would silently freeze the whole model.
    """
    # Validate before mutating anything so a bad name leaves the model untouched.
    if all(candidate.name != layer for candidate in model.layers):
        raise ValueError("No layer named %r in the model" % (layer,))

    model.trainable = True
    set_trainable = False
    # The loop variable must not reuse the name `layer`: shadowing the parameter
    # made the name comparison always false, which froze every layer.
    for candidate in model.layers:
        if candidate.name == layer:
            set_trainable = True
        candidate.trainable = set_trainable

In [None]:
# Intended effect: freeze the backbone and leave only the new 1-unit head
# ("my_predictions", the last layer of model2) trainable.
model_train_layers(model2,"my_predictions")

In [None]:
# Compile with binary cross entropy loss. Accuracy plus the custom batch-wise
# F1 / precision / recall functions are tracked as metrics.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that newer
# Keras releases no longer accept.
optimizer = Adam(learning_rate=0.001)
model2.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['acc',f1_m,precision_m, recall_m])

In [None]:
# Train for 2 epochs on the training generator, validating on the hold-out generator.
# With epochs=2, neither EarlyStopping(patience=5) nor ReduceLROnPlateau(patience=2)
# in callbacks_list can trigger; only the checkpoint callback has an effect here.
# NOTE(review): `fit_generator` is deprecated in recent Keras/TensorFlow releases in
# favour of `fit` - confirm against the installed version before upgrading.
history=model2.fit_generator(generator=train_generator,
                    epochs=2,
                    validation_data=validation_generator,
                    callbacks=callbacks_list)

In [None]:
# Plot the results on Loss and Accuracy
import matplotlib.pyplot as plt
# Pull the per-epoch curves out of the training history
curves = history.history
acc, val_acc = curves['acc'], curves['val_acc']
loss, val_loss = curves['loss'], curves['val_loss']
epochs = range(1, len(acc) + 1)

# Figure 1: accuracy (dots = training, line = validation)
_, acc_ax = plt.subplots()
acc_ax.plot(epochs, acc, 'bo', label='Training acc')
acc_ax.plot(epochs, val_acc, 'b', label='Validation acc')
acc_ax.set_title('Training and validation accuracy')
acc_ax.legend()

# Figure 2: loss (dots = training, line = validation)
_, loss_ax = plt.subplots()
loss_ax.plot(epochs, loss, 'bo', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.legend()

plt.show()

Although the training and validation accuracy are both around 80%, the precision and recall scores are very low
on both the training and validation sets. This is most likely caused by the imbalance between positive and negative cases in the training dataset (roughly a 20-80 ratio).

In [None]:
# Separate the label table by class: Target == 1 (positive) and Target == 0 (negative)
Pos_df = Images_sample_df.loc[Images_sample_df['Target'].eq(1)]
Neg_df = Images_sample_df.loc[Images_sample_df['Target'].eq(0)]

In [None]:
# Keep a random half of the negative rows to reduce the class imbalance.
# The seed (42, as used for the other random steps in this notebook) makes the
# down-sampling repeatable across runs.
Neg_sample_df = Neg_df.sample(frac=0.5, random_state=42)

In [None]:
# Rebalanced table: every positive row followed by the down-sampled negative rows
Images_corr_Sample_df = pd.concat([Pos_df, Neg_sample_df])

In [None]:
# Split the rebalanced table into train and test validation datasets.
# The stratify labels must come from the frame being split: labels taken from
# Images_sample_df have a different number of rows and make train_test_split fail.
train_data, test_data = train_test_split(
    Images_corr_Sample_df,
    test_size=0.02,
    random_state=42,
    stratify=Images_corr_Sample_df['Target'],
)
# Convert to dictionary with patient-id as key and target as value
train_dr = train_data.set_index('patientId')['Target'].to_dict()
test_dr = test_data.set_index('patientId')['Target'].to_dict()

In [None]:
# Batch generators over the rebalanced train and test dictionaries.
train_generator = DataGenerator(
    list_IDs=list(train_dr),
    labels=train_dr,
    path=TRAIN_IMAGES,
    batch_size=32,
)
validation_generator = DataGenerator(
    list_IDs=list(test_dr),
    labels=test_dr,
    path=TRAIN_IMAGES,
    batch_size=1,
)

In [None]:
#train_test_dict(Images_corr_Sample_df,test_size=0.02,random_state=42)

In [None]:
# Train for 2 more epochs, now on the generators built from the rebalanced data.
# model2 is neither rebuilt nor recompiled before this call, so training continues
# from the weights left by the previous fit.
history=model2.fit_generator(generator=train_generator,
                    epochs=2,
                    validation_data=validation_generator,
                    callbacks=callbacks_list)

In [None]:
# Intended effect: also fine-tune the end of the backbone - make every layer from
# "conv5_block16_0_bn" onwards trainable and keep the earlier layers frozen.
model_train_layers(model2,"conv5_block16_0_bn")

In [None]:
# Recompile so the changed `trainable` flags take effect, using a fresh Adam optimizer.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that newer
# Keras releases no longer accept.
optimizer = Adam(learning_rate=0.001)
model2.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['acc',f1_m,precision_m, recall_m])

In [None]:
# Fine-tuning run: 2 epochs on the rebalanced generators with the recompiled model.
# `history` is overwritten, so the curves of the earlier runs are no longer available.
history=model2.fit_generator(generator=train_generator,
                    epochs=2,
                    validation_data=validation_generator,
                    callbacks=callbacks_list)