#**SUMMARY**

Filename    : model.ipynb   
Description : This file contains the logic for creating the model.

The following steps were performed for model creation:   
      1.   Build a pneumonia detection model, starting from a basic CNN and then improving upon it.   
      2.   Train the model.   
      3.   To deal with the long training time, save the weights so that they can be reused when training the model a second time instead of starting from scratch.   


**Revision History**  
Date        ||       Description               ||              Author  
07-06-2020   ||   Initial logic for model creation   ||   Arvindh   
  



#**Import Libraries**

In [0]:
%tensorflow_version 2.x

In [2]:
import tensorflow
tensorflow.__version__

'2.2.0'

In [3]:
import pandas as pd 
import numpy as np
from seaborn import countplot
from matplotlib.pyplot import figure, show
import os
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns

  import pandas.util.testing as tm


In [8]:
!pip3 install pydicom
import pydicom
import os
from skimage.transform import resize
import keras
import pandas as pd
import pickle
import cv2



In [0]:
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping

#Mount Google Drive

In [5]:
# Mount Google Drive at /content/drive (interactive: prompts for an authorization code).
# The project paths used by this notebook are given relative to that mount's parent
# directory ('drive/My Drive/...').
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Set your project path; every dataset folder below is derived from it.
# NOTE: these are relative paths ('drive/...'), so they resolve against the
# current working directory of the kernel.
project_path = 'drive/My Drive/Colab Notebooks/Capstone'
train_img_path = f'{project_path}/dataset/stage_2_train_images'
# Validation images are read from the same folder as the training images.
valid_img_path = train_img_path
test_img_path = f'{project_path}/dataset/stage_2_test_images'

In [0]:
# File names (relative to project_path) of the pickled train/validation dataframes.
train_df_pick = "/train_df_pickle"
valid_df_pick = "/valid_df_pickle"

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that were produced by a trusted source.

# Open each pickle in binary mode and load it; the `with` blocks guarantee the
# file handles are closed even if unpickling raises.
with open(project_path + train_df_pick, 'rb') as fileObject:
    train_df = pickle.load(fileObject)

with open(project_path + valid_df_pick, 'rb') as fileObject1:
    valid_df = pickle.load(fileObject1)

In [16]:
# Distinct patient IDs present in the training dataframe.
train_patient_Ids = train_df["patientId"].unique()
print(f"The number of unique patient id in the training dataset is {train_patient_Ids.shape[0]}")

The number of unique patient id in the training dataset is 21908


In [17]:
# Distinct patient IDs present in the validation dataframe.
valid_patient_Ids = valid_df["patientId"].unique()
print(f"The number of unique patient id in the validation dataset is {valid_patient_Ids.shape[0]}")

The number of unique patient id in the validation dataset is 5914


#Custom Data generator

In [0]:
class Generator(keras.utils.Sequence):
    """Keras ``Sequence`` producing batches of (image, mask) arrays.

    For every patient ID a DICOM file ``<folder>/<patient_id>.dcm`` is read and
    a mask of the same shape is built in which each bounding box of a row with
    ``Target == 1`` is filled with ones. Image and mask are both resized to
    ``image_size x image_size`` and get a trailing channel axis.

    Args:
        unique_patient_ids: array-like of patient IDs (must expose ``.shape``).
        folder: directory containing the ``.dcm`` files.
        dataframe: table with columns ``patientId``, ``Target``, ``x``, ``y``,
            ``width`` and ``height``.
        batch_size: number of patients per batch.
        shuffle: when true, the patient order is reshuffled in
            ``on_epoch_end`` (i.e. at construction and after every epoch).
        image_size: side length of the square output images and masks.

    Note:
        ``__len__`` rounds down, so a trailing partial batch is not served.
    """

    def __init__(self, unique_patient_ids, folder, dataframe, batch_size=10, shuffle=False, image_size=256):
        self.unique_patient_ids = unique_patient_ids
        self.folder = folder
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.image_size = image_size
        # Builds self.indexes (and applies the initial shuffle if requested).
        self.on_epoch_end()

    def __load__(self, pat_id):
        """Load one patient's image and build its bounding-box mask.

        Args:
            pat_id: patient ID; the DICOM file name is ``pat_id + '.dcm'``.

        Returns:
            Tuple ``(img, msk)``, both resized to ``image_size x image_size``
            with a trailing axis of length 1 appended.
        """
        dicom_path = os.path.join(self.folder, pat_id + '.dcm')
        img = pydicom.dcmread(dicom_path).pixel_array
        # Mask starts empty and has the shape of the un-resized image.
        msk = np.zeros(img.shape)

        # Only rows of this patient that are labelled positive carry a box.
        # An unknown patient simply yields no rows and therefore an empty mask.
        positives = self.dataframe[
            (self.dataframe.patientId == pat_id) & (self.dataframe.Target == 1)
        ]
        for box in positives[["x", "y", "width", "height"]].itertuples(index=False):
            x = int(box.x)
            y = int(box.y)
            msk[y: y + int(box.height), x: x + int(box.width)] = 1

        # NOTE(review): cv2.resize uses its default interpolation here, so mask
        # edges may hold fractional values after resizing — confirm intended.
        img = cv2.resize(img, (self.image_size, self.image_size))
        msk = cv2.resize(msk, (self.image_size, self.image_size))
        img = np.expand_dims(img, -1)
        msk = np.expand_dims(msk, -1)
        return img, msk

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(imgs, msks)`` numpy arrays."""
        # Go through self.indexes so that the order set up by on_epoch_end
        # (shuffled or sequential) is the order actually served. Previously the
        # IDs were sliced directly and the shuffle flag had no effect.
        batch_positions = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]
        batch_pat_ids = np.asarray(self.unique_patient_ids)[batch_positions]

        # Load every patient of the batch, then split into images and masks.
        items = [self.__load__(pat_id) for pat_id in batch_pat_ids]
        imgs, msks = zip(*items)

        return np.array(imgs), np.array(msks)

    def __len__(self):
        """Number of full batches per epoch (a partial last batch is dropped)."""
        return int(np.floor(self.unique_patient_ids.shape[0] / self.batch_size))

    def on_epoch_end(self):
        """Reset the sample order; shuffle it when ``self.shuffle`` is set."""
        self.indexes = np.arange(self.unique_patient_ids.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [0]:
#To be used finally
#train_gen = Generator(train_patient_Ids, train_img_path, train_df, batch_size=32, shuffle=False, image_size=256)
#valid_gen = Generator(valid_patient_Ids, valid_img_path, valid_df, batch_size=32, shuffle=False, image_size=256)

In [0]:
# Small generators for a quick run: only the first 20 training and the first
# 10 validation patient IDs are used.
train_gen = Generator(
    unique_patient_ids=train_patient_Ids[:20],
    folder=train_img_path,
    dataframe=train_df,
    batch_size=10,
    shuffle=False,
    image_size=256,
)
valid_gen = Generator(
    unique_patient_ids=valid_patient_Ids[:10],
    folder=valid_img_path,
    dataframe=valid_df,
    batch_size=10,
    shuffle=False,
    image_size=256,
)

In [0]:
def create_downsample(channels, inputs):
    """Downsampling stage: batch-norm, activation, 1x1 convolution, 2x2 max-pool.

    Args:
        channels: number of filters of the 1x1 convolution.
        inputs: input tensor.

    Returns:
        The output tensor of the max-pooling layer.
    """
    layers = keras.layers
    normalized = layers.BatchNormalization(momentum=0.9)(inputs)
    # LeakyReLU is given 0 as its first argument.
    activated = layers.LeakyReLU(0)(normalized)
    projected = layers.Conv2D(channels, 1, padding='same', use_bias=False)(activated)
    return layers.MaxPool2D(2)(projected)

def create_resblock(channels, inputs):
    """Residual block: two (batch-norm, activation, 3x3 conv) stages plus a skip.

    Args:
        channels: number of filters of each 3x3 convolution.
        inputs: input tensor; it is added element-wise to the branch output.

    Returns:
        The tensor produced by adding the branch output to ``inputs``.
    """
    branch = inputs
    # The same three-layer stage is applied twice in a row.
    for _ in range(2):
        branch = keras.layers.BatchNormalization(momentum=0.9)(branch)
        branch = keras.layers.LeakyReLU(0)(branch)
        branch = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(branch)
    return keras.layers.add([branch, inputs])

def create_network(input_size, channels, n_blocks=2, depth=4):
    """Assemble the segmentation network.

    The model takes a single-channel ``input_size x input_size`` image, applies
    an initial 3x3 convolution, then ``depth`` levels each consisting of one
    downsampling stage followed by ``n_blocks`` residual blocks. The filter
    count doubles at every level. A 1x1 sigmoid convolution produces a
    one-channel map which is upsampled by a factor of ``2**depth``.

    Args:
        input_size: height and width of the square input.
        channels: filters of the first convolution (doubled at each level).
        n_blocks: residual blocks per level.
        depth: number of downsampling levels.

    Returns:
        An uncompiled ``keras.Model``.
    """
    inputs = keras.Input(shape=(input_size, input_size, 1))
    features = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(inputs)

    # Encoder: each level doubles the filter count before downsampling.
    width = channels
    for _ in range(depth):
        width = width * 2
        features = create_downsample(width, features)
        for _ in range(n_blocks):
            features = create_resblock(width, features)

    # Head: normalise, activate, project to one sigmoid channel.
    features = keras.layers.BatchNormalization(momentum=0.9)(features)
    features = keras.layers.LeakyReLU(0)(features)
    features = keras.layers.Conv2D(1, 1, activation='sigmoid')(features)
    # Upsample by the total downsampling factor of the encoder.
    outputs = keras.layers.UpSampling2D(2**depth)(features)

    return keras.Model(inputs=inputs, outputs=outputs)

In [0]:
# Build the network for 256x256 inputs, then compile it with the Adam
# optimizer, binary cross-entropy loss and accuracy as the reported metric.
model = create_network(input_size=256, channels=32, n_blocks=2, depth=4)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [75]:
# Train for 2 epochs. `fit` accepts keras.utils.Sequence objects directly;
# `fit_generator` is deprecated in the TF 2.x / Keras 2.3+ stack used here.
history = model.fit(train_gen, validation_data=valid_gen, epochs=2, workers=4, use_multiprocessing=False)

Epoch 1/2
Epoch 2/2
