In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import cv2
from skimage import io

import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
import tensorflow.keras.backend as K

import random
import glob
from sklearn.preprocessing import StandardScaler, normalize
from IPython.display import display


WHAT IS THE PURPOSE OF THIS NOTEBOOK?


The notebook is divided into two sections:

1. Image Categorization (i.e., whether the image contains a tumour or not)

CONTENTS:



LIBRARIES AND DATASETS IMPORT

THE VISUALIZATION OF DATA

GENERATING TRAINING, VALIDATION, AND TEST DATASETS

THE MODEL'S TRAINING

EVALUATION

2. Image Segmentation (i.e., if the image has a tumour, predict the region where the tumour is located; in other words, assign every pixel a label of 0 (background) or 1 (tumour))

CONTENTS:



MASKED ELEMENTS DATASET CREATION

CREATING A PERSONALIZED DATA GENERATOR

FORMING A RESNET BLOCK

TRAINING THE SEGMENTATION MODEL

EVALUATION OF SEGMENTATION MODELS

SEGMENTATION MODEL PREDICTION

A personal note:
This notebook is almost identical to the original "Brain MRI Detection | Segmentation | ResUNet" notebook. I wanted to practise image segmentation and classification problems, so I wrote it as a sort of tutorial that I can revisit as and when required. All credit goes to the author of Brain MRI Detection | Segmentation | ResUNet. Thanks and best wishes.

In [6]:
# Load data.csv from the dataset folder and print a summary of its columns and dtypes.
data = pd.read_csv('../input/lgg-mri-segmentation/kaggle_3m/data.csv')
data.info()


In [7]:
# Preview the first 10 rows of the loaded table.
data.head(10)


In [8]:
# Walk every patient folder of the dataset and record, for each file found,
# the folder name followed by the file path. The result is one flat list laid
# out as [dir_name, path, dir_name, path, ...].
data_map = []
for sub_dir_path in glob.glob("/kaggle/input/lgg-mri-segmentation/kaggle_3m/"+"*"):
    # The dataset root also contains plain files (e.g. data.csv); only
    # directories can be listed, so skip everything else explicitly instead of
    # relying on a caught exception.
    if not os.path.isdir(sub_dir_path):
        continue
    dir_name = sub_dir_path.split('/')[-1]
    try:
        filenames = os.listdir(sub_dir_path)
    except OSError as e:
        # Best effort: report an unreadable folder and carry on with the rest.
        print(e)
        continue
    for filename in filenames:
        image_path = sub_dir_path + '/' + filename
        data_map.extend([dir_name, image_path])


In [9]:
# data_map alternates folder name / file path, so even positions are the
# patient ids and odd positions are the matching paths.
df = pd.DataFrame(dict(patient_id=data_map[0::2], path=data_map[1::2]))
df.head()

In [10]:
# Separate MRI slices from segmentation masks using the "mask" marker in the path.
is_mask_path = df['path'].str.contains("mask")
df_imgs = df[~is_mask_path]
df_masks = df[is_mask_path]

# Path offsets used by the original slicing-based sort. They are kept as
# module-level names, but sorting no longer depends on them, so it keeps
# working if the dataset location or the folder-name length changes.
BASE_LEN = 89 # len(/kaggle/input/lgg-mri-segmentation/kaggle_3m/TCGA_DU_6404_19850629/TCGA_DU_6404_19850629_ <-!!!43.tif)
END_IMG_LEN = 4 # len(/kaggle/input/lgg-mri-segmentation/kaggle_3m/TCGA_DU_6404_19850629/TCGA_DU_6404_19850629_43 !!!->.tif)
END_MASK_LEN = 9 # (/kaggle/input/lgg-mri-segmentation/kaggle_3m/TCGA_DU_6404_19850629/TCGA_DU_6404_19850629_43 !!!->_mask.tif)


def _slice_number(path, suffix):
    """Return the integer slice index that precedes `suffix` in the file name of `path`.

    Example: ".../TCGA_DU_6404_19850629_43_mask.tif" with suffix "_mask.tif" -> 43.
    """
    stem = os.path.basename(path)[:-len(suffix)]
    return int(stem.rsplit('_', 1)[-1])


# Data sorting: both lists are ordered by slice number only. sorted() is stable,
# so files with the same slice number keep their original relative order in
# both lists, which is what keeps image k and mask k paired.
imgs = sorted(df_imgs["path"].values, key=lambda p: _slice_number(p, ".tif"))
masks = sorted(df_masks["path"].values, key=lambda p: _slice_number(p, "_mask.tif"))

# Sorting check: print one random image/mask pair (skipped when nothing was found).
if imgs:
    idx = random.randint(0, len(imgs)-1)
    print("Path to the Image:", imgs[idx], "\nPath to the Mask:", masks[idx])


In [11]:
# Final dataframe
# `imgs` and `masks` were re-ordered by slice number, whereas `df_imgs` is still
# in directory-listing order. Taking patient ids from `df_imgs` would therefore
# attach them to the wrong rows; derive each id from the folder that holds the
# (already sorted) image instead.
brain_df = pd.DataFrame({"patient_id": [os.path.basename(os.path.dirname(p)) for p in imgs],
                         "image_path": imgs,
                         "mask_path": masks
                        })
def pos_neg_diagnosis(mask_path):
    """Return 1 if the mask image at `mask_path` has any pixel above zero, else 0."""
    brightest_pixel = np.max(cv2.imread(mask_path))
    return 1 if brightest_pixel > 0 else 0
    
# Label every row: 1 when its mask contains tumour pixels, 0 otherwise.
brain_df['mask'] = brain_df['mask_path'].apply(pos_neg_diagnosis)
brain_df

In [12]:
# Number of images per class (1 = mask has non-zero pixels, 0 = empty mask).
brain_df['mask'].value_counts()


In [13]:
import plotly.graph_objects as go  # using plotly to create interactive plots

# Count the classes once and reuse the result for both axes.
mask_counts = brain_df['mask'].value_counts()

fig = go.Figure([go.Bar(x=mask_counts.index,
                        y=mask_counts.values,
                        width=.4  # one scalar width applies to every bar
                       )
                ])
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=4, opacity=0.4
                 )
fig.update_layout(title_text="Mask Count Plot",
                  width=700,
                  height=550,
                  yaxis=dict(
                             # `titlefont` is deprecated; the font belongs
                             # under `title.font`.
                             title=dict(text="Count", font=dict(size=20)),
                             tickmode="array"
                           )
                 )
fig.update_yaxes(automargin=True)
fig.show()

Class 1 marks the images that contain a tumour; there are far fewer of them than tumour-free images (class 0).

In [14]:
# Printing the Tumor Image
# The `mask` column already records which masks contain non-zero pixels, so
# look up the first positive row there instead of re-reading mask files.
# If there is no positive row, fall back to the last row (as the scan loop did).
tumour_rows = brain_df.index[brain_df['mask'] == 1]
i = tumour_rows[0] if len(tumour_rows) else len(brain_df) - 1

tumour_mask = cv2.imread(brain_df['mask_path'][i])
# cv2.imread returns channels in BGR order while matplotlib expects RGB.
tumour_mri = cv2.cvtColor(cv2.imread(brain_df['image_path'][i]), cv2.COLOR_BGR2RGB)

plt.figure(figsize=(12,8))
plt.subplot(121)
plt.imshow(tumour_mask);
plt.title('Tumor Location')
plt.axis('off')
plt.subplot(122)
plt.imshow(tumour_mri);
plt.title("Brain MRI Image")
plt.axis('off')
plt.show()


In [15]:
# Pixel value range (max, min) of the mask shown above.
mask_pixels = cv2.imread(brain_df['mask_path'][i])
mask_pixels.max(), mask_pixels.min()


Plot Random Images

In [16]:
# Show six randomly chosen MRI slices next to their masks.
fig,axs = plt.subplots(6,2,figsize=(16,26))
for row in range(6):
    # randrange excludes its upper bound. randint(0, len(brain_df)) is inclusive
    # and could return len(brain_df), which is one past the last valid row.
    i = random.randrange(len(brain_df))
    axs[row][0].title.set_text("Brain MRI")
    # cv2.imread yields BGR; convert so matplotlib shows the true channel order.
    axs[row][0].imshow(cv2.cvtColor(cv2.imread(brain_df['image_path'][i]), cv2.COLOR_BGR2RGB))
    axs[row][1].title.set_text("Mask - "+str(brain_df['mask'][i]))
    axs[row][1].imshow(cv2.imread(brain_df['mask_path'][i]))
    axs[row][0].axis('off')
    axs[row][1].axis('off')
fig.tight_layout()


Perform Masking

In [17]:
# Draw up to 12 tumour-positive slices: MRI, mask, and MRI with the mask painted on.
fig,axs= plt.subplots(12,3,figsize=(20,50))
count = 0
for i, has_tumour in enumerate(brain_df['mask']):
    if count == 12:
        break
    if has_tumour != 1:
        continue

    mri_ax, mask_ax, overlay_ax = axs[count]

    img = io.imread(brain_df['image_path'][i])
    mri_ax.title.set_text("Brain MRI")
    mri_ax.imshow(img)

    mask = io.imread(brain_df['mask_path'][i])
    mask_ax.title.set_text('Mask')
    mask_ax.imshow(mask,cmap ='gray')

    # Paint every mask pixel onto the MRI in a highlight colour.
    img[mask==255]=(0,255,150)
    overlay_ax.title.set_text('MRI with Mask')
    overlay_ax.imshow(img)

    for panel in (mri_ax, mask_ax, overlay_ax):
        panel.axis('off')
    count += 1
fig.tight_layout()


Create Train, Test, Val dataset

In [18]:
# Classifier table: drop the patient id and turn the label into a string.
brain_df_train = (
    brain_df
    .drop(columns=['patient_id'])
    .assign(mask=lambda frame: frame['mask'].map(str))
)
brain_df_train.info()


In [19]:
from sklearn.model_selection import train_test_split
# Splitting the data: a fixed seed makes the split repeatable across runs, and
# stratifying on the label keeps the class ratio equal in train and test.
train,test = train_test_split(brain_df_train,
                              test_size=0.15,
                              random_state=42,
                              stratify=brain_df_train['mask'])


In [20]:
# Use the generator from tensorflow.keras, the same Keras API as the rest of
# the notebook, instead of the standalone `keras_preprocessing` package.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Pixel values are rescaled to [0, 1]; 10% of `train` is held out for validation.
datagen = ImageDataGenerator(rescale=1./255,validation_split=0.1)
# Creating dataset
train_generator = datagen.flow_from_dataframe(train,
                                              directory='./',
                                              x_col='image_path',
                                              y_col='mask',
                                              subset='training',
                                              class_mode='categorical',
                                              batch_size=16,
                                              shuffle=True,
                                              target_size=(256,256))
valid_generator = datagen.flow_from_dataframe(train,
                                              directory='./',
                                              x_col='image_path',
                                              y_col='mask',
                                              subset='validation',
                                              class_mode='categorical',
                                              batch_size=16,
                                              shuffle=True,
                                              target_size=(256,256))
# The test generator is not shuffled so predictions stay in the row order of `test`.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(test,
                                                  directory='./',
                                                  x_col='image_path',
                                                  y_col='mask',
                                                  class_mode='categorical',
                                                  batch_size=16,
                                                  shuffle=False,
                                                  target_size=(256,256))


Create and Train Model

In [21]:
from tensorflow.keras.applications.resnet50 import ResNet50




In [24]:
# ImageNet-pretrained ResNet50 without its classification top, fed 256x256 RGB input.
clf_input = Input(shape=(256,256,3))
clf_model = ResNet50(weights='imagenet',
                     include_top=False,
                     input_tensor=clf_input)
clf_model.summary()


In [25]:
# Freeze the pretrained backbone. The flag must be set on each `layer`;
# assigning to `layers` (the imported keras module) leaves every layer trainable.
for layer in clf_model.layers:
    layer.trainable = False


In [26]:
# Classification head on top of the ResNet50 feature maps:
# pool -> flatten -> two (Dense 256 + Dropout 0.3) stages -> 2-way softmax.
pooled = AveragePooling2D(pool_size=(4,4))(clf_model.output)
flattened = Flatten(name='Flatten')(pooled)
hidden = Dense(256, activation='relu')(flattened)
hidden = Dropout(0.3)(hidden)
hidden = Dense(256, activation='relu')(hidden)
hidden = Dropout(0.3)(hidden)
head = Dense(2, activation='softmax')(hidden)

model = Model(clf_model.input, head)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
model.summary()


In [27]:
# Early stopping, if our validation loss does not improve
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=15,
)
# Save only the best model seen so far
check_pointer = ModelCheckpoint(
    filepath='clf-resnet-weights.hdf5',
    verbose=1,
    save_best_only=True,
)
# Reduce the learning rate when the validation loss stops improving
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=10,
    min_delta=0.0001,
    factor=0.2,
)
callbacks = [check_pointer, early_stopping, reduce_lr]


In [29]:
# Train the classifier. Pass the prepared `callbacks` list so the learning-rate
# reduction is active too; listing only checkpoint and early stopping left
# `reduce_lr` unused.
h = model.fit(train_generator,
              steps_per_epoch=train_generator.n // train_generator.batch_size,
              epochs=100,
              validation_data=valid_generator,
              validation_steps=valid_generator.n // valid_generator.batch_size,
              callbacks=callbacks)


In [30]:
# Persist the classifier: architecture as JSON, full model as HDF5.
with open('clf-resnet-model.json','w') as json_file:
    json_file.write(model.to_json())
model.save('clf-brain.hdf5')


In [31]:
# Names of the metrics recorded in the training history.
h.history.keys()


In [32]:
# Training vs. validation curves for the classifier: loss (left), accuracy (right).
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12,5))

loss_ax.plot(h.history['loss'])
loss_ax.plot(h.history['val_loss'])
loss_ax.set_title("Classification Model LOSS")
loss_ax.set_ylabel("Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.legend(['train','val'])

acc_ax.plot(h.history['accuracy'])
acc_ax.plot(h.history['val_accuracy'])
acc_ax.set_title("Classification Model Accuracy")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_xlabel("Epochs")
acc_ax.legend(['train','val'])
plt.show()


In [33]:
# evaluate() returns [loss, accuracy] for the compiled model; only accuracy is reported.
test_loss, acc = model.evaluate(test_generator)
print(f"Test Accuracy :  {acc*100} %")


In [34]:
# Class probabilities for every test image; argmax picks the predicted class index.
# NOTE(review): this assumes the generator maps label '0' -> index 0 and
# '1' -> index 1; confirm with test_generator.class_indices.
prediction = model.predict(test_generator)
pred = np.argmax(prediction,axis=1)
original = np.asarray(test['mask']).astype('int')

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
accuracy = accuracy_score(original,pred)
print(accuracy)

# Pin the label order so rows/columns are always [0, 1], matching the report.
cm = confusion_matrix(original,pred,labels=[0,1])
report = classification_report(original,pred,labels=[0,1])
print(report)
plt.figure(figsize=(5,5))
# fmt='d' prints the cell counts as integers instead of the default float format.
cm_ax = sns.heatmap(cm,annot=True,fmt='d')
cm_ax.set_xlabel("Predicted label")
cm_ax.set_ylabel("True label")


Building a Segmentation Model to Localize Tumour




Create dataset of masked elements

In [35]:
# Segmentation only uses the rows whose mask contains tumour pixels.
brain_df_mask = brain_df.loc[brain_df['mask'] == 1]
brain_df_mask.shape


In [36]:
# 85% train; the remaining 15% is halved into test and validation.
# Fixed seeds keep the three subsets identical across runs.
X_train,X_val = train_test_split(brain_df_mask,test_size=0.15,random_state=42)
X_test,X_val = train_test_split(X_val,test_size=0.5,random_state=42)

print("Train Size is {}, validation size is {} & test size is {}".format(len(X_train),len(X_val),len(X_test)))

train_ids = list(X_train.image_path)
train_mask = list(X_train.mask_path)

val_ids = list(X_val.image_path)
val_mask = list(X_val.mask_path)


In [37]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that reads image/mask pairs from disk and yields them in batches.

    Each batch is a tuple ``(X, y)``:
      * ``X`` - float64 array ``(batch, img_h, img_w, 3)`` holding per-image
        standardised (zero mean, unit std) images;
      * ``y`` - integer array ``(batch, img_h, img_w, 1)`` holding 1 where the
        mask pixel is non-zero and 0 elsewhere.

    Args:
        ids: list of image file paths.
        mask: list of mask file paths, aligned index-by-index with ``ids``.
        image_dir: stored on the instance; not used when reading files.
        batch_size: number of samples per batch.
        img_h: output image height.
        img_w: output image width.
        shuffle: reshuffle the sample order at construction and after every epoch.
    """

    def __init__(self, ids , mask, image_dir = './', batch_size = 16, img_h = 256, img_w = 256, shuffle = True):
        super().__init__()
        self.ids = ids
        self.mask = mask
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.img_h = img_h
        self.img_w = img_w
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Get the number of batches per epoch'
        # Only full batches are served; a trailing partial batch is dropped.
        return len(self.ids) // self.batch_size

    def __getitem__(self, index):
        'Generate a batch of data'
        # positions (in the possibly shuffled order) that belong to this batch
        indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]

        # image and mask paths for those positions
        list_ids = [self.ids[i] for i in indexes]
        list_mask = [self.mask[i] for i in indexes]

        # generate data for the X(features) and y(label)
        X, y = self.__data_generation(list_ids, list_mask)
        return X, y

    def on_epoch_end(self):
        'Rebuild the sample order; shuffled when `shuffle` is set'
        self.indexes = np.arange(len(self.ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_ids, list_mask):
        'generate the data corresponding the indexes in a given batch of images'
        # Size the arrays by the actual number of samples so that no
        # uninitialised rows from np.empty can ever be returned.
        n_samples = len(list_ids)
        # Depth is 3 for the input and 1 for the output (single-channel mask).
        X = np.empty((n_samples, self.img_h, self.img_w, 3))
        y = np.empty((n_samples, self.img_h, self.img_w, 1))

        for i in range(n_samples):
            img = io.imread(str(list_ids[i]))
            mask = io.imread(str(list_mask[i]))

            # cv2.resize expects the target size as (width, height).
            img = cv2.resize(img, (self.img_w, self.img_h))
            img = np.array(img, dtype = np.float64)
            # Nearest-neighbour keeps the mask values unblended when resizing.
            mask = cv2.resize(mask, (self.img_w, self.img_h), interpolation = cv2.INTER_NEAREST)
            mask = np.array(mask, dtype = np.float64)

            # standardising the image; a constant image has std 0 and is only centred
            img -= img.mean()
            img_std = img.std()
            if img_std > 0:
                img /= img_std

            X[i,] = img
            # The raw mask is stored and binarised below. Standardising it first
            # would threshold at its mean and divide by zero on constant masks.
            # expanding the dimension of the mask from (h, w) to (h, w, 1)
            y[i,] = np.expand_dims(mask, axis = 2)

        # binarising y: any non-zero mask pixel becomes 1
        y = (y > 0).astype(int)

        return X, y

# Batch generators for the training and validation image/mask path lists.
train_data = DataGenerator(ids=train_ids, mask=train_mask)
val_data = DataGenerator(ids=val_ids, mask=val_mask)


In [39]:
def resblock(X,f):
    """Residual block with `f` filters applied to tensor `X`.

    Main path: 1x1 conv -> BN -> ReLU -> 3x3 conv (same padding) -> BN.
    Shortcut:  1x1 conv -> BN on the block input.
    The two paths are added and passed through a final ReLU.
    """
    block_input = X

    # main path
    main = Conv2D(f,kernel_size=(1,1),kernel_initializer='he_normal')(block_input)
    main = BatchNormalization()(main)
    main = Activation('relu')(main)
    main = Conv2D(f,kernel_size=(3,3),padding='same',kernel_initializer='he_normal')(main)
    main = BatchNormalization()(main)

    # shortcut path, projected to `f` channels so it can be added to the main path
    shortcut = Conv2D(f,kernel_size=(1,1),kernel_initializer='he_normal')(block_input)
    shortcut = BatchNormalization()(shortcut)

    merged = Add()([main,shortcut])
    return Activation('relu')(merged)

def upsample_concat(x,skip):
    """Upsample `x` by a factor of 2 in both spatial dimensions and concatenate it with `skip`."""
    upsampled = UpSampling2D((2,2))(x)
    return Concatenate()([upsampled,skip])

In [40]:
input_shape = (256,256,3)
X_input = Input(input_shape)

# Stage 1: two (3x3 conv + batch norm) pairs with 16 filters, then 2x2 max-pooling
conv_1 = X_input
for _pair in range(2):
    conv_1 = Conv2D(16,3,activation='relu',padding='same',kernel_initializer='he_normal')(conv_1)
    conv_1 = BatchNormalization()(conv_1)
pool_1 = MaxPool2D((2,2))(conv_1)

# Stages 2-4: residual block followed by 2x2 max-pooling; each block output
# is remembered as a skip connection for the decoder.
skips = [conv_1]
down = pool_1
for filters in (32, 64, 128):
    encoded = resblock(down, filters)
    skips.append(encoded)
    down = MaxPool2D((2,2))(encoded)

# Stage 5 (bottle neck)
conv_5 = resblock(down,256)

# Upsample stages 1-4: upsample, concatenate with the matching encoder output
# (deepest first), then a residual block.
up = conv_5
for filters, skip in zip((128, 64, 32, 16), reversed(skips)):
    up = resblock(upsample_concat(up, skip), filters)

# One sigmoid channel per pixel
out = Conv2D(1,(1,1),kernel_initializer='he_normal',padding='same',activation='sigmoid')(up)
seg_model = Model(X_input,out)
seg_model.summary()


In [41]:
# Render the segmentation network graph to seg_model.png.
plot_model(seg_model, to_file='seg_model.png')


In [42]:
from keras.losses import binary_crossentropy

# NOTE(review): `epsilon` is not referenced by the loss functions defined below; confirm whether it is still needed.
epsilon = 1e-5
# Added to the numerator and denominator of the Tversky index.
smooth = 1

def tversky(y_true, y_pred):
    """Tversky index between a ground-truth mask and a predicted mask.

    Computed over all elements of both tensors as
    (TP + smooth) / (TP + alpha*FN + (1 - alpha)*FP + smooth) with alpha = 0.7.

    Both inputs are cast to float32 first, so integer masks can be passed
    directly; this matches the casting already done in `focal_tversky`.
    """
    y_true_pos = K.flatten(tf.cast(y_true, tf.float32))
    y_pred_pos = K.flatten(tf.cast(y_pred, tf.float32))
    true_pos = K.sum(y_true_pos * y_pred_pos)
    false_neg = K.sum(y_true_pos * (1-y_pred_pos))
    false_pos = K.sum((1-y_true_pos)*y_pred_pos)
    alpha = 0.7  # weight on false negatives; false positives get 1 - alpha
    return (true_pos + smooth)/(true_pos + alpha*false_neg + (1-alpha)*false_pos + smooth)

def focal_tversky(y_true,y_pred):
    """Focal Tversky loss: (1 - tversky(y_true, y_pred)) ** gamma with gamma = 0.75.

    Both tensors are cast to float32 before the index is computed.
    """
    gamma = 0.75
    truth = tf.cast(y_true, tf.float32)
    estimate = tf.cast(y_pred, tf.float32)
    tversky_index = tversky(truth, estimate)
    return K.pow(1 - tversky_index, gamma)

def tversky_loss(y_true, y_pred):
    """Tversky loss: one minus the Tversky index of the two masks."""
    index = tversky(y_true,y_pred)
    return 1 - index


In [44]:
# Compile the segmentation model: focal Tversky loss, Tversky index as the metric.
adam = tf.keras.optimizers.Adam(learning_rate=0.05, epsilon=0.1)
seg_model.compile(optimizer=adam, loss=focal_tversky, metrics=[tversky])

# Callbacks
# stop when the validation loss has not improved for 20 epochs
earlystopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)
# keep only the best model seen so far
checkpointer = ModelCheckpoint(filepath="ResUNet-segModel-weights.hdf5", verbose=1, save_best_only=True)
# shrink the learning rate by a factor of 0.2 after 10 epochs without improvement
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', verbose=1,
                              patience=10, min_delta=0.0001, factor=0.2)

In [45]:
# Train the segmentation model with the callbacks configured for it.
# `earlystopping` (patience 20) is the one defined for this model;
# `early_stopping` is the classifier's callback (patience 15).
h = seg_model.fit(train_data,
                  epochs=100,
                  validation_data=val_data,
                  callbacks=[checkpointer, earlystopping, reduce_lr])


In [46]:
# Persist the segmentation model: architecture as JSON, full model as HDF5.
with open("ResUNet-seg-model.json", "w") as json_file:
    json_file.write(seg_model.to_json())
seg_model.save('seg_model_brain.hdf5')


In [47]:
# Names of the metrics recorded in the segmentation training history.
h.history.keys()


In [48]:
# Training vs. validation curves for the segmentation model: loss (left), Tversky index (right).
fig, (loss_ax, score_ax) = plt.subplots(1, 2, figsize=(12,5))

loss_ax.plot(h.history['loss'])
loss_ax.plot(h.history['val_loss'])
loss_ax.set_title("SEG Model focal tversky Loss")
loss_ax.set_ylabel("focal tversky loss")
loss_ax.set_xlabel("Epochs")
loss_ax.legend(['train', 'val'])

score_ax.plot(h.history['tversky'])
score_ax.plot(h.history['val_tversky'])
score_ax.set_title("SEG Model tversky score")
score_ax.set_ylabel("tversky Accuracy")
score_ax.set_xlabel("Epochs")
score_ax.legend(['train', 'val']);


In [49]:
# Evaluate the segmentation model on the held-out test paths.
test_ids = X_test.image_path.tolist()
test_mask = X_test.mask_path.tolist()
test_data = DataGenerator(test_ids, test_mask)
test_loss, tv = seg_model.evaluate(test_data)
print(f"Segmentation tversky is {tv*100:.2f}%")


In [50]:
def prediction(test,model,model_seg):
    """Run the classifier and, for positives, the segmentation model on every image.

    Args:
        test: object with an `image_path` attribute that iterates over image file paths.
        model: classifier; the argmax of its output is read as the class (0 = no tumour).
        model_seg: segmentation model returning a per-pixel probability map.

    Returns:
        DataFrame with columns `image_path`, `predicted_mask` (the raw segmentation
        output, or the string "No Mask :)" when nothing was found) and `has_mask`
        (1 when a mask was predicted, otherwise 0).
    """
    image_id, mask, has_mask = [], [], []

    for path in test.image_path:
        # Read the file once and derive both model inputs from it.
        raw = io.imread(path)

        # Classifier input: pixel values scaled to [0, 1], resized to 256x256.
        clf_input = cv2.resize(raw*1./255.0,(256,256))
        clf_input = np.reshape(np.array(clf_input,dtype=np.float64),(1,256,256,3))

        predicted = None
        if np.argmax(model.predict(clf_input)) != 0:
            # Segmentation input: resized image standardised to zero mean / unit std.
            seg_input = np.array(cv2.resize(raw,(256,256)),dtype=np.float64)
            seg_input -= seg_input.mean()
            seg_std = seg_input.std()
            if seg_std > 0:  # a constant image has std 0; dividing would yield NaNs
                seg_input /= seg_std

            candidate = model_seg.predict(seg_input[np.newaxis, ...])
            # Keep the output only if at least one pixel rounds to 1.
            if candidate.round().astype(int).sum() != 0:
                predicted = candidate

        image_id.append(path)
        has_mask.append(0 if predicted is None else 1)
        mask.append("No Mask :)" if predicted is None else predicted)

    return pd.DataFrame({"image_path":image_id,'predicted_mask':mask,'has_mask':has_mask})


In [51]:
# Run the classifier + segmentation pipeline over the classification test split.
df_pred = prediction(test,model,seg_model)
df_pred


In [52]:
# Join the predictions back onto the test rows (matched on image_path).
# Note: this overwrites df_pred, so the cell is meant to run once per prediction run.
df_pred = test.merge(df_pred,on='image_path')
df_pred.head(10)


In [53]:
# Show up to 15 test images for which a mask was predicted:
# MRI, ground-truth mask, predicted mask, and the two overlays.
count = 0
fig,axs = plt.subplots(15,5,figsize=(40,80))
for i in range(len(df_pred)):
    if count == 15:
        break
    if df_pred.has_mask[i] != 1:
        continue

    # io.imread already returns RGB, so no colour conversion is applied;
    # a BGR->RGB conversion here would swap the red and blue channels.
    img = io.imread(df_pred.image_path[i])
    axs[count][0].imshow(img)
    axs[count][0].title.set_text("Brain MRI")
    axs[count][0].axis('off')

    mask = io.imread(df_pred.mask_path[i])
    axs[count][1].imshow(mask)
    axs[count][1].title.set_text("Original Mask")
    axs[count][1].axis('off')

    pred = np.array(df_pred.predicted_mask[i]).squeeze().round()
    axs[count][2].imshow(pred)
    axs[count][2].title.set_text("AI Predicted Mask")
    axs[count][2].axis('off')

    # Overlays are painted on copies so the plain MRI panel is left untouched.
    truth_overlay = img.copy()
    truth_overlay[mask==255] =(255,0,0)
    axs[count][3].imshow(truth_overlay)
    axs[count][3].title.set_text("Brain MRI with original Mask (Ground Truth)")
    axs[count][3].axis('off')

    img_ = img.copy()
    img_[pred==1]= (0,255,150)
    axs[count][4].imshow(img_)
    axs[count][4].title.set_text("MRI with AI Predicted Mask")
    axs[count][4].axis('off')
    count+=1

# Hide the panels of any rows that were not filled.
for unused_row in axs[count:]:
    for panel in unused_row:
        panel.axis('off')
fig.tight_layout()