# Cell Nuclei Detection using Semantic Segmentation with U-Net

## 1. Import Necessary Packages

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import glob
import pathlib
import tensorflow as tf
from tensorflow import keras
import os
from tqdm import tqdm
import time
import datetime

## 2. Data Preprocessing and loading

In [None]:
# Root folders on the mounted drive: `base_dir` holds the downloaded
# competition archives, `unzip_base_dir` is where they are extracted.
# Both end with a slash because the paths below are built by plain string
# concatenation; without it the targets would become siblings of the working
# folder named 'Workingstage1_train/' instead of sub-folders of 'Working/'.
base_dir = 'G:/My Drive/Colab Notebooks/Project/Inputs/data-science-bowl-2018/'
unzip_base_dir = 'G:/My Drive/Colab Notebooks/Project/Inputs/Working/'

# Archives to extract.
stage_train_zip = base_dir + 'stage1_train.zip'
stage_train_labels_zip = base_dir + 'stage1_train_labels.csv.zip'

# Extraction targets, one sub-folder per archive.
stage_train_unzip = unzip_base_dir + 'stage1_train/'
stage_train_labels_unzip = unzip_base_dir + 'stage1_train_labels/'

### 2.1 Unzip the data

In [None]:
import zipfile
# Extract each training archive into its own working folder, echoing the
# archive and its destination so progress is visible in the cell output.
archive_targets = [
    (stage_train_zip, stage_train_unzip),
    (stage_train_labels_zip, stage_train_labels_unzip),
]
for archive_path, target_dir in archive_targets:
    print(archive_path)
    print(target_dir)
    print('---')
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

In [None]:
# The test archive sits next to the training archives, so its path is derived
# from `base_dir` instead of repeating the absolute, backslash-only path.
test_data = base_dir + 'stage1_test.zip'
test_data_unzip = unzip_base_dir + 'stage1_test/'
with zipfile.ZipFile(test_data, 'r') as zip_ref:
    zip_ref.extractall(test_data_unzip)

In [None]:
# One folder per test sample. pathlib drops the trailing slash of a '*/'
# pattern, so the glob would also match plain files; filter on isdir to keep
# sample folders only, and sort so the order does not depend on the OS.
test = pathlib.Path(test_data_unzip)
lsttest_files = sorted(
    entry for entry in glob.glob(str(test / '*')) if os.path.isdir(entry)
)
lsttest_files[:10]

In [None]:
def get_image(image_path):
    ''' Load one sample's image as a min-max normalised 128x128 RGB array.

    Param image_path : Sample folder; the first file matching
                       ``<image_path>/images/*`` is read.

    Returns a float array of shape (128, 128, 3) scaled to [0, 1]
    (all zeros when every pixel has the same value).
    Raises FileNotFoundError when the folder contains no image file.
    '''
    temp_path = glob.glob(image_path + '/images/*')
    if not temp_path:
        raise FileNotFoundError('No image found in ' + image_path + '/images/')

    img = tf.io.read_file(temp_path[0])
    # channels=3 drops an alpha channel and expands grayscale, so every sample
    # has the same depth; expand_animations=False keeps the tensor rank 3.
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, (128, 128))

    arr = img.numpy()
    lowest = arr.min()
    value_range = arr.max() - lowest
    if value_range == 0:
        # Constant image: min-max scaling would divide by zero and yield NaNs.
        return np.zeros_like(arr)
    return (arr - lowest) * (1 / value_range)

# Load every test sample into memory, in the order the folders were listed.
test_images = [get_image(sample_dir) for sample_dir in lsttest_files]

### 2.2 Convert the images to a tensor

In [None]:
# Stack the per-image arrays into one batch array, then hand it to TensorFlow.
test_batch = np.array(test_images)
test_dataset = tf.convert_to_tensor(test_batch)

In [None]:
# One folder per training sample. pathlib drops the trailing slash of a '*/'
# pattern, so the glob would also match plain files; filter on isdir to keep
# sample folders only, and sort so the order does not depend on the OS.
path = pathlib.Path(stage_train_unzip)
lst_files = sorted(
    entry for entry in glob.glob(str(path / '*')) if os.path.isdir(entry)
)

In [None]:
# Number of samples per batch served by the training and validation generators.
BATCH_SIZE = 64

In [None]:
class create_ds(keras.utils.Sequence):
    ''' Keras Sequence serving batches of (image, mask) pairs read from disk.

    Every entry of ``lst_files`` is a sample folder holding an ``images``
    sub-folder (its first file is used) and a ``masks`` sub-folder whose files
    are merged into one mask.

    Param lst_files  : List of sample folder paths
    Param batch_size : Number of samples per batch
    '''

    def __init__(self, lst_files, batch_size):
        # Newer Keras releases require the base class to be initialised.
        super().__init__()
        self.lst_files = lst_files
        self.batch_size = batch_size

    def __len__(self):
        ''' Number of batches, counting a final partial batch (ceil division),
        so no sample is dropped and a small split never reports 0 batches. '''
        return (len(self.lst_files) + self.batch_size - 1) // self.batch_size

    def _load_image(self, image_path):
        ''' Read the sample image as a min-max normalised (128, 128, 3) array. '''
        temp_path = glob.glob(image_path + '/images/*')
        if not temp_path:
            raise FileNotFoundError('No image found in ' + image_path + '/images/')

        img = tf.io.read_file(temp_path[0])
        # channels=3 drops an alpha channel and expands grayscale, so all
        # samples of a batch can be stacked into one array.
        img = tf.io.decode_image(img, channels=3, expand_animations=False)
        arr = tf.image.resize(img, (128, 128)).numpy()

        lowest = arr.min()
        value_range = arr.max() - lowest
        if value_range == 0:
            # Constant image: min-max scaling would divide by zero.
            return np.zeros_like(arr)
        return (arr - lowest) * (1 / value_range)

    def _load_mask(self, image_path):
        ''' Merge every mask file of the sample into one (128, 128, 1) array. '''
        # Start from an all-zero mask and keep the pixel-wise maximum, i.e.
        # the union of all the individual masks in the folder.
        mask_ = tf.zeros(shape=(128, 128, 1))
        for mask_path in glob.glob(image_path + '/masks/*'):
            mask = tf.io.read_file(mask_path)
            # channels=1 keeps the merged mask single-channel whatever the
            # file's colour mode is.
            mask = tf.io.decode_image(mask, channels=1, expand_animations=False)
            mask = tf.image.resize(mask, (128, 128))
            mask_ = tf.maximum(mask_, mask)
        return mask_.numpy()

    def get_image(self, image_path):
        ''' Load one sample.

        Param image_path : Sample folder path

        Returns (image, mask): the normalised image and the merged mask; the
        mask is not rescaled here (``__getitem__`` divides it by 255).
        '''
        return self._load_image(image_path), self._load_mask(image_path)

    # Create batches of given batch and return to the dataset object
    def __getitem__(self, idx):
        ''' Return batch ``idx`` as a pair of float32 tensors (images, masks). '''
        batch = self.lst_files[idx * self.batch_size : (idx + 1) * self.batch_size]

        temp_image = []
        temp_label = []
        for path in batch:
            new_arr, mask = self.get_image(path)
            temp_image.append(new_arr)
            # Rescale the mask by 255 to match the [0, 1] range of the images.
            temp_label.append(mask / 255.0)

        images = tf.convert_to_tensor(np.array(temp_image), dtype='float32')
        labels = tf.convert_to_tensor(np.array(temp_label), dtype='float32')
        return images, labels

In [None]:
import plotly.graph_objects as go

# We pass the DataFrame built from model.history and plot the train/validation loss and the train/validation accuracy.
def plot_train_valid_curcve(df):
    ''' Plot the loss and accuracy curves of a training run on one figure.

    Param df : Table with the columns 'loss', 'val_loss', 'accuracy' and
               'val_accuracy'; each row is one point on the x axis.
    '''
    # (column in df, legend label) for every curve, in drawing order.
    curves = (
        ('loss', 'Train_loss'),
        ('val_loss', 'val_loss'),
        ('accuracy', 'Train_accuracy'),
        ('val_accuracy', 'val_accuracy'),
    )
    steps = list(range(len(df)))

    fig = go.Figure()
    for column, label in curves:
        fig.add_trace(go.Scatter(x=steps, y=df[column],
                                 mode='lines',
                                 name=label))
    fig.show()
        
# Let's see what we got as a prediction
def plot_predicted_image(x_test, valid, pred):
    ''' Show the input image, its true mask and the predicted mask side by side.

    Param x_test : Actual image
    Param valid  : Validation (ground-truth) mask
    Param pred   : Predicted mask
    '''
    panels = (
        ('Actual Image', x_test),
        ('Actual Mask', valid),
        ('Predicted mask', pred),
    )

    plt.figure(figsize=(15, 7))
    for slot, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(1, 3, slot)
        plt.title(caption)
        plt.imshow(picture)

In [None]:
# Split the sample folders into a training and a validation set (75% / 25%,
# the scikit-learn default). The seed is fixed so the same samples land in
# each split on every run, which keeps results comparable between runs.
from sklearn.model_selection import train_test_split
train, valid = train_test_split(lst_files, test_size=0.25, random_state=42)

In [None]:
# Wrap each split in the keras Sequence defined above so batches are read
# from disk on demand; the training set is built first, then validation.
train_ds, valid_ds = (
    create_ds(lst_files=split, batch_size=BATCH_SIZE)
    for split in (train, valid)
)

## 3. Model 1

In [None]:
# the dimension of image
# These are passed to get_model to define the model's input shape
# (height, width, channels).
IMAGE_HEIGHT=128  # input height in pixels
IMAGE_WIDTH=128  # input width in pixels
CHANNEL = 3  # number of colour channels per input image

In [None]:
def get_model(IMAGE_HEIGHT, IMAGE_WIDTH, CHANNEL):
    ''' Build an uncompiled U-Net style model for binary segmentation.

    Four 2x2 max-pool steps shrink the input (16 -> 256 filters), then four
    stride-2 transposed convolutions grow it back, each concatenated with the
    encoder output of the same resolution.

    Param IMAGE_HEIGHT : Input height; should be a multiple of 16 so the
                         upsampled maps line up with the skip connections
    Param IMAGE_WIDTH  : Input width; same constraint as the height
    Param CHANNEL      : Number of input channels

    Returns a keras.Model mapping (IMAGE_HEIGHT, IMAGE_WIDTH, CHANNEL) inputs
    to a single-channel map of per-pixel probabilities.
    '''
    def double_conv(tensor, filters):
        # Two 3x3 same-padded ReLU convolutions: the basic U-Net building block.
        for _ in range(2):
            tensor = keras.layers.Conv2D(filters=filters, kernel_size=3,
                                         padding='same', activation='relu')(tensor)
        return tensor

    def down_block(tensor, filters):
        # Halve the resolution, then convolve.
        pooled = keras.layers.MaxPool2D(pool_size=(2, 2))(tensor)
        return double_conv(pooled, filters)

    def up_block(tensor, skip, filters):
        # Double the resolution, merge the matching encoder features, convolve.
        upsampled = keras.layers.Conv2DTranspose(filters=filters, kernel_size=2,
                                                 strides=2, padding='same')(tensor)
        merged = keras.layers.concatenate([skip, upsampled])
        return double_conv(merged, filters)

    inputs = keras.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, CHANNEL))

    # Contracting path.
    conv_1 = double_conv(inputs, 16)
    conv_2 = down_block(conv_1, 32)
    conv_3 = down_block(conv_2, 64)
    conv_4 = down_block(conv_3, 128)
    conv_5 = down_block(conv_4, 256)

    # Expanding path with skip connections.
    conv_6 = up_block(conv_5, conv_4, 128)
    conv_7 = up_block(conv_6, conv_3, 64)
    conv_8 = up_block(conv_7, conv_2, 32)
    conv_9 = up_block(conv_8, conv_1, 16)

    # Sigmoid (not ReLU) so the output is a probability in [0, 1], which is
    # what the binary cross-entropy loss used for training expects.
    output = keras.layers.Conv2D(filters=1, kernel_size=3, padding='same',
                                 activation='sigmoid')(conv_9)
    model = keras.Model(inputs, output)
    return model

In [None]:
# Instantiate the network for the configured input size and print its layers.
model = get_model(
    IMAGE_HEIGHT=IMAGE_HEIGHT,
    IMAGE_WIDTH=IMAGE_WIDTH,
    CHANNEL=CHANNEL,
)
model.summary()

In [None]:
# Training configuration: Adam optimizer, binary cross-entropy loss and
# accuracy as the reported metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [None]:
#Create a function to display some examples
def display(display_list):
    ''' Show the given images in one row, titled in a fixed order.

    Param display_list : Up to three image-like arrays, ordered as
                         input image, true mask, predicted mask
    '''
    title = ['Input Image','True Mask','Predicted Mask']
    panel_count = len(display_list)

    plt.figure(figsize=(15, 15))
    for index, item in enumerate(display_list):
        plt.subplot(1, panel_count, index + 1)
        plt.title(title[index])
        plt.imshow(tf.keras.utils.array_to_img(item))
        plt.axis('off')

    plt.show()
    
# Preview the first sample of (up to) the first two training batches.
# `train` is a plain list of folder paths and has no `.take`, so the batches
# are read from the `train_ds` generator by index instead.
# The last previewed pair stays in `sample_image` / `sample_mask`, which
# show_predictions uses when it is called without a dataset.
for batch_index in range(min(2, len(train_ds))):
    images, masks = train_ds[batch_index]
    sample_image, sample_mask = images[0], masks[0]
    display([sample_image, sample_mask])
    
#Create a function to process predicted mask
def create_mask(pred_mask):
    ''' Turn raw model output into a label mask with a trailing channel axis.

    Param pred_mask : Model output whose last axis holds the per-pixel scores

    With a single output channel the scores are thresholded at 0.5 (an argmax
    over one channel is always 0 and would give a blank mask). With several
    channels the index of the highest score is used.
    '''
    if pred_mask.shape[-1] == 1:
        return tf.cast(pred_mask > 0.5, tf.int64)

    pred_mask = tf.argmax(pred_mask, axis=-1)
    # argmax removes the channel axis; put it back so the result is image-shaped.
    pred_mask = pred_mask[..., tf.newaxis]
    return pred_mask

#Create a function to display prediction
def show_predictions(dataset=None,num=1):
    ''' Display input image, true mask and predicted mask.

    Param dataset : Source of (image batch, mask batch) pairs: either an
                    object with a ``take`` method or an indexable object
                    with a length. When None, the module-level
                    ``sample_image`` / ``sample_mask`` pair is shown.
    Param num     : Maximum number of batches to show; the first sample of
                    each batch is displayed
    '''
    if dataset is None:
        prediction = model.predict(sample_image[tf.newaxis, ...])
        display([sample_image, sample_mask, create_mask(prediction)[0]])
        return

    if hasattr(dataset, 'take'):
        batches = dataset.take(num)
    else:
        # Indexable generators have no `take`; read the first batches by index.
        batches = (dataset[index] for index in range(min(num, len(dataset))))

    for image, mask in batches:
        pred_mask = model.predict(image)
        display([image[0], mask[0], create_mask(pred_mask)[0]])

#Custom callback to display result during training
class DisplayCallback(tf.keras.callbacks.Callback):
    ''' After every epoch, refresh the cell output with a sample prediction. '''

    def on_epoch_end(self, epoch, logs=None):
        # `clear_output` lives in IPython and was never imported, which raised
        # a NameError at the end of the first epoch. Import it here and skip
        # the clearing step when not running under IPython/Jupyter.
        try:
            from IPython.display import clear_output
        except ImportError:
            clear_output = None

        if clear_output is not None:
            clear_output(wait=True)
        show_predictions()
        print('\n Sample prediction after epoch {}\n'.format(epoch+1))

In [None]:
#Tensorboard callback
# Each run logs to its own time-stamped folder so runs can be compared.
base_log_path = r"G:/My Drive/Colab Notebooks/Project/Inputs/Log"
log_dir = os.path.join(base_log_path, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tb_callback = tf.keras.callbacks.TensorBoard(log_dir,histogram_freq=1,profile_batch=0)

# We will create baseline model and execute for 10 epochs
# `batch_size` is deliberately not passed: train_ds already yields complete
# batches, and Keras rejects the argument for generator / Sequence inputs.
history = model.fit(train_ds, epochs=10,
                    validation_data=valid_ds,
                    callbacks=[DisplayCallback(), tb_callback])

In [None]:
# Tabulate the metrics recorded by model.fit so they can be plotted.
df = pd.DataFrame(history.history)

In [None]:
# Draw the loss and accuracy curves from the training history table.
plot_train_valid_curcve(df)