# Read data

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!unzip '/content/gdrive/My Drive/new_images.zip' 

In [1]:
import pandas as pd

train2 = pd.read_pickle('/content/gdrive/My Drive/train2_new')

Successfully created augmented images and their encoded pixels. train2 contains the names and the encoded pixels of all images

In [2]:
from tqdm import tqdm 

for i in tqdm(range(len(train2))):
  for j in range(1,5):
    if type(train2['e'+str(j)].iloc[i]) is not str:
      train2['e'+str(j)][i] = train2['e'+str(j)].iloc[i][0]

100%|██████████| 13377/13377 [00:03<00:00, 3408.19it/s]


In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test = train_test_split(train2, test_size=0.2)

# Tensorflow tf.data pipeline

In [4]:
import cv2

def masks(encoded_pixels):
    counts=[]
    mask=np.zeros((256*1600), dtype=np.int8) 
    pre_mask=np.asarray([int(point) for point in encoded_pixels.split()])
    for index,count in enumerate(pre_mask):
        if(index%2!=0):
            counts.append(count)
    i=0
    for index,pixel in enumerate(pre_mask):
        if(index%2==0):
            if(i==len(counts)):
                break
            mask[pixel:pixel+counts[i]]=1
            i+=1
    mask=np.reshape(mask,(1600,256))
    mask=cv2.resize(mask,(256,1600)).T
    return mask

from PIL import Image

def preprocess(file_path):
    # you should decode bytes type to string type
    img_name = (file_path.numpy()[0].decode('UTF-8'))
    img_name = 'images/'+img_name
    img = cv2.imread(img_name)/255
    img = cv2.resize(img,(1600,256))
    mask = np.empty((256,1600,4),dtype=np.int8)
    for i in range(4):
      mask[:,:,i] = masks(file_path.numpy()[i+1].decode('UTF-8'))
    mask = tf.convert_to_tensor(mask)
    return img,mask

In [5]:
import tensorflow as tf
import numpy as np

train_dataset = tf.data.Dataset.from_tensor_slices(X_train.values)
test_dataset = tf.data.Dataset.from_tensor_slices(X_test.values)

In [6]:
train_ds = train_dataset.map(lambda x: tf.py_function(preprocess, [x], [tf.float64,tf.int8]),num_parallel_calls=5)
test_ds = test_dataset.map(lambda x: tf.py_function(preprocess, [x], [tf.float64,tf.int8]),num_parallel_calls=5)

In [7]:
BATCH_SIZE = 10

# for reference about the BUFFER_SIZE in shuffle:
# https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle
BUFFER_SIZE = 1000

dataset = {"train": train_ds, "val": test_ds}

# -- Train Dataset --#
dataset['train'] = dataset['train'].shuffle(buffer_size=BUFFER_SIZE, seed=42)
dataset['train'] = dataset['train'].repeat()
dataset['train'] = dataset['train'].batch(BATCH_SIZE)
dataset['train'] = dataset['train'].prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

#-- Validation Dataset --#
dataset['val'] = dataset['val'].repeat()
dataset['val'] = dataset['val'].batch(BATCH_SIZE)
dataset['val'] = dataset['val'].prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

print(dataset['train'])
print(dataset['val'])


<PrefetchDataset shapes: (<unknown>, <unknown>), types: (tf.float64, tf.int8)>
<PrefetchDataset shapes: (<unknown>, <unknown>), types: (tf.float64, tf.int8)>


In [8]:
! pip uninstall keras -y
! pip install segmentation-models



# Model

In [9]:
# Dice similarity coefficient loss, brought to you by: https://github.com/nabsabraham/focal-tversky-unet
from tensorflow.keras import backend as K
# https://www.kaggle.com/xhlulu/severstal-simple-keras-u-net-boilerplate

# COMPETITION METRIC
def dice_coef(y_true, y_pred, smooth=1):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

In [10]:
import tensorflow as tf

tf.keras.backend.clear_session()

SM_FRAMEWORK=tf.keras
import segmentation_models as sm
from segmentation_models import Unet
from segmentation_models import get_preprocessing
from segmentation_models.losses import DiceLoss
from segmentation_models.metrics import iou_score,f1_score,Recall
import tensorflow
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, UpSampling2D, Concatenate, Add, Flatten
from tensorflow import reduce_sum
import numpy as np
from PIL import Image

WIDTH=1600
HEIGHT=256 

import segmentation_models as sm
network = 'resnet34'
process_input = get_preprocessing(network)

adam = tf.keras.optimizers.Adam(lr=0.001)

model = Unet(network,input_shape = (256, 1600, 3),classes=4,activation='sigmoid',encoder_freeze=True)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=[dice_coef])

Segmentation Models: using `tf.keras` framework.


In [11]:
from tensorflow.keras.callbacks import Callback,ModelCheckpoint
import warnings
warnings.filterwarnings("ignore")

callback_list=[]
%load_ext tensorboard
log_dir="Model-1-logs"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,histogram_freq=1, write_graph
=True,write_grads=True)
callback_list.append(tensorboard_callback)

filepath="Model-Unet-weights-v2.hdf5"
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=0, save_best_only=True,
mode='auto')
callback_list.append(checkpoint)

callback_list.append(tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2))

from tensorflow.keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.8,patience=1, min_lr=0.001)
callback_list.append(reduce_lr)



In [12]:
model.load_weights('/content/gdrive/My Drive/Model-Unet-weights.hdf5')

Increased size of train data and randomized train test split to increase randomness in data and avoid overfit

In [13]:
model.fit(dataset['train'],validation_data = dataset['val'],epochs = 5, callbacks=callback_list,steps_per_epoch=len(X_train)//10,validation_steps=len(X_test)//10)

Epoch 1/5
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/5
Epoch 3/5


<tensorflow.python.keras.callbacks.History at 0x7f7044da67b8>

No overfit is seen

In [16]:
!cp '/content/Model-Unet-weights-v2.hdf5' '/content/gdrive/My Drive'