In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from glob import glob # For pathname matching
from skimage.transform import resize
from keras import backend as K
from sklearn.model_selection import train_test_split
from keras.models import Sequential 
from keras.layers import Conv2D, MaxPool2D, UpSampling2D, Flatten, Dense
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras.optimizers import Adam
import cv2

from PIL import Image
from scipy import ndimage
import matplotlib.pyplot as plt
from scipy.misc import imresize

from time import time
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
from matplotlib.pyplot import rc
font = {'family' : 'monospace',
        'weight' : 'bold',
        'size'   : 12}
rc('font', **font)  # pass in the font dict as kwargs
K.set_image_dim_ordering('th')

import os
from os.path import basename
print(os.listdir("../input"))
print(os.listdir())

# Any results you write to the current directory are saved as output.

In [None]:
input_folder = '../input'

train= glob('/'.join([input_folder,'train/*.jpg']))
train_masks= glob('/'.join([input_folder,'train_masks/*.gif']))
test= glob('/'.join([input_folder,'test/*.jpg']))
print('Number of training images: ', len(train), 'Number of corresponding masks: ', len(train_masks), 'Number of test images: ', len(test))

In [None]:
tt_ratio = 0.8
img_rows, img_cols = 1024,1024
batch_size = 4
def dice_coef(y_true, y_pred, smooth=0):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection=K.sum(y_true_f * y_pred_f)
    return(2. * intersection + smooth) / ((K.sum(y_true_f) + K.sum(y_pred_f)) + smooth)

In [None]:
#split the training set into train and validation samples
train_images, validation_images = train_test_split(train, train_size=tt_ratio, test_size=1-tt_ratio)
print('Size of the training sample=', len(train_images), 'and size of the validation sample=', len(validation_images), ' images')


In [None]:
#utility function to convert greyscale inages to rgb
def grey2rgb(img):
    new_img = []
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            new_img.append(list(img[i][j])*3)
    new_img = np.array(new_img).reshape(img.shape[0], img.shape[1], 3)
    return new_img

#generator that will be used to read data from the directory
def data_generator(data_dir, masks, images, dims, batch_size=batch_size):
    while True:
        ix=np.random.choice(np.arange(len(images)), batch_size)
        imgs = []
        labels = []
        for i in ix:
            # images
            original_img = cv2.imread(images[i])
            resized_img = imresize(original_img, dims + [3]) 
            array_img = resized_img/255
            array_img = array_img.swapaxes(0, 2)
            imgs.append(array_img)
            #imgs is a numpy array with dim: (batch size X 128 X 128 3)
            #print('shape of imgs ', array_img.shape)
            # masks
            try:
                mask_filename = basename(images[i])
                file_name = os.path.splitext(mask_filename)[0]
                correct_mask = '/'.join([input_folder,'train_masks',file_name+'_mask.gif'])
                original_mask = Image.open(correct_mask).convert('L')
                data = np.asarray(original_mask, dtype="int32")
                resized_mask = imresize(original_mask, dims+[3])
                array_mask = resized_mask / 255
                labels.append(array_mask)
            except Exception as e:
                labels=None
            
        imgs = np.array(imgs)
        labels = np.array(labels)
        try:
            relabel = labels.reshape(-1, dims[0], dims[1], 1)
            relabel = relabel.swapaxes(1, 3)
        except Exception as e:
            relabel=labels
        yield imgs, relabel

In [None]:
train_gen = data_generator('train/', train_masks, train_images, dims=[img_rows, img_cols])
img, msk = next(train_gen)
train_img = img[0].swapaxes(0,2)
train_msk = msk.swapaxes(1,3)

fig, ax = plt.subplots(1,2, figsize=(16, 16))
ax = ax.ravel()
ax[0].imshow(train_img, cmap='gray') 
ax[0].set_title('Training Image')
ax[1].imshow(grey2rgb(train_msk[0]), cmap='gray')
ax[1].set_title('Training Image mask')

In [None]:
# create an instance of a validation generator:
validation_gen = data_generator('train/', train_masks, validation_images, dims=[img_rows, img_cols]) 


In [None]:
def build_model():
    model = Sequential()
    
    model.add(Conv2D(16, (3,3), input_shape = (3, img_rows, img_cols), padding="same",activation = 'relu'))
    model.add(MaxPool2D(pool_size=(2,2), data_format="channels_first"))
    
    model.add(Conv2D(32, (3,3), padding="same",activation = 'relu'))
    model.add(MaxPool2D(pool_size=(2,2), data_format="channels_first"))
    
    model.add(Conv2D(64, (3,3), padding="same",activation = 'relu'))
    model.add(MaxPool2D(pool_size=(2,2), data_format="channels_first"))
    
    model.add(Conv2D(128, (3,3), padding="same",activation = 'relu'))
    model.add(MaxPool2D(pool_size=(2,2), data_format="channels_first"))
    
    model.add(Conv2D(256, (3,3), padding="same",activation = 'relu'))
    model.add(MaxPool2D(pool_size=(2,2), data_format="channels_first"))
    
    model.add(Conv2D(512, (3,3), padding="same",activation = 'relu'))
    
    model.add(UpSampling2D(size=(2,2), data_format=None, interpolation='nearest'))
    model.add(Conv2D(256, (3,3), padding="same",activation = 'relu'))
    
    model.add(UpSampling2D(size=(2, 2), data_format=None, interpolation='nearest'))
    model.add(Conv2D(128, (3,3), padding="same",activation = 'relu'))
    
    model.add(UpSampling2D(size=(2, 2), data_format=None, interpolation='nearest'))
    model.add(Conv2D(64, (3,3), padding="same",activation = 'relu'))
    
    model.add(UpSampling2D(size=(2, 2), data_format=None, interpolation='nearest'))
    model.add(Conv2D(32, (3,3), padding="same",activation = 'relu'))
    
    model.add(UpSampling2D(size=(2, 2), data_format=None, interpolation='nearest'))
    model.add(Conv2D(16, (3,3), padding="same",activation = 'relu'))
    
    model.add(Conv2D(1, (1,1), activation = 'sigmoid'))
    #model.add(Flatten())
    
    #model.add(Dense(4, activation='relu'))
    #model.add(Dense(units= img_cols*img_rows*2, activation = 'sigmoid'))
    
    model.compile(optimizer= Adam(lr=0.001), loss='binary_crossentropy', metrics=[dice_coef])
    
    return model

In [None]:
# Build and compile the model
model = build_model()
model.summary()

In [None]:
# Fit the model to the training set and compute dice coefficient at each validation set

save_model = ModelCheckpoint('Best_model.h5', monitor='dice_coef', verbose=0, save_best_only=True)
model_run = model.fit_generator(train_gen, steps_per_epoch=50, epochs=40, validation_data=validation_gen, validation_steps=50, callbacks=[save_model])


In [None]:
#pd.DataFrame(model_run.history)[['dice_coef','val_dice_coef']].plot()

In [None]:
from keras.models import load_model
model1 = load_model('Best_model.h5')

img, msk = next(validation_gen)
predicted_mask = model.predict(img).swapaxes(1,3)
validation_image = img[0].swapaxes(0,2)

fig, ax = plt.subplots(1,2, figsize=(16, 16))
ax = ax.ravel()
ax[0].imshow(validation_image, cmap='gray') 
ax[0].set_title('Validation Image')
ax[1].imshow(grey2rgb(predicted_mask[0]), cmap='gray')
ax[1].set_title('Validation Image mask')

In [None]:
test_set = data_generator('test/', train_masks, test, dims=[img_rows, img_cols]) 
img_tst, msk_tst = next(test_set)
predicted_mask_tst = model.predict(img_tst)
predicted_mask_tst = predicted_mask_tst.swapaxes(1,3)
test_mask = grey2rgb(predicted_mask_tst[0])

test_image = img_tst[0].swapaxes(0,2)

fig, ax = plt.subplots(1,2, figsize=(16, 16))
ax = ax.ravel()
ax[0].imshow(test_image, cmap='gray') 
ax[0].set_title('Test Image')
ax[1].imshow(test_mask, cmap='gray')
ax[1].set_title('Test Image mask')

In [None]:
# Creating the submission file
from tqdm import tqdm_notebook
print(os.listdir())

def rle_encode(img):
    vector = img.flatten()
    vector = [0] + vector + [0]
    runs = np.where(vector[1:] != vector[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

mask_list = []
def test_predict_mask(images):
    dims = [img_rows, img_cols]
    for i in tqdm_notebook(range(20000)):
        original_img = cv2.imread(images[i])
        resized_img = imresize(original_img, dims + [3]) 
        array_img = resized_img/255
        array_img = array_img.swapaxes(0, 2)
        array_img = np.array([array_img]*batch_size)
        
        predicted_mask = model.predict(array_img).swapaxes(1,3)
        
        predicted_mask[predicted_mask>0.5]=1
        predicted_mask[predicted_mask<=0.5]=0
        mask=rle_encode(predicted_mask)
        
        file_name = images[i].split('/')[-1]
        mask_list.append([file_name, mask])
    mask_df = pd.DataFrame(mask_list, columns=['img','rle_mask'])    
    return mask_df

In [15]:
# Creating the submission file
mask = test_predict_mask(test)
print(mask)
mask.to_csv('Submission_csv.csv', index=False)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


                      img                                           rle_mask
0     05aad5f1ce32_16.jpg  262575 2 263578 35 264596 66 265607 171 266625...
1     3f71f4e7a56f_11.jpg  336297 31 337304 99 338323 121 339340 138 3403...
2     41902e49c45d_10.jpg  186751 3 187773 10 188796 14 189818 19 190839 ...
3     1b8d5b1e8f16_13.jpg  403858 23 404873 44 405891 60 406911 73 407933...
4     a93fe16fe9d3_13.jpg  279997 34 280984 82 281994 108 283012 120 2840...
5     4ddedccf4439_14.jpg  281982 4 283002 16 284023 23 285045 30 286066 ...
6     0092275fcc63_08.jpg  301583 40 302600 62 303620 75 304621 112 30563...
7     e36b6abe0d79_12.jpg  399795 44 400805 90 401822 105 402837 121 4038...
8     eff68da50257_07.jpg  301615 28 302614 69 303630 89 304644 114 30566...
9     88d8f9610cff_02.jpg  299535 43 300538 81 301553 98 302561 124 30357...
10    8ae1ca908479_11.jpg  421337 24 422335 68 423349 87 424364 105 42537...
11    4e2a517b0eaa_08.jpg  166525 4 167551 2 168570 8 169594 8 170619 7 1..