In [None]:
# Data download and unzipping
%%capture
!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage1_test.zip -c
!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage1_train.zip -c

!mkdir stage1_train stage1_test

!unzip stage1_train.zip -d stage1_train/
!unzip stage1_test.zip -d stage1_test/

In [None]:
# Colab-only helper module used here to bring local files into the runtime.
from google.colab import files
# The result of the upload call is kept in `uploaded`.
# NOTE(review): presumably the files are data_utils.py and model.py, which a
# later cell imports from -- confirm.
uploaded = files.upload() # upload .py files

In [None]:
# List the working directory, e.g. to check that the downloaded archives and
# the uploaded files are in place.
!ls

In [None]:
%%capture
# Upgrade Keras to the newest release; %%capture hides pip's output.
# NOTE(review): the version is not pinned, so the installed Keras (and its
# API) depends on when the notebook is run -- consider pinning.
!pip install keras --upgrade

In [None]:
import os
import random
import sys
import warnings
import numpy as np
import pandas as pd
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from keras.models import Model, load_model
from keras import optimizers
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Setting seeds for reproducibility.
# Both the stdlib RNG and NumPy's legacy global RNG are seeded; SEED is also
# passed as `random_state` to the scikit-learn splitters further down.
# NOTE(review): the Keras backend RNG (weight initialisation, shuffling) is not
# seeded here, so training runs are presumably still not bit-for-bit
# repeatable -- confirm whether that matters.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
%matplotlib inline

In [None]:
from data_utils import read_train_data, read_test_data, rle_encoding, mask_to_rle
from model import get_unet, iou_metric, iou_metric_batch, my_iou_metric

In [None]:
# Load the training set: images in X_train, matching masks in Y_train.
X_train, Y_train = read_train_data()

# Load the test set; the second return value is kept as test_img_sizes and is
# used later to resize predictions back to each image's own size.
test_img, test_img_sizes = read_test_data()

Leaderboard: 0.329: BATCH_SIZE = 32, EPOCHS = 100, LEARNING_RATE = 1e-3, OPTIMIZER = optimizers.Adam(), val_size = 0.25, kfold=4<br>
Leaderboard: 0.320: BATCH_SIZE = 16, EPOCHS = 50, LEARNING_RATE = 1e-3, OPTIMIZER = optimizers.Adam(), val_size = 0.25<br>
Leaderboard: 0.317: BATCH_SIZE = 16, EPOCHS = 50, LEARNING_RATE = 1e-3, OPTIMIZER = optimizers.Adam(), val_size = 0.25, kfold=4<br>
Leaderboard: 0.277: BATCH_SIZE = 16, EPOCHS = 50, LEARNING_RATE = 1e-3, OPTIMIZER = optimizers.Adam(), val_size = 0.2<br>

In [None]:
#@title Hyperparameter { run: "auto", display-mode: "both" }
# Tunable settings for the run (rendered as a Colab form through the
# `#@param` annotations):
#   BATCH_SIZE / EPOCHS - passed to `fit`
#   LEARNING_RATE       - step size handed to the Adam optimizer
#   OPTIMIZER           - optimizer instance given to `get_unet`
#   val_size            - hold-out fraction passed to `train_test_split`
# The optimizer argument is spelled `learning_rate`: `lr` is the deprecated
# alias and is rejected by Keras 3, which `pip install keras --upgrade` pulls in.
BATCH_SIZE = 16 #@param {type:"raw"}
EPOCHS = 50 #@param {type:"raw"}
LEARNING_RATE = 1e-3 #@param {type:"raw"}
OPTIMIZER = optimizers.Adam(learning_rate=LEARNING_RATE) #@param {type:"raw"}
val_size = 0.25 #@param {type:"raw"}

### Train/Val split, U-net model

In [None]:

# Hold out a `val_size` fraction of the training data for validation; the
# split is made repeatable through `random_state=SEED`.
train_img, val_img, train_mask, val_mask = train_test_split(
    X_train, Y_train, test_size=val_size, random_state=SEED)

# Sample counts of the two partitions. The validation count gets its own name
# so the `val_size` hyperparameter (a fraction) is not overwritten with an
# integer, which would silently change its meaning when the cell is re-run.
train_size = len(train_img)
val_count = len(val_img)

# Sanity check: the two partitions together cover the whole training set.
assert train_size + val_count == len(X_train)

In [None]:
# Build the U-Net via `get_unet` from the local `model` module, handing it the
# optimizer configured in the hyperparameter cell.
# NOTE(review): presumably `get_unet` also compiles the model (loss + IoU
# metric), since `fit` is called on it directly below -- confirm in model.py.
u_net = get_unet(OPTIMIZER=OPTIMIZER)
# Uncomment to print the layer-by-layer architecture.
# u_net.summary()

### Training

In [None]:
print("Training...\n")

# Checkpoint file; `save_best_only=True` means it is only rewritten when the
# monitored validation IoU improves (mode='max').
filepath = "best_model.h5"
checkpointer = ModelCheckpoint(
    filepath,
    monitor='val_my_iou_metric',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Stop training once the validation loss has not improved for 10 epochs.
earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

# `results.history` is read by the plotting cell further down.
results = u_net.fit(
    train_img,
    train_mask,
    validation_data=(val_img, val_mask),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[earlystopper, checkpointer],
    verbose=1,
)

### Plot training history

In [None]:
# Per-epoch curves recorded by `fit` for the single train/val split.
loss = results.history['loss']
val_loss = results.history['val_loss']
acc = results.history['my_iou_metric']
val_acc = results.history['val_my_iou_metric']

# Loss curves. Axes are labelled so the figure can be read on its own.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(np.arange(len(loss)), np.array(loss), label='train loss')
loss_ax.plot(np.arange(len(val_loss)), np.array(val_loss), label='val loss')
loss_ax.set(xlabel='epoch', ylabel='loss', title='Training vs. validation loss')
loss_ax.legend(loc='upper right')  # same position as the numeric code 1

# IoU curves (the metric that the checkpoint callback monitors).
iou_fig, iou_ax = plt.subplots()
iou_ax.plot(np.arange(len(acc)), np.array(acc), label='train iou')
iou_ax.plot(np.arange(len(val_acc)), np.array(val_acc), label='val iou')
iou_ax.set(xlabel='epoch', ylabel='IoU', title='Training vs. validation IoU')
iou_ax.legend(loc='lower right')  # same position as the numeric code 4

# Render explicitly so the cell output is the figures, not a Legend repr.
plt.show()

### Load best model and evaluate

In [None]:
# Reload the checkpoint written during training. The custom IoU metric is
# passed through `custom_objects` so the saved model can resolve it by name.
iou_custom_objects = {'my_iou_metric': my_iou_metric}
best_model = load_model('best_model.h5', custom_objects=iou_custom_objects)

# Run inference on all three image sets with the restored model.
print('Predicting...')
train_pred = best_model.predict(train_img, verbose=1)
val_pred = best_model.predict(val_img, verbose=1)
test_pred = best_model.predict(test_img, verbose=1)

# Score the predictions against the ground-truth masks and report both.
train_iou = iou_metric_batch(train_mask, train_pred)
val_iou = iou_metric_batch(val_mask, val_pred)
for label, score in (('Best train IOU = ', train_iou),
                     ('Best val IOU = ', val_iou)):
    print(label, score)

### Visualizations

In [None]:
# Pick one random validation sample and show, side by side, the input image,
# its ground-truth mask and the prediction thresholded at 0.5.
idx = np.random.choice(range(len(val_img)))
panels = [
    (val_img[idx], "val image"),
    (np.squeeze(val_mask[idx]), "val mask"),
    (np.squeeze(val_pred[idx] > 0.5), "val predictions"),
]

plt.figure(figsize=(12,12))
for position, (picture, caption) in enumerate(panels, start=1):
    # One row, one column per panel.
    plt.subplot(1, len(panels), position)
    imshow(picture)
    plt.title(caption)
plt.show()

In [None]:
# Pick one random test image and show it next to its predicted mask
# (prediction thresholded at 0.5; there is no ground truth for the test set).
idx = np.random.choice(range(len(test_img)))
panels = [
    (test_img[idx], "Image"),
    (np.squeeze(test_pred[idx] > 0.5), "Mask Prediction"),
]

plt.figure(figsize=(12,12))
for position, (picture, caption) in enumerate(panels, start=1):
    # One row, one column per panel.
    plt.subplot(1, len(panels), position)
    imshow(picture)
    plt.title(caption)
plt.show()

### KFold

In [None]:
from sklearn.model_selection import KFold

# k-fold cross-validation: train one U-Net per fold, keep each fold's best
# checkpoint, score it on that fold's train/val partitions and predict the
# test set with it.
k = 4
kf = KFold(n_splits=k, shuffle=True, random_state=SEED)
historys = []                    # Keras History object of every fold
train_ious, val_ious = [], []    # per-fold IoU of the best checkpoint
test_preds = []                  # per-fold predictions on the test set

for i, (train_index, val_index) in enumerate(kf.split(X_train)):

    train_img, train_mask = X_train[train_index], Y_train[train_index]
    val_img, val_mask = X_train[val_index], Y_train[val_index]

    # Every fold gets its own optimizer instance, rebuilt from the configured
    # one. An optimizer holds state tied to the model it was first used with,
    # so sharing a single instance across freshly built models is unsafe.
    fold_optimizer = type(OPTIMIZER).from_config(OPTIMIZER.get_config())
    u_net = get_unet(OPTIMIZER=fold_optimizer)

    filepath= 'best_model' + str(i) + '.h5'   # one checkpoint file per fold
    print('Training...')
    # Both callbacks follow the validation IoU (higher is better).
    earlystopper = EarlyStopping(monitor='val_my_iou_metric', patience=10, mode='max')
    checkpointer = ModelCheckpoint(filepath, monitor='val_my_iou_metric',
                                   verbose=1, save_best_only=True, mode='max')
    result = u_net.fit(x=train_img, y=train_mask, validation_data=(val_img, val_mask),
                       batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1,
                       callbacks=[earlystopper, checkpointer])
    historys.append(result)

    # Evaluate with the best checkpoint of this fold, not the last epoch.
    best_model = load_model(filepath, custom_objects={'my_iou_metric': my_iou_metric})
    print('Predicting...')
    train_pred = best_model.predict(train_img, verbose=1)
    val_pred = best_model.predict(val_img, verbose=1)
    test_pred = best_model.predict(test_img, verbose=1)
    train_iou = iou_metric_batch(train_mask, train_pred)
    val_iou = iou_metric_batch(val_mask, val_pred)
    test_preds.append(test_pred)
    train_ious.append(train_iou)
    val_ious.append(val_iou)
    print('Best train IOU = ', train_iou)
    print('Best val IOU = ', val_iou)

# Cross-validation summary over all folds.
print('Mean train IOU = ', np.mean(train_ious))
print('Mean val IOU = ', np.mean(val_ious))

# Ensemble: average the per-fold test predictions. Without this, `test_pred`
# would hold only the last fold's output and `test_preds` would go unused, so
# the submission would silently ignore the other k-1 models.
test_pred = np.mean(test_preds, axis=0)

### Submission

In [None]:
# Resize every predicted test mask to the size stored for its image: the
# target shape is the first two entries of the matching `test_img_sizes`
# record (presumably height and width -- confirm in data_utils).
# `preserve_range=True` keeps the prediction values unscaled.
test_mask_upsampled = [
    resize(
        np.squeeze(pred_mask),
        (test_img_sizes[n][0], test_img_sizes[n][1]),
        mode='constant',
        preserve_range=True,
    )
    for n, pred_mask in enumerate(test_pred)
]

In [None]:
# Convert the upsampled masks to the submission encoding with `mask_to_rle`
# from the local `data_utils` module. It returns two parallel sequences that
# the next cell writes to the 'ImageId' and 'EncodedPixels' columns.
# NOTE(review): presumably any thresholding / nucleus separation happens
# inside `mask_to_rle` -- confirm in data_utils.py.
test_ids,rles = mask_to_rle(test_mask_upsampled)

In [None]:
# Build the submission table: one row per encoded mask.
# Each run-length list is flattened into a single space-separated string.
encoded_pixels = pd.Series(rles).apply(lambda run: ' '.join(map(str, run)))

sub = pd.DataFrame()
sub['ImageId'] = test_ids
sub['EncodedPixels'] = encoded_pixels

# Write the CSV without the DataFrame index column.
sub.to_csv('sub-dsbowl2018.csv', index=False)

In [None]:
# Download the generated submission file from the Colab runtime to the local
# machine (this only works inside Google Colab).

from google.colab import files
files.download('sub-dsbowl2018.csv')
# Uncomment to also download the checkpoint of the single-split run.
# files.download('best_model.h5')