# Install libraries

In [0]:
# Install imageio, the Kaggle command-line client and ipywidgets with pip.
# NOTE(review): versions are not pinned, so a fresh runtime may resolve
# different releases than the ones this notebook was written against.
! pip install imageio
! pip install kaggle
! pip install ipywidgets

# Import

In [0]:
import os
import numpy as np
import imageio
import pandas as pd

import tensorflow as tf

from keras.models import Model, load_model
from keras.models import Sequential
from keras.layers import Input, BatchNormalization, Activation, UpSampling2D, ZeroPadding2D
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose, Cropping2D
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import backend as K

from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras import optimizers

import sys
from tqdm import tqdm
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from skimage.transform import resize
from sklearn.model_selection import train_test_split

# Helper Functions

In [0]:
# convert rle to mask image
def rleToMask(rleString,height,width):
    """Decode a run-length-encoded string into a 2-D mask image.

    The encoding is a whitespace-separated sequence of ``start length`` pairs.
    ``start`` is a 1-based index into the column-major flattening of the
    image (the flat buffer is reshaped to ``(width, height)`` and transposed).

    Parameters
    ----------
    rleString : str
        The run-length encoding. Anything that cannot be parsed (a non-string
        such as NaN, an empty string, non-numeric tokens, or an odd number of
        values) yields an all-zero mask.
    height, width : int
        Size of the decoded mask.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width)`` holding 255 inside the
        encoded runs and 0 elsewhere.
    """
    rows, cols = height, width
    try:
        rleNumbers = [int(token) for token in rleString.split()]
        rlePairs = np.array(rleNumbers, dtype=np.int64).reshape(-1, 2)
    except (AttributeError, TypeError, ValueError):
        # Unparseable input: return an empty mask with the SAME shape and
        # dtype as the success path, so callers never see a transposed
        # float array for non-square images.
        return np.zeros((rows, cols), dtype=np.uint8)

    img = np.zeros(rows * cols, dtype=np.uint8)
    for start, length in rlePairs:
        # RLE start positions are 1-based; convert to a 0-based offset.
        start -= 1
        img[start:start + length] = 255
    # The flat buffer is column-major: reshape as (cols, rows), then transpose.
    return img.reshape(cols, rows).T

# resize predicted mask to original size  
def resize_mask(x):
    """Resize a predicted mask to the fixed output shape ``(101, 101, 1)``.

    Values are kept in their original range (``preserve_range=True``).
    """
    original_shape = (101, 101, 1)
    return resize(x, original_shape, mode='constant', preserve_range=True)

# binarize the output probabilities  
def binarize(x, threshold):
    """Return an array holding 1 where ``x`` is strictly above ``threshold``, else 0."""
    above_threshold = x > threshold
    return np.where(above_threshold, 1, 0)

# encode predicted mask to rle 
def run_length_encoding(x):
    """Run-length encode the non-zero entries of a mask.

    The mask is transposed and flattened, and each maximal run of consecutive
    non-zero positions is emitted as a ``start, length`` pair with a 1-based
    ``start``. The result is a flat list ``[start0, len0, start1, len1, ...]``.

    Adapted from
    https://www.kaggle.com/c/data-science-bowl-2018/discussion/48561#
    """
    positions = np.where(x.T.flatten())[0]
    runs = []
    last_seen = -2
    for pos in positions:
        if pos - last_seen > 1:
            # Gap since the previous pixel: open a new run of length 1.
            runs += [pos + 1, 1]
        else:
            # Adjacent to the previous pixel: lengthen the current run.
            runs[-1] += 1
        last_seen = pos
    return runs

# generate submission dataframe
def create_submission(metalist, predictions):
    """Build the submission dataframe from image ids and predicted masks.

    Each prediction is resized with ``resize_mask``, thresholded at 0.5 with
    ``binarize`` and run-length encoded; the encoded numbers are joined with
    single spaces. Returns a string-typed dataframe with the columns ``id``
    and ``rle_mask``, one row per ``(image_id, mask)`` pair.
    """
    def encode(mask):
        restored = resize_mask(mask)
        binary = binarize(restored, 0.5)
        return ' '.join(map(str, run_length_encoding(binary)))

    rows = [[image_id, encode(mask)] for image_id, mask in zip(metalist, predictions)]
    frame = pd.DataFrame(rows, columns=['id', 'rle_mask'])
    return frame.astype(str)

In [0]:
target_size = 128
def upsample(imglist):
    """Turn each image into a 3-channel array resized to ``target_size``.

    Every image is stacked three times along a new last axis and then resized
    to ``(target_size, target_size)`` with its value range preserved. Returns
    a list with one resized array per input image.
    """
    def to_three_channel(img):
        stacked = np.stack([img] * 3, axis=-1)
        return resize(stacked, (target_size, target_size), mode='constant', preserve_range=True)

    return [to_three_channel(img) for img in imglist]

def upsample_mask(imglist):
    """Resize every mask in ``imglist`` to ``(target_size, target_size)``.

    The value range is preserved; the result is a list with one resized array
    per input mask.
    """
    return [
        resize(mask, (target_size, target_size), mode='constant', preserve_range=True)
        for mask in imglist
    ]

### Helper function to print status of layers (trainable/non-trainable)

In [0]:
def print_layer_trainable(model):
    """Print one ``<trainable>:<TAB><name>`` line for each layer in ``model.layers``."""
    row_template = "{0}:\t{1}"
    for current_layer in model.layers:
        is_trainable = current_layer.trainable
        print(row_template.format(is_trainable, current_layer.name))

# Import Dataset

In [0]:
# Upload kaggle.json (the file the Kaggle API client reads) through the
# Colab file-upload helper.
# NOTE(review): this import only resolves on Google Colab.
from google.colab import files
files.upload()

In [0]:
# Ensure it's there
!ls -lha kaggle.json

In [0]:
# The Kaggle API client expects this file to be in ~/.kaggle,
# so let's move it there.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# This permissions change avoids a warning on Kaggle tool startup.
!chmod 600 ~/.kaggle/kaggle.json

In [0]:
# Let's now download our dataset
!kaggle competitions download -c tgs-salt-identification-challenge 

In [0]:
# And we'll need those training images unzipped (list the directory first
# to confirm the archives were downloaded).
!ls
!unzip train.zip

In [0]:
# Extract the test archive into its own test/ directory.
!unzip test.zip -d test/

# Initialize

In [0]:
!ls

In [0]:
# Read the depth table, then the training ids (first CSV column only), and
# attach each sample's depth by joining on the shared "id" index.
depths_df = pd.read_csv("depths.csv", index_col="id")
train_df = pd.read_csv("train.csv", index_col="id", usecols=[0]).join(depths_df)

In [0]:
# Load every training image as a grayscale array and divide by 255
# (tqdm shows progress over the sample ids).
train_df["images"] = [np.array(load_img("images/{}.png".format(idx), grayscale=True)) / 255 for idx in tqdm(train_df.index)]

In [0]:
# Load the matching mask for every training image, again as a grayscale
# array divided by 255.
train_df["masks"] = [np.array(load_img("masks/{}.png".format(idx), grayscale=True)) / 255 for idx in tqdm(train_df.index)]

# Salt coverage stratification

In [0]:
# Coverage = sum of the mask values divided by 101 * 101.
# NOTE(review): assumes every mask is 101x101 — confirm against the data.
train_df["coverage"] = train_df.masks.map(np.sum) / pow(101, 2)

In [0]:
def cov_to_class(val):
    """Return the smallest integer ``i`` in 0..10 with ``val * 10 <= i``.

    Returns ``None`` when no such integer exists (``val * 10`` above 10).
    """
    scaled = val * 10
    return next((bucket for bucket in range(11) if scaled <= bucket), None)
train_df["coverage_class"] = train_df.coverage.map(cov_to_class)

In [0]:
# Upsample all training images to 3-channel 128x128 arrays and pack them
# into a single (N, 128, 128, 3) array.
X = np.array(upsample(train_df.images.values.tolist())).reshape(-1, 128, 128, 3)

In [0]:
# Upsample all training masks to 128x128 and pack them into a single
# (N, 128, 128, 1) array.
Y = np.array(upsample_mask(train_df.masks.values.tolist())).reshape(-1, 128, 128, 1)

In [0]:
# Hold out 10% of the samples for validation, stratified by coverage class so
# both splits see the same distribution of coverage classes.
# random_state pins the shuffle: without it the split changes on every run,
# so weights reloaded after a runtime restart would be validated on samples
# they were trained on.
ids_train, ids_valid, x_train, x_valid, y_train, y_valid, cov_train, cov_valid, depth_train, depth_valid = train_test_split(
    train_df.index.values,
    X,
    Y,
    train_df.coverage.values,
    train_df.z.values,
    test_size=0.10, stratify=train_df.coverage_class, random_state=1)

# Augmentation

In [0]:
# Two generators with identical flip settings: one for images, one for masks.
image_datagen = image.ImageDataGenerator(horizontal_flip=True, vertical_flip=True)
mask_datagen = image.ImageDataGenerator(horizontal_flip=True, vertical_flip=True)

# Provide the same seed and keyword arguments to the fit and flow methods
seed = 1

# NOTE(review): neither generator enables a featurewise/ZCA option, so these
# fit() calls look unnecessary — confirm and drop if so.
image_datagen.fit(x_train, augment=True, seed=seed)
mask_datagen.fit(y_train, augment=True, seed=seed)

# The image flow carries depth_train in the label slot, so it yields
# (image_batch, depth_batch) pairs; the mask flow yields mask batches only.
x_generator = image_datagen.flow(x_train,depth_train,batch_size=16,seed=seed)
y_generator = mask_datagen.flow(y_train,batch_size=16,seed=seed)

# combine generators into one which yields image and masks
# Each item is ((image_batch, depth_batch), mask_batch).
# NOTE(review): the model's inputs are named 'img' and 'feat'; verify this
# nesting is accepted before feeding train_generator to the model.
train_generator = zip(x_generator, y_generator)

# Build ResNet50+UNet model

In [0]:
# Build the ImageNet-pretrained ResNet50 encoder without its classification
# head, for 256x256 RGB input.
# The constructor is reached through its module because this cell rebinds the
# name `ResNet50` from the imported class to the model instance. Calling
# `ResNet50(...)` directly would fail when the cell is run a second time.
from keras.applications import resnet50 as resnet50_module
ResNet50 = resnet50_module.ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

In [0]:
ResNet50.summary()

## Merge layers

In [0]:
# Encoder layers whose outputs feed the decoder, looked up by name.
# NOTE(review): the `activation_N` names are auto-generated by Keras and may
# differ between Keras versions or when a model is rebuilt in the same
# session — confirm against ResNet50.summary().
transfer_layers = [
    ResNet50.get_layer(layer_name)
    for layer_name in (
        'conv1_pad',
        'activation_1',
        'activation_10',
        'activation_22',
        'activation_40',
        'activation_49',
    )
]

In [0]:
len(transfer_layers)

In [0]:
# Print the output shape of every tapped encoder layer so the decoder's
# spatial sizes can be checked against them.
for l in transfer_layers:
  print(l.output.shape)

In [0]:
# Spatial size of the images fed to the model.
im_width = 128
im_height = 128

# Number of image channels.
im_chan = 3 
n_features = 1 # Number of extra features, like depth

In [0]:
# Wrap the encoder in a model that returns the output of every tapped layer
# (six tensors, in the order of transfer_layers).
resnet50 = Model(
    inputs=ResNet50.input,
    outputs=[tapped_layer.output for tapped_layer in transfer_layers],
)

In [0]:
# Initial architecture
# Two model inputs: the image and the vector of extra features.
input_img = Input((im_height, im_width, im_chan), name='img')
input_features = Input((n_features, ), name='feat')
# Upsample the image (default UpSampling2D factor) before the encoder, which
# was built for 256x256 input.
up_0 = UpSampling2D()(input_img)
# Resnet is the list of encoder feature maps, indexed like transfer_layers.
Resnet = resnet50(up_0)

## Write Unet portion

In [0]:
# Join features information in the deepest layer
f_repeat = RepeatVector(8*8)(input_features)
f_conv = Reshape((8, 8, n_features))(f_repeat)
p4_feat = concatenate([Resnet[5], f_conv], -1)

# 8x8
u6 = Conv2DTranspose(1024, (2, 2), strides=(2, 2), padding='same') (p4_feat)
b6 = BatchNormalization()(u6)
# 16x16
u6 = concatenate([u6, Resnet[4]])
c6 = Conv2D(1024, (3, 3), activation='relu', padding='same') (u6)
b6 = BatchNormalization()(c6)
c6 = Conv2D(1024, (3, 3), activation='relu', padding='same') (b6)
b6 = BatchNormalization()(c6)

u7 = Conv2DTranspose(512, (2, 2), strides=(2, 2), padding='same') (c6)
b7 = BatchNormalization()(u7)
# 32x32
u7 = concatenate([u7, Resnet[3]])
c7 = Conv2D(512, (3, 3), activation='relu', padding='same') (u7)
b7 = BatchNormalization()(c7)
c7 = Conv2D(512, (3, 3), activation='relu', padding='same') (b7)
b7 = BatchNormalization()(c7)


u8 = Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same') (c7)
b8 = BatchNormalization()(u8)
# 64x64
###### adjustment 63x63
up8 = UpSampling2D()(b8)
cr8 = Cropping2D(cropping=(1,1)) (up8)
a8 = AveragePooling2D()(cr8)
######
u8 = concatenate([a8, Resnet[2]])
###### again 64x64
up8 = UpSampling2D()(u8)
z8 = ZeroPadding2D()(up8)
a8 = AveragePooling2D()(z8)
######
c8 = Conv2D(256, (3, 3), activation='relu', padding='same') (a8)
b8 = BatchNormalization()(c8)
c8 = Conv2D(256, (3, 3), activation='relu', padding=('same')) (c8)
b8 = BatchNormalization()(c8)

u9 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c8)
b9 = BatchNormalization()(u9)
# 128x128
u9 = concatenate([u9, Resnet[1]], axis=3)
c9 = Conv2D(128, (3, 3), activation='relu', padding='same') (b9)
b9 = BatchNormalization()(c9)
c9 = Conv2D(128, (3, 3), activation='relu', padding='same') (b9)
b9 = BatchNormalization()(c9)
'''
u10 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c9)
# 256x256
###### adjustment 262x262
z10 = ZeroPadding2D(padding=(3, 3))(u10)
######
u10 = concatenate([z10, Resnet[0]], axis=3)
c10 = Conv2D(64, (3, 3), activation='relu', padding='valid') (u10)
c10 = Conv2D(64, (3, 3), activation='relu', padding='valid') (c10)
c10 = Conv2D(64, (3, 3), activation='relu', padding='valid') (c10)
'''

outputs = Conv2D(1, (1, 1), activation='sigmoid') (b9)

model = Model(inputs=[input_img, input_features], outputs=[outputs])

In [0]:
model.summary()

In [0]:
# check the layers by name
for i,layer in enumerate(model.layers):
    print(i,layer.name)

# Transfer Learning

## Fine Tuning

In [0]:
# Mark every layer in model.layers as trainable (full fine-tuning).
for layer in model.layers:
  layer.trainable = True

In [0]:
print_layer_trainable(model)

Once we have changed whether the model's layers are trainable, we need to compile the model for the changes to take effect.

**Lower learning rate**

In [0]:
# Adam with a learning rate of 1e-4 for fine-tuning.
# NOTE(review): newer Keras releases name this argument `learning_rate`;
# confirm which spelling the installed version expects.
optimizer_fine = optimizers.Adam(lr=1e-4)
# Loss for the single-channel sigmoid output.
loss = 'binary_crossentropy'

In [0]:
model.compile(optimizer=optimizer_fine, loss=loss)

## Training

In [0]:
# Callbacks: stop after 5 epochs without improvement, reduce the learning
# rate after 3, and keep only the best weights in 'model-tgs-salt-1.h5'.
callbacks = [
    EarlyStopping(patience=5, verbose=1),
    ReduceLROnPlateau(patience=3, verbose=1),
    ModelCheckpoint('model-tgs-salt-1.h5', verbose=1, save_best_only=True, save_weights_only=True)
]

# Fix: a stray `generator` token before the input dict made this call a
# SyntaxError. The inputs are passed by name ('img' and 'feat'), matching the
# names of the model's two Input layers.
# NOTE(review): this trains on the raw arrays; train_generator from the
# Augmentation section is not used here.
results = model.fit({'img': x_train, 'feat': depth_train}, y_train, batch_size=16, epochs=25, callbacks=callbacks,
                    validation_data=({'img': x_valid, 'feat': depth_valid}, y_valid))

# Load model from saved weights (if runtime dies)

In [0]:
# model.load_weights('model-tgs-salt-1.h5')

# Preprocess testing data

In [0]:
# File names found directly inside test/images (the third item of the first
# os.walk result is the list of files in the top directory).
test_ids = next(os.walk("test/images"))[2]

In [0]:
# Pre-allocate float32 arrays for the test images and their extra features,
# one row per test file.
X_test = np.zeros((len(test_ids), im_height, im_width, im_chan), dtype=np.float32)
X_test_feat = np.zeros((len(test_ids), n_features), dtype=np.float32)

In [0]:
# Getting and resizing test images... 
# Fill the pre-allocated test arrays, one file at a time.
for row, file_name in tqdm(enumerate(test_ids), total=len(test_ids)):
    # Depth feature, looked up by the file name without its '.png' suffix.
    image_id = file_name.replace('.png', '')
    X_test_feat[row] = depths_df.loc[image_id, 'z']
    # Grayscale image divided by 255, then upsampled like the training images.
    gray = np.array(load_img("test/images/" + file_name, grayscale=True))/255
    upsampled = upsample([gray])[0]
    X_test[row] = np.array(upsampled).reshape(128, 128, 3)

# Prediction

In [0]:
# Predict masks for the test set, feeding the two inputs by name.
# Note: this rebinds `outputs`, which previously held the model's output
# tensor from the model-building cell.
outputs = model.predict({'img': X_test, 'feat': X_test_feat}, verbose=1)

# Create csv and submit to kaggle

In [0]:
# Derive the image ids by stripping the file extension into a NEW list.
# The previous in-place `test_ids[i][:-4]` was not idempotent: re-running
# this cell removed four more characters from every id, and re-running the
# test-loading cell afterwards would look for files without an extension.
submission_ids = [os.path.splitext(file_name)[0] for file_name in test_ids]

# Encode the predictions and write them without the dataframe index.
submission = create_submission(submission_ids, outputs)
submission.to_csv('submission.csv', index=False)

In [0]:
# Upload submission.csv to the competition through the Kaggle CLI.
!kaggle competitions submit -c tgs-salt-identification-challenge -f submission.csv -m "Trial submission"