In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import zipfile
import sys
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from skimage.transform import resize
import tensorflow as tf
import sklearn.model_selection
from keras.callbacks import ModelCheckpoint, EarlyStopping ,ReduceLROnPlateau
from keras.layers.merge import concatenate
import tensorflow_addons as tfa
#from keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, concatenate, Dropout , RepeatVector , Reshape
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img 
from tensorflow.keras.preprocessing import image_dataset_from_directory
from keras import backend as K
from keras.layers.core import Lambda, RepeatVector, Reshape
import os


In [None]:
# Directories (relative to the notebook's working directory) that hold the
# training images and their segmentation masks; both are read by the loading
# cell further down.
trainImagesPath = "./train/images/" 
trainMaskImagesPath = "./train/masks/"

1. # LOAD DATA

In [None]:
!unzip ../input/tgs-salt-identification-challenge/train.zip -d train

In [None]:
!unzip ../input/tgs-salt-identification-challenge/test.zip -d test

In [None]:
# Per-sample depth table, indexed by the 'id' column so rows can be looked up
# by image id later on.
depthsCsvPath = '../input/tgs-salt-identification-challenge/depths.csv'
depthDF = pd.read_csv(depthsCsvPath, index_col='id')
depthDF.head()

In [None]:
def load_images_from_folder(path,imagesIds):
    """Load image files as 128x128 single-channel arrays scaled by 1/255.

    Args:
        path: Directory that contains the image files.
        imagesIds: Iterable of file names (relative to ``path``), in the order
            the images should appear in the result.

    Returns:
        ``np.ndarray`` holding one ``(128, 128, 1)`` array per file name, in
        the same order as ``imagesIds``.

    Raises:
        ValueError: If a file cannot be read. Skipping it silently would shift
            every later image relative to ``imagesIds`` and to anything the
            caller indexes by the same list.
    """
    images = []
    for filename in imagesIds:
        filePath = os.path.join(path, filename)
        # cv2.imread signals failure by returning None instead of raising, so
        # the check has to happen before the array is handed to TensorFlow.
        img = cv2.imread(filePath)
        if img is None:
            raise ValueError("Could not read image file: {}".format(filePath))
        # OpenCV decodes to BGR channel order; rgb_to_grayscale expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = tf.image.resize(img, (128,128) )
        img = tf.image.rgb_to_grayscale(img) / 255
        # Convert to NumPy here: np.array() over a list of eager tensors
        # yields the same values but is far slower.
        images.append(img.numpy())
    return np.array(images)

In [None]:
def load_masks_from_folder(path,imagesIds):
    """Load mask files as 128x128 single-channel arrays scaled by 1/255.

    Masks are resized with nearest-neighbour sampling so that resizing does
    not blend mask values into intermediate ones.

    Args:
        path: Directory that contains the mask files.
        imagesIds: Iterable of file names (relative to ``path``), in the order
            the masks should appear in the result.

    Returns:
        ``np.ndarray`` holding one ``(128, 128, 1)`` array per file name, in
        the same order as ``imagesIds``.

    Raises:
        ValueError: If a file cannot be read. Skipping it silently would shift
            every later mask relative to ``imagesIds`` and break the pairing
            with the images loaded from the same list.
    """
    images = []
    for filename in imagesIds:
        filePath = os.path.join(path, filename)
        # cv2.imread signals failure by returning None instead of raising, so
        # the check has to happen before the array is handed to TensorFlow.
        img = cv2.imread(filePath)
        if img is None:
            raise ValueError("Could not read mask file: {}".format(filePath))
        # OpenCV decodes to BGR channel order; rgb_to_grayscale expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = tf.image.resize(img, (128,128), method='nearest')
        img = tf.image.rgb_to_grayscale(img) / 255
        # Convert to NumPy here: np.array() over a list of eager tensors
        # yields the same values but is far slower.
        images.append(img.numpy())
    return np.array(images)

In [None]:
#LOAD IMAGES
trainImagesIds = next(os.walk(trainImagesPath))[2]
trainMaskImagesIds = next(os.walk(trainMaskImagesPath))[2]

trainImages = load_images_from_folder(trainImagesPath,trainImagesIds)
trainMaskImages = load_masks_from_folder(trainMaskImagesPath,trainMaskImagesIds)

plt.imshow(trainMaskImages[0]) 

depth = np.zeros((len(trainImagesIds), 1))
# FILL DEPTH
for count, imageId in enumerate(trainImagesIds):
    depth[count][0] = depthDF.loc[imageId.replace('.png', ''), 'z']

> # Data Augmentation

In [None]:
# Augmentation, applied to the whole batch at once (axis 0 is the sample axis):
#   1) append a left-right mirrored copy of every sample (flip along width),
#   2) append a 90-degree rotated copy of every sample from step 1.
# Images and masks receive the same transforms in the same order, so the
# arrays stay aligned and end up four times their original length.
trainImages = np.concatenate((trainImages, np.flip(trainImages, axis=2)), axis=0)
trainMaskImages = np.concatenate((trainMaskImages, np.flip(trainMaskImages, axis=2)), axis=0)
trainImages = np.concatenate((trainImages, np.rot90(trainImages, axes=(1, 2))), axis=0)
trainMaskImages = np.concatenate((trainMaskImages, np.rot90(trainMaskImages, axes=(1, 2))), axis=0)
plt.imshow(trainImages[0])

In [None]:
# Spot-check one sample and report the array shape.
sampleIndex = 4000
plt.imshow(trainImages[sampleIndex])
print(np.shape(trainImages))

In [None]:
# Dump the raw values of one mask and render it.
maskSample = trainMaskImages[122]
print(maskSample)
plt.imshow(maskSample)

> # Normalize data

In [None]:
# Lift NumPy's print truncation so the full mask is printed, then show its
# maximum value, its shape and its contents.
np.set_printoptions(threshold=sys.maxsize)
maskSample = trainMaskImages[122]
print(np.amax(maskSample))
print(maskSample.shape)
print(maskSample)

In [None]:
# Scale depths by the column maximum.
depth = np.divide(depth, np.max(depth, axis=0))

In [None]:
# Repeat the flattened depth vector four times back to back so there is one
# depth value per augmented sample (the augmentation cell quadruples the
# image and mask arrays by appending whole transformed copies).
depth = np.tile(np.ravel(depth), 4)

> # Split Data

In [None]:
# Hold out 10% of the samples for validation; the fixed seed makes the split
# repeatable for a given input order.
# NOTE(review): the augmented copies were appended before this split, so the
# flipped/rotated versions of one image can fall on both sides of it --
# validation metrics may be optimistic.
splitArrays = sklearn.model_selection.train_test_split(
    trainImages, depth, trainMaskImages,
    test_size=0.1, random_state=42,
)
(X_trainImages, X_validImages,
 X_trainDepth, X_validDepth,
 Y_train, Y_valid) = splitArrays

> # Train Model

In [None]:
# Training callbacks, passed to model.fit below.
earlyStopping = EarlyStopping(patience=6, verbose=1)
reduceLearningRate = ReduceLROnPlateau(factor=0.8, patience=2, min_lr=0.00001, verbose=1)
bestCheckpoint = ModelCheckpoint('tgs-salt-model-v1.h5', verbose=1, save_best_only=True)

callbacks = [earlyStopping, reduceLearningRate, bestCheckpoint]

In [None]:
# Build U-Net model
inputImg = tf.keras.layers.Input((128, 128,1) , name='image')
inputDepth = tf.keras.layers.Input((1,), name='depth')

c1 = tf.keras.layers.Conv2D(8, (3, 3), activation='relu', padding='same') (inputImg)
c1 = tf.keras.layers.Conv2D(8, (3, 3), activation='relu', padding='same') (c1)
p1 = tf.keras.layers.MaxPooling2D((2, 2)) (c1)

c2 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same') (p1)
c2 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same') (c2)
p2 = tf.keras.layers.MaxPooling2D((2, 2)) (c2)

c3 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same') (p2)
c3 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same') (c3)
p3 = tf.keras.layers.MaxPooling2D((2, 2)) (c3)

c4 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same') (p3)
c4 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same') (c4)
p4 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2)) (c4)
f1 = tf.keras.layers.Flatten()(p4)


# Join features information in the depthest layer
dense1 = tf.keras.layers.Dense(16, activation="relu")(inputDepth)
dense2 = tf.keras.layers.Dense(8, activation="relu")(dense1)
combinedBranches = concatenate([f1, dense2])
denseDec = tf.keras.layers.Dense(64, activation="relu")(combinedBranches)
conc = tf.reshape(denseDec, [-1, 8, 8, 1])


c5 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same') (conc)
c5 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same') (c5)

u6 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c5)
u6 = concatenate([u6, c4])
c6 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same') (u6)
c6 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same') (c6)

u7 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c6)
u7 = concatenate([u7, c3])
c7 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same') (u7)
c7 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same') (c7)

u8 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c7)
u8 = concatenate([u8, c2])
c8 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same') (u8)
c8 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same') (c8)

u9 = tf.keras.layers.Conv2DTranspose(8, (2, 2), strides=(2, 2), padding='same') (c8)
u9 = concatenate([u9, c1], axis=3)
c9 = tf.keras.layers.Conv2D(8, (3, 3), activation='relu', padding='same') (u9)
c9 = tf.keras.layers.Conv2D(8, (3, 3), activation='relu', padding='same') (c9)

finalOutput = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid') (c9)

model = tf.keras.Model(inputs=[inputImg, inputDepth], outputs=[finalOutput])
model.compile(optimizer=tf.keras.optimizers.RMSprop(
    learning_rate=0.001, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False,
    name='RMSprop'),
    loss='binary_crossentropy',
    metrics=['accuracy'  ]) 
model.summary()

In [None]:
# Print the shape of every array produced by the train/validation split,
# training arrays first.
for splitArray in (X_trainImages, X_trainDepth, Y_train,
                   X_validImages, X_validDepth, Y_valid):
    print(np.array(splitArray).shape)

#X_trainImages = np.array(X_trainImages)
#X_trainDepth = np.array(X_trainDepth)
#Y_trainImages = np.array(Y_train)
#X_validImages = np.array(X_validImages)
#X_validDepth = np.array(X_validDepth)
#Y_validDepth = np.array(Y_validDepth)

In [None]:
# Inputs are passed by name, matching the 'image' and 'depth' Input layers.
trainInputs = {'image': X_trainImages, 'depth' : X_trainDepth}
validInputs = { 'image' : X_validImages, 'depth': X_validDepth}

results = model.fit(
    trainInputs,
    Y_train,
    batch_size=32,
    epochs=50,
    callbacks=callbacks,
    validation_data=(validInputs, Y_valid),
)

> # Visualize Results

In [None]:
# Pull the per-epoch curves out of the training history.
accuracy, val_accuracy, loss, val_loss = (
    results.history[key] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)
epochs = range(len(accuracy))

# One figure per metric: training curve as dots, validation curve as a line.
curves = (
    ('accuracy', accuracy, val_accuracy),
    ('loss', loss, val_loss),
)
for figureIndex, (metricName, trainCurve, validCurve) in enumerate(curves):
    if figureIndex > 0:
        # The first metric draws on the implicit current figure.
        plt.figure()
    plt.plot(epochs, trainCurve, 'bo', label='Training ' + metricName)
    plt.plot(epochs, validCurve, 'b', label='Validation ' + metricName)
    plt.title('Training and validation ' + metricName)
    plt.legend()
plt.show()

 > # Visualize output

In [None]:
# Predict on the validation split and binarise the sigmoid output at 0.63.
validInputs = {'image' : X_validImages, 'depth': X_validDepth}
valPred = model.predict(validInputs, verbose=1)
threshValTrain = np.greater(valPred, 0.63).astype(np.uint8)

In [None]:
# Ground-truth mask and thresholded prediction for one validation sample,
# each in its own figure.
sampleIdx = 3
plt.figure()
plt.imshow(Y_valid[sampleIdx])
plt.figure()
plt.imshow(threshValTrain[sampleIdx])

> # Read Test images

In [None]:
#LOAD IMAGES
testImagesPath = "./test/images/" 
testImagesIds = next(os.walk(testImagesPath))[2]
testImages = load_images_from_folder(testImagesPath,testImagesIds)

testDepth = np.zeros((len(testImagesIds), 1))
for count, imageId in enumerate(testImagesIds):
    testDepth[count][0] = depthDF.loc[imageId.replace('.png', ''), 'z']
testDepth = testDepth/ testDepth.max(axis=0)


> # Submission

In [None]:
# Predict on the test set and binarise the sigmoid output at 0.63.
testInputs = {'image' : testImages, 'depth': testDepth}
testPred = model.predict(testInputs, verbose=1)
threshTestTrain = np.greater(testPred, 0.63).astype(np.uint8)

In [None]:
# Raw prediction and input image for one test sample, each in its own figure.
sampleIdx = 3
plt.figure()
plt.imshow(testPred[sampleIdx])
plt.figure()
plt.imshow(testImages[sampleIdx])

In [None]:
print(testPred[0].shape)
# Drop the singleton axes from each thresholded prediction and resize it to
# 101x101, keeping the original value range.
preds_test_upsampled = [
    resize(np.squeeze(binaryMask), (101, 101), mode='constant', preserve_range=True)
    for binaryMask in threshTestTrain
]

In [None]:
# Shape of one resized prediction.
print(np.shape(preds_test_upsampled[0]))

In [None]:
from tqdm import tqdm_notebook, tnrange

In [None]:
def RLenc(img, order='F', format=True):
    """
    Run-length encode a binary mask.

    img is a 2-D mask of shape (r, c); every non-zero pixel counts as "on"
    order is the flattening order; 'F' (Fortran) walks down each column, then right
    format selects the return type

    returns the runs as a 'start length start length ...' string when format
    is True, otherwise as a list of (start, length) tuples; start positions
    are 1-based
    """
    flat = img.reshape(img.shape[0] * img.shape[1], order=order)

    # Pad with "off" on both sides so that every run has a rising and a
    # falling edge, including runs that touch the first or last pixel.
    padded = np.concatenate(([False], flat != 0, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    # Edges alternate rising/falling. A rising edge at index k means the run
    # starts at 0-based pixel k, i.e. 1-based position k + 1.
    rising = edges[0::2]
    falling = edges[1::2]
    runs = [(int(start) + 1, int(stop - start)) for start, stop in zip(rising, falling)]

    if not format:
        return runs
    return ' '.join('{} {}'.format(start, length) for start, length in runs)
    
# Map each test image id (file name without its 4-character extension) to the
# run-length encoding of its rounded, resized prediction.
# The progress bar wraps the id list itself rather than an enumerate object,
# so it knows the total and can show progress against it.
pred_dict = {
    id_[:-4]: RLenc(np.round(pred))
    for id_, pred in zip(tqdm_notebook(testImagesIds), preds_test_upsampled)
}

In [None]:
# Build the submission table: one row per test id, a single 'rle_mask'
# column, with the index labelled 'id'; then write it to disk.
sub = pd.DataFrame.from_dict(pred_dict, orient='index')
sub.columns = ['rle_mask']
sub.index.name = 'id'
sub.to_csv('submission.csv')