In [19]:
# Data download and unzipping
#!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage1_test.zip -c
#!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage1_train.zip -c
#!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage2_test_final.zip -c

#!mkdir stage1_train stage1_test stage2_test

#!unzip stage1_train.zip -d stage1_train/
#!unzip stage1_test.zip -d stage1_test/
#!unzip stage2_test_final.zip -d stage2_test/

In [20]:
# Downloading and installing keras
#!pip install keras

In [21]:
import os
import random
import sys
import warnings
import numpy as np
import pandas as pd
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from keras.utils import Progbar

from keras.models import Model, load_model
from keras.layers import Input
from keras.layers import Dropout, Lambda
from keras.layers import Conv2D, Conv2DTranspose,Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import concatenate
from keras import backend as K

# Hide UserWarnings raised from skimage modules (e.g. during resize).
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Setting seed for reproducibility.
# The seeding functions must be *called*: assigning to `random.seed` /
# `np.random.seed` replaces the function object and seeds nothing.
seed = 42
random.seed(seed)
np.random.seed(seed)
smooth = 1.   # smoothing term added to numerator and denominator in dice_coef
epochs = 50   # number of training epochs passed to u_net.fit

In [22]:
# Data Path
from pathlib import Path

# Root directories of the extracted datasets, relative to the working directory.
# NOTE(review): the commented-out download cell unzips stage 2 into
# `stage2_test/`, while this points at `stage2_test_final/` - confirm which
# directory actually holds the stage-2 images.
TRAIN_PATH, TEST_PATH_STAGE1, TEST_PATH_STAGE2 = map(
    Path,
    ('stage1_train/', 'stage1_test/', 'stage2_test_final/'),
)

In [46]:
# Get train and test IDs
from pathlib import Path
# Training ids are the names of the immediate sub-directories of TRAIN_PATH.
train_ids = next(os.walk(TRAIN_PATH))[1]
# Test images are found by a recursive search for PNG files under each test root.
test_paths_stage1 = [*TEST_PATH_STAGE1.rglob('*.png')]
test_paths_stage2 = [*TEST_PATH_STAGE2.rglob('*.png')]

In [24]:
# Read the training images and masks and return them as NumPy arrays
def read_train_data(IMG_WIDTH=256,IMG_HEIGHT=256,IMG_CHANNELS=3):
    """Load the training images and their merged masks as NumPy arrays.

    If both ``train_img.npy`` and ``train_mask.npy`` exist in the working
    directory they are loaded and returned as-is; the size arguments are NOT
    checked against the cached arrays. Otherwise every id in the module-level
    ``train_ids`` is read from ``TRAIN_PATH/<id>/images/<id>.png``, resized to
    ``(IMG_HEIGHT, IMG_WIDTH)``, and all files found in
    ``TRAIN_PATH/<id>/masks/`` are combined with an element-wise maximum into a
    single mask. The results are then saved to the two cache files.

    Args:
        IMG_WIDTH: Target image width in pixels.
        IMG_HEIGHT: Target image height in pixels.
        IMG_CHANNELS: Number of leading channels kept from each image.

    Returns:
        Tuple ``(X_train, Y_train)``: a uint8 array of shape
        ``(N, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)`` and a boolean array of
        shape ``(N, IMG_HEIGHT, IMG_WIDTH, 1)`` when built from disk.
    """
    print('Getting and resizing train images and masks ... ')
    sys.stdout.flush()
    # Check the cache before allocating anything: a cache hit needs neither the
    # zero-filled buffers nor `train_ids`.
    if os.path.isfile("train_img.npy") and os.path.isfile("train_mask.npy"):
        print("Train file loaded from memory")
        X_train = np.load("train_img.npy")
        Y_train = np.load("train_mask.npy")
        return X_train,Y_train

    X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
    Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)
    a = Progbar(len(train_ids))
    for n, id_ in enumerate(train_ids):
        # TRAIN_PATH is a pathlib.Path, so build paths with os.path.join
        # (Path + str raises TypeError).
        path = os.path.join(TRAIN_PATH, id_)
        img = imread(os.path.join(path, 'images', id_ + '.png'))[:,:,:IMG_CHANNELS]
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        X_train[n] = img

        # Merge every per-object mask file into one foreground mask.
        mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)
        masks_dir = os.path.join(path, 'masks')
        for mask_file in next(os.walk(masks_dir))[2]:
            mask_ = imread(os.path.join(masks_dir, mask_file))
            mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant',
                                        preserve_range=True), axis=-1)
            mask = np.maximum(mask, mask_)
        Y_train[n] = mask
        # Progbar.update takes the number of completed steps, hence n + 1.
        a.update(n + 1)
    np.save("train_img",X_train)
    np.save("train_mask",Y_train)
    return X_train,Y_train

In [56]:
# Function to read test images and return as numpy array
def read_test_data(paths_list, IMG_WIDTH=256,IMG_HEIGHT=256,IMG_CHANNELS=3, stage=1):
    """Load test images, resized to the network input size, plus their original sizes.

    If both ``test_img_stage<stage>.npy`` and ``test_size_stage<stage>.npy``
    exist in the working directory they are loaded and returned as-is (the
    sizes then come back as a NumPy array; ``paths_list`` and the size
    arguments are not checked against the cache). Otherwise every path in
    ``paths_list`` is read, its original ``[height, width]`` recorded, and the
    image resized to ``(IMG_HEIGHT, IMG_WIDTH)``. Results are saved to the two
    cache files.

    An image that cannot be read is replaced by an all-zero image of the target
    size, and the target size is recorded as its original size.

    Args:
        paths_list: Sequence of image paths; each item must expose ``.name``
            (e.g. ``pathlib.Path``) for the error message.
        IMG_WIDTH: Target image width in pixels.
        IMG_HEIGHT: Target image height in pixels.
        IMG_CHANNELS: Number of channels kept per image.
        stage: Competition stage, used only to name the cache files.

    Returns:
        Tuple ``(X_test, sizes_test)``: a uint8 array of shape
        ``(N, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)`` and the per-image
        ``[height, width]`` pairs (a list when built from disk).
    """
    stage_str = str(stage)
    img_cache = f"test_img_stage{stage_str}.npy"
    size_cache = f"test_size_stage{stage_str}.npy"

    print('\nGetting and resizing test images ... ')
    sys.stdout.flush()
    # Check the cache before allocating the output buffer.
    if os.path.isfile(img_cache) and os.path.isfile(size_cache):
        print("Test file loaded from memory")
        X_test = np.load(img_cache)
        sizes_test = np.load(size_cache)
        return X_test,sizes_test

    X_test = np.zeros((len(paths_list), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
    sizes_test = []
    b = Progbar(len(paths_list))
    for n, path in enumerate(paths_list):
        try:
            img = imread(path)
            if img.ndim == 2:
                # Grayscale image: replicate the single plane across channels
                # instead of discarding the image in the fallback below.
                img = np.stack([img] * IMG_CHANNELS, axis=-1)
            img = img[:,:,:IMG_CHANNELS]
        except Exception as exc:
            # Best effort: keep going with a black placeholder, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare `except:` would.
            print(f"couldn't read image {path.name}: {exc}")
            img = np.zeros((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
        sizes_test.append([img.shape[0], img.shape[1]])
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        X_test[n] = img
        # Progbar.update takes the number of completed steps, hence n + 1.
        b.update(n + 1)
    np.save(img_cache,X_test)
    np.save(size_cache,sizes_test)
    return X_test,sizes_test

In [26]:
# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that equal 1.

    ``x`` is transposed and flattened before scanning, so pixels are numbered
    down each column first. The result is a flat list of alternating
    ``start, length`` values where ``start`` is 1-based.
    """
    foreground = np.flatnonzero(x.T.flatten() == 1)
    encoded = []
    last_index = -2  # sentinel: guarantees the first pixel opens a new run
    for index in foreground:
        if index - last_index > 1:
            # Gap since the previous foreground pixel: open a new run.
            encoded += [index + 1, 0]
        encoded[-1] = encoded[-1] + 1
        last_index = index
    return encoded

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of ``x > cutoff``.

    The thresholded array is labelled with skimage's ``label``; regions are
    yielded in increasing label order, starting at label 1.
    """
    labelled = label(x > cutoff)
    region_labels = range(1, labelled.max() + 1)
    yield from (rle_encoding(labelled == region) for region in region_labels)

In [27]:
# Iterate over the test IDs and generate run-length encodings for each separate mask identified by skimage
def mask_to_rle(preds_test_upsampled, test_ids):
    """Run-length encode every region of every predicted mask.

    Prints the number of ids and the number of predictions, then walks the ids
    in order, encoding the prediction at the same index with ``prob_to_rles``.

    Returns:
        Tuple ``(ids, rles)`` of equal length: each id is repeated once per
        encoding produced for its prediction.
    """
    print(len(test_ids))
    print(len(preds_test_upsampled))
    repeated_ids, encodings = [], []
    for index, image_id in enumerate(test_ids):
        per_image = list(prob_to_rles(preds_test_upsampled[index]))
        encodings += per_image
        repeated_ids += [image_id] * len(per_image)
    return repeated_ids, encodings

In [28]:
# Metric function
def dice_coef(y_true, y_pred):
    """Smoothed Dice coefficient between ``y_true`` and ``y_pred``.

    Both tensors are flattened; the module-level ``smooth`` is added to the
    numerator and the denominator.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator

# Loss function
def dice_coef_loss(y_true, y_pred):
    """Negated Dice coefficient, so that minimising it maximises ``dice_coef``."""
    score = dice_coef(y_true, y_pred)
    return -score

In [29]:
def get_unet(IMG_WIDTH=256,IMG_HEIGHT=256,IMG_CHANNELS=3):
    """Build and compile the U-Net segmentation model.

    The input is divided by 255, passed through four conv/dropout/conv stages
    each followed by 2x2 max pooling (16, 32, 64, 128 filters), a 256-filter
    bottleneck, and four transposed-convolution stages that concatenate the
    matching encoder output before another conv/dropout/conv stage. A 1x1
    sigmoid convolution produces the single-channel output.

    Returns:
        The model, compiled with the Adam optimizer, binary cross-entropy loss
        and ``dice_coef`` as a metric.
    """
    def conv3x3(filters):
        # 3x3 same-padded ELU convolution shared by every stage.
        return Conv2D(filters, (3, 3), activation='elu',
                      kernel_initializer='he_normal', padding='same')

    def conv_stage(tensor, filters, drop_rate):
        # conv -> dropout -> conv, with layers created in that order.
        tensor = conv3x3(filters) (tensor)
        tensor = Dropout(drop_rate) (tensor)
        return conv3x3(filters) (tensor)

    def up_stage(tensor, skip, filters, drop_rate, **concat_kwargs):
        # Upsample 2x, join with the encoder feature map, then a conv stage.
        upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same') (tensor)
        merged = concatenate([upsampled, skip], **concat_kwargs)
        return conv_stage(merged, filters, drop_rate)

    inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    scaled = Lambda(lambda x: x / 255) (inputs)

    # Contracting path
    enc1 = conv_stage(scaled, 16, 0.1)
    pool1 = MaxPooling2D((2, 2)) (enc1)
    enc2 = conv_stage(pool1, 32, 0.1)
    pool2 = MaxPooling2D((2, 2)) (enc2)
    enc3 = conv_stage(pool2, 64, 0.2)
    pool3 = MaxPooling2D((2, 2)) (enc3)
    enc4 = conv_stage(pool3, 128, 0.2)
    pool4 = MaxPooling2D(pool_size=(2, 2)) (enc4)

    # Bottleneck
    bottleneck = conv_stage(pool4, 256, 0.3)

    # Expanding path
    dec6 = up_stage(bottleneck, enc4, 128, 0.2)
    dec7 = up_stage(dec6, enc3, 64, 0.2)
    dec8 = up_stage(dec7, enc2, 32, 0.1)
    dec9 = up_stage(dec8, enc1, 16, 0.1, axis=3)

    outputs = Conv2D(1, (1, 1), activation='sigmoid') (dec9)

    model = Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam',loss='binary_crossentropy', metrics=[dice_coef])
    return model

In [33]:
# get train_data
# Training images and merged masks (served from the .npy cache when present)
train_img, train_mask = read_train_data()

# Stage-1 test images together with their original sizes
test_img_stage1, test_img_sizes_stage1 = read_test_data(paths_list=test_paths_stage1, stage=1)

# Stage-2 test images together with their original sizes
test_img_stage2, test_img_sizes_stage2 = read_test_data(paths_list=test_paths_stage2, stage=2)

Getting and resizing train images and masks ... 
Train file loaded from memory

Getting and resizing test images ... 
Test file loaded from memory

Getting and resizing test images ... 


In [34]:
# Build the U-Net with get_unet's default arguments (256x256 input, 3 channels);
# get_unet returns the model already compiled.
u_net = get_unet()

In [37]:
from keras.callbacks import ModelCheckpoint
import os

# Define the model checkpoint callback
model_save_path = "saved_model.h5"
# The model is compiled with loss='binary_crossentropy' and metrics=[dice_coef],
# so the quantities logged during training are 'loss' and 'dice_coef'; there is
# no 'dice_coef_loss' entry to monitor, and with save_best_only=True the
# checkpoint would never be written. Monitor the Dice metric and maximise it
# (mode must be explicit: 'auto' would treat this metric as one to minimise).
model_checkpoint = ModelCheckpoint(model_save_path, monitor='dice_coef', mode='max', save_best_only=True)

# Reuse previously saved weights when available; otherwise train from scratch.
if os.path.exists(model_save_path):
    print("Loading saved model...")
    u_net.load_weights(model_save_path)
else:
    # fit model on train_data
    print("\nTraining...")
    history = u_net.fit(train_img, train_mask, batch_size=16, epochs=epochs, callbacks=[model_checkpoint])



Training...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [51]:
# Run the trained network over the stage-1 test images
print("Predicting Stage1")
test_mask_stage1 = u_net.predict(x=test_img_stage1, verbose=1)

Predicting Stage1


In [57]:
# Run the trained network over the stage-2 test images
print("Predicting Stage2")
test_mask_stage2 = u_net.predict(x=test_img_stage2, verbose=1)

Predicting Stage2

Getting and resizing test images ... 
4edad1bb8932fc0d1b1d7af10d31918c075e26af50c2c6d7084d8ae032502e5c.png
   0/3019 [..............................] - ETA: 0sddbe5e29f520ce459afc9f49da87d1d07a2d558e618e7645efb426eda9c2f162.png
fa46454ed30dd1e1681d7e4926baee039c3cd15f9c27135717f968483e68e139.png
   2/3019 [..............................] - ETA: 2:4322cbcbc0560173500c108580e0caab8c899b4dbe439809d29a32a50e3d9e18c4.png
eb7e8d208dfa73bfe5d91ee51486bbb558c91cd3be753bd65f1054c51ea19b82.png
   4/3019 [..............................] - ETA: 2:1403975f1fb6d6780c2d90f69aa94d754e75232c6feeb20f32ad3d438b72f1be2e.png
827fcdb675b8ab08c5dfc8dd90115a23ae96224738bbb25ab81bfc8424163326.png
   6/3019 [..............................] - ETA: 2:0111cb351f5a05f071ae3376532913de1d418cf7fbed9db4684f9350b356dce2e5.png
a583b792fe9a97c9daef508250945d49375cdcfeea8d2df39db0185ad896a391.png
   8/3019 [..............................] - ETA: 2:0334a0aebe4ea2d48810fcab88475ed0a2af3446960e7ad17945eb8b

In [65]:
# Create list of upsampled test masks
def upsample(test_mask, test_img_sizes):
    """Resize each predicted mask to the size recorded for its image.

    For every index ``i`` of ``test_mask``, the mask is squeezed and resized to
    ``(test_img_sizes[i][0], test_img_sizes[i][1])``.

    Returns:
        List of resized masks, in the same order as ``test_mask``.
    """
    return [
        resize(
            np.squeeze(test_mask[idx]),
            (test_img_sizes[idx][0], test_img_sizes[idx][1]),
            mode='constant',
            preserve_range=True,
        )
        for idx in range(len(test_mask))
    ]

# Bring the stage-1 and stage-2 predictions back to each image's recorded size.
test_mask_stage1_upsampled = upsample(test_mask=test_mask_stage1,
                                      test_img_sizes=test_img_sizes_stage1)
test_mask_stage2_upsampled = upsample(test_mask=test_mask_stage2,
                                      test_img_sizes=test_img_sizes_stage2)


In [66]:
# `test_ids_stage1` / `test_ids_stage2` were never defined before this cell, so
# on a fresh kernel it raised NameError. Derive the image ids from the test
# paths instead; this also keeps the cell idempotent, because its input is no
# longer the name it overwrites.
# NOTE(review): assumes each test file is named `<image_id>.png` (as the
# training images are) and that the predictions are in `test_paths_*` order -
# confirm, especially when the arrays come from the .npy cache.
test_ids_stage1,rles_stage1 = mask_to_rle(test_mask_stage1_upsampled,
                                          [p.stem for p in test_paths_stage1])
test_ids_stage2,rles_stage2 = mask_to_rle(test_mask_stage2_upsampled,
                                          [p.stem for p in test_paths_stage2])

3019
3019


In [67]:
# Create submission DataFrame
def _write_submission(image_ids, encodings, csv_path):
    """Build a two-column submission frame and write it to ``csv_path``.

    ``ImageId`` holds ``image_ids``; ``EncodedPixels`` holds each encoding
    joined into a space-separated string. The index is not written.
    """
    frame = pd.DataFrame()
    frame['ImageId'] = image_ids
    frame['EncodedPixels'] = pd.Series(encodings).apply(lambda x: ' '.join(str(y) for y in x))
    frame.to_csv(csv_path, index=False)
    return frame


# Stage-1 submission
sub = _write_submission(test_ids_stage1, rles_stage1, 'kaggle_stage1_answers.csv')

# Stage-2 submission
sub = _write_submission(test_ids_stage2, rles_stage2, 'kaggle_stage2_answers.csv')