In [None]:
import cv2
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import gc
gc.enable() 

import matplotlib.pyplot as plt 
import os 
import tensorflow as tf 

# This stops pandas from spitting 
# out warnings at us. 
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split

from scipy.ndimage.filters import gaussian_filter
from scipy.ndimage.interpolation import map_coordinates
from skimage.io import imread
from skimage.transform import resize

from tqdm import tqdm_notebook

from keras.preprocessing.image import load_img, ImageDataGenerator
from keras import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.models import load_model
from keras.optimizers import Adam, SGD
from keras import optimizers
from keras.layers import Activation, Input, Conv2D, Conv2DTranspose, MaxPooling2D, SpatialDropout2D 
from keras.layers import UpSampling2D, Concatenate, Dropout, Lambda, BatchNormalization, Add, ZeroPadding2D
from keras.layers import concatenate
from keras.losses import binary_crossentropy
from keras import backend as K 

%matplotlib inline

In [None]:
# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
RANDOM_SEED = 555
np.random.seed(RANDOM_SEED)

# Base input directory.
BASE_DIRECTORY = '../input/'

# Image dimensions (pixels) and channel count used when loading.
IMAGE_WIDTH = 101
IMAGE_HEIGHT = 101
IMAGE_CHANNELS = 1

# Network training parameters.
MAX_EPOCHS = 100          # epoch limit of the second (Lovasz) training stage
EPOCHS_1 = 75             # epochs of the first (cross-entropy) training stage
BATCH_SIZE_1 = 128        # batch size of the first stage
BATCH_SIZE_2 = 64         # batch size of the second stage
BATCHES_PER_EPOCH = 200   # steps per epoch for generator-based training

In [None]:
# Image ids intended to be left out of training: `prepare_training_sample`
# accepts this list through its `bad_ids` argument.
# NOTE(review): presumably these are samples with unusable images or masks --
# confirm where the list came from.  The loading cell below calls
# `prepare_training_sample` without passing it.
bad_ids = ['00950d1627', '0280deb8ae', '09152018c4', 
           '09b9330300', '130229ec15', '15d76f1672', 
           '1eaf42beee', '23afbccfb5', '24522ec665', 
           '285f4b2e82', '2bc179b78c', '2f746f8726', 
           '33887a0ae7', '33dfce3a76', '36ad52a2e8', 
           '3975043a11', '39cd06da7d', '3cb59a4fdc', 
           '403cb8f4b3', '483b35d589', '4f5df40ab2', 
           '4fbda008c7', '4fdc882e4b', '50d3073821', 
           '52667992f8', '52ac7bb4c1', '5b217529e7', 
           '608567ed23', '60dccbc52f', '62aad7556c', 
           '62d30854d7', '640ceb328a', '7f0825a2f0', 
           '80a458a2b6', '81fa3d59b8', '834861f1b6', 
           '849881c690', '876e6423e6', '88a5c49514', 
           '89dfb7ba1d', '9067effd34', '916aff36ae', 
           '919bc0e2ba', '937ea43a65', '93a1541218', 
           '95f6e2b2d1', '96216dae3b', '96523f824a', 
           '99ee31b5bc', '9a4b15919d', '9eb4a10b98', 
           'a266a2a9df', 'a6625b8937', 'a9ee40cf0d', 
           'aeba5383e4', 'b1be1fa682', 'b24d3673e1', 
           'b35b1b412b', 'b525824dfc', 'b8a9602e21', 
           'ba1287cb48', 'be90ab3e56', 'c2973c16f1', 
           'c387a012fc', 'c98dfd50ba', 'caccd6708f', 
           'd0bbe4fd97', 'd4d2ed6bd2', 'd4d34af4f7', 
           'd9a52dc263', 'dd6a04d456', 'ddcb457a07', 
           'de7202d286', 'e12cd094a6', 'e73ed6e7f2', 
           'f0c401b64b', 'f19b7d20bb', 'f641699848', 
           'f6e87c1458', 'f7380099f6', 'f75842e215', 
           'fb3392fee0', 'fb47e8e74e', 'fd63516ff4', 
           'febd1d2a67']

In [None]:
def prepare_training_sample(path_to_train='../input/train', bad_ids=None, sample_size=100):
    """Load up to `sample_size` training images and their masks into arrays.

    Parameters
    ----------
    path_to_train : str
        Directory holding the `images/` and `masks/` sub-folders of PNG files.
    bad_ids : collection of str, optional
        Image ids to leave out.  When None, no id is excluded.
    sample_size : int
        Maximum number of images to load; capped at the number of usable ids.

    Returns
    -------
    x, y : numpy.ndarray
        `x` has shape (n, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS) and `y`
        has shape (n, IMAGE_HEIGHT, IMAGE_WIDTH, 1).  Both hold the pixel
        values read from disk divided by 255.
    """
    # Sort the paths: glob returns files in arbitrary order, and a stable
    # order is needed for the seeded train/validation split to be repeatable.
    image_files = sorted(glob.glob(path_to_train + '/images/*.png'))
    image_ids = [os.path.splitext(os.path.basename(path))[0] for path in image_files]

    if bad_ids is not None:
        excluded = set(bad_ids)
        # The filtered list must replace `image_ids`, otherwise the exclusion
        # has no effect on what gets loaded below.
        image_ids = [image_id for image_id in image_ids if image_id not in excluded]

    sample_size = min(sample_size, len(image_ids))

    x = np.zeros(shape=(sample_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS))
    y = np.zeros(shape=(sample_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1))

    def load_scaled(folder, image_id):
        # Read one grayscale PNG as float32 in units of 1/255.
        path = '{}/{}/{}.png'.format(path_to_train, folder, image_id)
        return np.array(load_img(path, grayscale=True), dtype=np.float32) / 255

    for index, image_id in enumerate(image_ids[:sample_size]):
        x[index] = load_scaled('images', image_id).reshape(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
        # Masks always have a single channel, matching the shape of `y`.
        y[index] = load_scaled('masks', image_id).reshape(IMAGE_HEIGHT, IMAGE_WIDTH, 1)

    return x, y

In [None]:
# Load everything: there are 4000 images, so a limit of 4001 drops nothing.
x, y = prepare_training_sample(sample_size=4001)

# Fraction of salt pixels per mask, in [0, 1].  The mean over the pixel axes
# (not a raw pixel count) is what the [0, 1] bin edges below expect, and it
# yields a 1-D array with one value per sample.
salt_fraction = y.mean(axis=(1, 2, 3))
# Bucket the fractions into coverage classes used to stratify the split.
salt_fraction = np.digitize(salt_fraction, np.linspace(0, 1, 10))

In [None]:
# Hold out 20% of the samples for validation, stratified on `salt_fraction`
# and seeded so the split is repeatable.
(x_train, x_valid,
 y_train, y_valid) = train_test_split(
    x, y,
    test_size=0.2,
    stratify=salt_fraction,
    random_state=RANDOM_SEED,
)

In [None]:
# Double the training set by appending a left-right mirrored copy of every
# image and mask (axis 2 is the width axis of the 4-D arrays).
# NOTE: this cell reassigns its own inputs, so running it twice doubles the
# data a second time.
x_train = np.concatenate((x_train, np.flip(x_train, axis=2)), axis=0)
y_train = np.concatenate((y_train, np.flip(y_train, axis=2)), axis=0)

### Data Augmentation
In this section, we flip the training images left to right and append them to the existing training set. To keep the validation set free of augmented data, I do not add the augmented images before splitting.

In [None]:
# Ideas and codes. 
# https://github.com/asanakoy/kaggle_carvana_segmentation/blob/master/albu/src/transforms.py
def clip(img, dtype, maxval):
    """Limit `img` to the range [0, maxval] and cast the result to `dtype`."""
    bounded = np.clip(img, 0, maxval)
    return bounded.astype(dtype)

def randomize_brightness(image, limit=0.1):
    """Scale the pixel values of `image` by a random factor close to 1.

    The factor is `1 + limit * u` with `u` drawn uniformly from [-1, 1).
    The scaled values are clipped to [0, max(image)] and cast back to the
    dtype of the input.
    """
    original_dtype = image.dtype
    ceiling = np.max(image)
    scale = 1.0 + limit * np.random.uniform(-1, 1)
    return clip(scale * image, original_dtype, ceiling)

def _distorted_axis(length, num_steps, distort_limit):
    """Return `length` float32 source coordinates made of randomly stretched segments."""
    # Never let the segment length drop to zero (num_steps larger than length).
    step = max(1, length // num_steps)
    coords = np.zeros(length, np.float32)
    prev = 0
    for start in range(0, length, step):
        end = start + step
        if end > length:
            # Trailing partial segment: end it exactly on the far edge.
            end = length
            cur = length
        else:
            cur = prev + step * (1 + np.random.uniform(-distort_limit, distort_limit))

        # Fill this segment before moving on; every segment needs its own
        # coordinates, and the next one starts where this one ended.
        coords[start:end] = np.linspace(prev, cur, end - start)
        prev = cur
    return coords


def deform(image, mask, num_steps=3, distort_limit=0.01):
    """Apply a random piecewise-linear grid distortion to an image and its mask.

    Each axis is cut into segments of `size // num_steps` pixels, and every
    segment is stretched or squeezed by a random relative amount within
    +/- `distort_limit`.  The same sampling grid is used for image and mask.

    Parameters
    ----------
    image : numpy.ndarray
        Array of shape (height, width, channel).
    mask : numpy.ndarray or None
        Mask to distort alongside the image; skipped when None.
    num_steps : int
        Number of full segments per axis.
    distort_limit : float
        Maximum relative change of a segment's length.

    Returns
    -------
    (image, mask)
        The distorted image reshaped to (height, width, channel) and the
        distorted mask reshaped to (height, width, 1), or None for the mask
        when none was given.
    """
    height, width, channel = image.shape

    # Draw the x coordinates first, then y, one random value per full segment.
    xx = _distorted_axis(width, num_steps, distort_limit)
    yy = _distorted_axis(height, num_steps, distort_limit)

    map_x, map_y = np.meshgrid(xx, yy)
    map_x = map_x.astype(np.float32)
    map_y = map_y.astype(np.float32)
    image = cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR,
                      borderMode=cv2.BORDER_REFLECT_101).reshape(height, width, channel)
    if mask is not None:
        mask = cv2.remap(mask, map_x, map_y, interpolation=cv2.INTER_LINEAR,
                         borderMode=cv2.BORDER_REFLECT_101).reshape(height, width, 1)

    return image, mask
    
def augment(image, mask, prob=0.5):
    """Randomly flip, brighten and deform an image/mask pair.

    Each of the three steps is applied independently with probability `prob`.
    The flip and the deformation act on both arrays; the brightness change
    only touches the image.
    """
    flip = np.random.uniform() < prob
    if flip:
        image, mask = np.fliplr(image), np.fliplr(mask)

    brighten = np.random.uniform() < prob
    if brighten:
        image = randomize_brightness(image)

    warp = np.random.uniform() < prob
    if warp:
        image, mask = deform(image, mask)

    return image, mask

def generate_images(images, masks, batch_size=32):
    """Endlessly yield randomly sampled, augmented (image batch, mask batch) pairs.

    Parameters
    ----------
    images : numpy.ndarray
        Array of shape (n_samples, height, width, channels).
    masks : numpy.ndarray
        Masks indexed like `images`.
    batch_size : int
        Number of samples per yielded batch; indices are drawn with replacement.

    Yields
    ------
    (x_batch, y_batch)
        Arrays of shape (batch_size, height, width, channels) and
        (batch_size, height, width, 1).
    """
    n_samples, height, width, channels = images.shape
    while True:
        # Allocate fresh arrays for every batch: a consumer that queues or
        # keeps a batch must not see it overwritten by the next one.
        x_batch = np.zeros(shape=(batch_size, height, width, channels))
        y_batch = np.zeros(shape=(batch_size, height, width, 1))

        indices = np.random.randint(0, n_samples, batch_size)
        for batch_index, data_index in enumerate(indices):
            x_batch[batch_index], y_batch[batch_index] = augment(images[data_index], masks[data_index])

        yield (x_batch, y_batch)

In [None]:
# Disabled alternative: an ImageDataGenerator-based augmentation setup, kept
# inside a string literal so that none of it is executed.
'''
generation_options = dict(
    featurewise_center=False,
    featurewise_std_normalization=False,
    rotation_range=0.0,
    width_shift_range=0.0,
    height_shift_range=0.0,
    zoom_range=0.0,
    shear_range=0.0,
    horizontal_flip=True,
    vertical_flip=False,
    zca_whitening=False
)

image_generator = ImageDataGenerator(**generation_options)
mask_generator  = ImageDataGenerator(**generation_options)
image_generator.fit(x_train, augment=True, seed=RANDOM_SEED)
mask_generator.fit(y_train, augment=True, seed=RANDOM_SEED)
'''

In [None]:
def dice_loss(y_true, y_pred):
    """Soft Dice loss over all elements of the batch, with a smoothing term of 1.

    Computes 1 - (2 * sum(t * p) + 1) / (sum(t) + sum(p) + 1).
    """
    smooth = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = truth * pred
    numerator = 2. * K.sum(overlap) + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return 1. - numerator / denominator

def bce_dice_loss(y_true, y_pred):
    """Binary cross-entropy plus the Dice loss weighted by a factor of 5."""
    bce_term = binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + 5.0 * dice_term

# IoU cut-offs (0.50 to 0.95 in steps of 0.05) that `iou_metric` averages over.
iou_thresholds = np.array([
    0.5, 0.55, 0.6, 0.65, 0.7,
    0.75, 0.8, 0.85, 0.9, 0.95,
])

def iou(img_true, img_pred):
    """Intersection over union of two masks.

    The intersection counts elements whose product is positive, the union
    counts elements whose sum is positive.  An empty union gives 0.
    """
    union = np.sum((img_true + img_pred) > 0)
    if union == 0:
        return union
    intersection = np.sum((img_true * img_pred) > 0)
    return intersection / union

def iou_metric(imgs_true, imgs_pred):
    """Average, over all images, of the share of `iou_thresholds` that the IoU reaches.

    An image whose true and predicted masks both sum to zero scores 1.
    """
    def score_pair(truth, pred):
        # Both masks empty counts as a perfect prediction.
        if truth.sum() == pred.sum() == 0:
            return 1
        return (iou_thresholds <= iou(truth, pred)).mean()

    total = len(imgs_true)
    per_image = np.zeros(total)
    for position in range(total):
        per_image[position] = score_pair(imgs_true[position], imgs_pred[position])

    return per_image.mean()

def mean_iou(y_true, y_pred):
    """Mean IoU averaged over the prediction thresholds 0.5, 0.55, ..., 0.95.

    For every threshold the predictions are binarised and scored with
    `tf.metrics.mean_iou` using 2 classes; the per-threshold scores are then
    stacked and averaged.  Relies on the session-based TensorFlow API.
    """
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tf.to_int32(y_pred > t)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        # Initialise TensorFlow's local variables in the Keras session
        # (presumably the metric's accumulators live there -- confirm).
        K.get_session().run(tf.local_variables_initializer())
        # Make reading the score depend on the metric's update op.
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)

def get_iou_vector(A, B):
    """Mean thresholded-IoU score of a batch.

    For each sample along the first axis, the IoU of the positive regions of
    `A` and `B` is computed (with 1e-10 added to numerator and denominator,
    so two empty masks give 1).  The sample's score is the fraction of the
    thresholds 0.5, 0.55, ..., 0.95 that the IoU strictly exceeds.  Returns
    the mean of the sample scores.
    """
    thresholds = np.arange(0.5, 1, 0.05)
    per_sample = []
    for index in range(A.shape[0]):
        truth = A[index] > 0
        pred = B[index] > 0

        overlap = np.sum(np.logical_and(truth, pred) > 0)
        combined = np.sum(np.logical_or(truth, pred) > 0)
        ratio = (overlap + 1e-10) / (combined + 1e-10)

        per_sample.append(np.mean(ratio > thresholds))

    return np.mean(per_sample)

def my_iou_metric(label, pred):
    """Keras metric: `get_iou_vector` on predictions cut at 0.5, wrapped as a TF op."""
    binary_pred = pred > 0.5
    return tf.py_func(get_iou_vector, [label, binary_pred], tf.float64)

def my_iou_metric_2(label, pred):
    """Keras metric: `get_iou_vector` on predictions cut at 0, wrapped as a TF op."""
    positive_pred = pred > 0
    return tf.py_func(get_iou_vector, [label, positive_pred], tf.float64)

In [None]:
def BatchActivate(x):
    """Apply batch normalisation followed by a ReLU activation."""
    normalised = BatchNormalization()(x)
    return Activation('relu')(normalised)

def convolution_block(x, filters, size, strides=(1,1), padding='same', activation=True):
    """2-D convolution, optionally followed by batch normalisation and ReLU.

    The normalisation/activation step runs only when `activation` equals True.
    """
    conv = Conv2D(filters, size, strides=strides, padding=padding)(x)
    # Kept as an explicit comparison with True rather than a truthiness test.
    return BatchActivate(conv) if activation == True else conv

def residual_block(blockInput, num_filters=16, batch_activate = False):
    """Residual unit: BN+ReLU, two 3x3 convolutions, then a skip connection.

    The second convolution has no activation; its output is added to
    `blockInput`.  With `batch_activate` set, the sum additionally goes
    through batch normalisation and ReLU.
    """
    branch = BatchActivate(blockInput)
    branch = convolution_block(branch, num_filters, (3,3))
    branch = convolution_block(branch, num_filters, (3,3), activation=False)
    merged = Add()([branch, blockInput])
    if not batch_activate:
        return merged
    return BatchActivate(merged)

In [None]:
def build_model(input_layer, start_neurons, DropoutRatio = 0.5):
    """Build a U-Net style encoder/decoder from residual blocks.

    Encoder: four stages, each a 3x3 convolution, two residual blocks, 2x2 max
    pooling and dropout, with start_neurons * 1, 2, 4 and 8 filters.  The
    first stage uses half of `DropoutRatio`.
    Middle: a 3x3 convolution and two residual blocks with start_neurons * 16
    filters.
    Decoder: four stages, each a stride-2 transposed convolution whose output
    is concatenated with the matching encoder output, followed by dropout, a
    3x3 convolution and two residual blocks.  The 12 -> 25 and 50 -> 101
    stages use "valid" padding, the other two use "same".

    Parameters
    ----------
    input_layer : Keras tensor that the network is applied to.
    start_neurons : number of filters in the first stage.
    DropoutRatio : dropout rate used throughout (halved after the first pooling).

    Returns
    -------
    The output tensor: a 1x1 convolution without activation followed by a
    separate sigmoid Activation layer.
    """
    # 101 -> 50
    conv1 = Conv2D(start_neurons * 1, (3, 3), activation=None, padding="same")(input_layer)
    conv1 = residual_block(conv1,start_neurons * 1)
    conv1 = residual_block(conv1,start_neurons * 1, True)
    pool1 = MaxPooling2D((2, 2))(conv1)
    pool1 = Dropout(DropoutRatio/2)(pool1)

    # 50 -> 25
    conv2 = Conv2D(start_neurons * 2, (3, 3), activation=None, padding="same")(pool1)
    conv2 = residual_block(conv2,start_neurons * 2)
    conv2 = residual_block(conv2,start_neurons * 2, True)
    pool2 = MaxPooling2D((2, 2))(conv2)
    pool2 = Dropout(DropoutRatio)(pool2)

    # 25 -> 12
    conv3 = Conv2D(start_neurons * 4, (3, 3), activation=None, padding="same")(pool2)
    conv3 = residual_block(conv3,start_neurons * 4)
    conv3 = residual_block(conv3,start_neurons * 4, True)
    pool3 = MaxPooling2D((2, 2))(conv3)
    pool3 = Dropout(DropoutRatio)(pool3)

    # 12 -> 6
    conv4 = Conv2D(start_neurons * 8, (3, 3), activation=None, padding="same")(pool3)
    conv4 = residual_block(conv4,start_neurons * 8)
    conv4 = residual_block(conv4,start_neurons * 8, True)
    pool4 = MaxPooling2D((2, 2))(conv4)
    pool4 = Dropout(DropoutRatio)(pool4)

    # Middle
    convm = Conv2D(start_neurons * 16, (3, 3), activation=None, padding="same")(pool4)
    convm = residual_block(convm,start_neurons * 16)
    convm = residual_block(convm,start_neurons * 16, True)
    
    # 6 -> 12
    deconv4 = Conv2DTranspose(start_neurons * 8, (3, 3), strides=(2, 2), padding="same")(convm)
    uconv4 = concatenate([deconv4, conv4])
    uconv4 = Dropout(DropoutRatio)(uconv4)
    
    uconv4 = Conv2D(start_neurons * 8, (3, 3), activation=None, padding="same")(uconv4)
    uconv4 = residual_block(uconv4,start_neurons * 8)
    uconv4 = residual_block(uconv4,start_neurons * 8, True)
    
    # 12 -> 25
    #deconv3 = Conv2DTranspose(start_neurons * 4, (3, 3), strides=(2, 2), padding="same")(uconv4)
    deconv3 = Conv2DTranspose(start_neurons * 4, (3, 3), strides=(2, 2), padding="valid")(uconv4)
    uconv3 = concatenate([deconv3, conv3])    
    uconv3 = Dropout(DropoutRatio)(uconv3)
    
    uconv3 = Conv2D(start_neurons * 4, (3, 3), activation=None, padding="same")(uconv3)
    uconv3 = residual_block(uconv3,start_neurons * 4)
    uconv3 = residual_block(uconv3,start_neurons * 4, True)

    # 25 -> 50
    deconv2 = Conv2DTranspose(start_neurons * 2, (3, 3), strides=(2, 2), padding="same")(uconv3)
    uconv2 = concatenate([deconv2, conv2])
        
    uconv2 = Dropout(DropoutRatio)(uconv2)
    uconv2 = Conv2D(start_neurons * 2, (3, 3), activation=None, padding="same")(uconv2)
    uconv2 = residual_block(uconv2,start_neurons * 2)
    uconv2 = residual_block(uconv2,start_neurons * 2, True)
    
    # 50 -> 101
    #deconv1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="same")(uconv2)
    deconv1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="valid")(uconv2)
    uconv1 = concatenate([deconv1, conv1])
    
    uconv1 = Dropout(DropoutRatio)(uconv1)
    uconv1 = Conv2D(start_neurons * 1, (3, 3), activation=None, padding="same")(uconv1)
    uconv1 = residual_block(uconv1,start_neurons * 1)
    uconv1 = residual_block(uconv1,start_neurons * 1, True)
    
    #uconv1 = Dropout(DropoutRatio/2)(uconv1)
    #output_layer = Conv2D(1, (1,1), padding="same", activation="sigmoid")(uconv1)
    # The sigmoid is a separate final layer, so the un-activated output stays
    # reachable as the input of the model's last layer.
    output_layer_noActi = Conv2D(1, (1,1), padding="same", activation=None)(uconv1)
    output_layer =  Activation('sigmoid')(output_layer_noActi)
    
    return output_layer

In [None]:
# code download from: https://github.com/bermanmaxim/LovaszSoftmax
def lovasz_grad(gt_sorted):
    """
    Gradient of the Lovasz extension with respect to the sorted errors
    (Alg. 1 in the paper).
    """
    total_positives = tf.reduce_sum(gt_sorted)
    intersection = total_positives - tf.cumsum(gt_sorted)
    union = total_positives + tf.cumsum(1. - gt_sorted)
    jaccard = 1. - intersection / union
    # Turn the cumulative Jaccard values into successive differences,
    # keeping the first entry as it is.
    first = jaccard[0:1]
    steps = jaccard[1:] - jaccard[:-1]
    return tf.concat((first, steps), 0)


# --------------------------- BINARY LOSSES ---------------------------

def lovasz_hinge(logits, labels, per_image=True, ignore=None):
    r"""
    Binary Lovasz hinge loss
      logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      per_image: compute the loss per image instead of per batch
      ignore: void class id

    Returns a scalar: the mean of the per-image losses when `per_image` is
    true, otherwise the loss of the whole flattened batch.
    """
    # The docstring above is a raw string so that "\infty" is not read as an
    # (invalid) escape sequence.
    if per_image:
        def treat_image(log_lab):
            # Score one (logits, labels) pair as a batch of size one.
            log, lab = log_lab
            log, lab = tf.expand_dims(log, 0), tf.expand_dims(lab, 0)
            log, lab = flatten_binary_scores(log, lab, ignore)
            return lovasz_hinge_flat(log, lab)
        losses = tf.map_fn(treat_image, (logits, labels), dtype=tf.float32)
        loss = tf.reduce_mean(losses)
    else:
        loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
    return loss


def lovasz_hinge_flat(logits, labels):
    r"""
    Binary Lovasz hinge loss
      logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
      labels: [P] Tensor, binary ground truth labels (0 or 1)

    Returns a scalar loss; 0 when there are no predictions at all.
    """
    # The docstring above is a raw string so that "\infty" is not read as an
    # (invalid) escape sequence.

    def compute_loss():
        labelsf = tf.cast(labels, logits.dtype)
        # Map the labels {0, 1} to signs {-1, +1} to form the hinge errors.
        signs = 2. * labelsf - 1.
        errors = 1. - logits * tf.stop_gradient(signs)
        # top_k with k equal to the full length sorts the errors in descending order.
        errors_sorted, perm = tf.nn.top_k(errors, k=tf.shape(errors)[0], name="descending_sort")
        gt_sorted = tf.gather(labelsf, perm)
        grad = lovasz_grad(gt_sorted)
        loss = tf.tensordot(tf.nn.relu(errors_sorted), tf.stop_gradient(grad), 1, name="loss_non_void")
        return loss

    # deal with the void prediction case (only void pixels)
    loss = tf.cond(tf.equal(tf.shape(logits)[0], 0),
                   lambda: tf.reduce_sum(logits) * 0.,
                   compute_loss,
                   strict=True,
                   name="loss"
                   )
    return loss


def flatten_binary_scores(scores, labels, ignore=None):
    """
    Flatten the predictions and labels of a batch (binary case).
    When `ignore` is given, entries whose label equals it are dropped.
    """
    flat_scores = tf.reshape(scores, (-1,))
    flat_labels = tf.reshape(labels, (-1,))
    if ignore is not None:
        keep = tf.not_equal(flat_labels, ignore)
        kept_scores = tf.boolean_mask(flat_scores, keep, name='valid_scores')
        kept_labels = tf.boolean_mask(flat_labels, keep, name='valid_labels')
        return kept_scores, kept_labels
    return flat_scores, flat_labels

def lovasz_loss(y_true, y_pred):
    """Keras loss wrapper around `lovasz_hinge`, computed per image.

    The trailing channel axis is squeezed from both tensors; labels are cast
    to int32 and predictions to float32.
    """
    labels = K.cast(K.squeeze(y_true, -1), 'int32')
    # The predictions are handed on unchanged as logits (no inverse-sigmoid step).
    logits = K.cast(K.squeeze(y_pred, -1), 'float32')
    return lovasz_hinge(logits, labels, per_image = True, ignore = None)

In [None]:
# ---- Stage 1: train the sigmoid-output network with binary cross-entropy ----

# Save the model whenever ModelCheckpoint's default monitored quantity improves.
model_checkpoint = ModelCheckpoint("keras.model", save_best_only=True, verbose=1)

# The monitored quantity is an IoU score, where higher is better, so the mode
# is set to 'max' explicitly (as in the second training stage) instead of
# relying on Keras to guess the direction from the metric's name.
reduce_lr = ReduceLROnPlateau(monitor='val_my_iou_metric', mode='max', factor=5e-1,
                              patience=5, min_lr=1e-6, verbose=1)

# To train on augmented batches instead, feed
# `generate_images(images=x_train, masks=y_train, batch_size=...)` to
# `model.fit_generator` with `steps_per_epoch=BATCHES_PER_EPOCH`.

# Build the network.
input_layer = Input((IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS))
output_layer = build_model(input_layer, 16, 0.5)
model = Model(input_layer, output_layer)

optimizer = Adam(lr=0.005)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=[my_iou_metric])

history = model.fit(
    x_train, y_train,
    validation_data=(x_valid, y_valid),
    epochs=EPOCHS_1,
    batch_size=BATCH_SIZE_1,
    callbacks=[model_checkpoint, reduce_lr],
    verbose=2)

In [None]:
# ---- Stage 2: continue from the saved checkpoint, optimising the Lovasz loss ----
model1 = load_model('keras.model', custom_objects={'my_iou_metric':my_iou_metric})

# Re-wire the network to end at the input of its last layer, i.e. whatever
# fed that layer becomes the new model's output.
input_x = model1.layers[0].input
output_layer = model1.layers[-1].input
model = Model(input_x, output_layer)
c = optimizers.adam(lr=0.01)

# All three callbacks watch the validation IoU metric, where larger is better.
early_stopping = EarlyStopping(
    monitor='val_my_iou_metric_2',
    mode = 'max',
    patience=30,
    verbose=1,
)
model_checkpoint = ModelCheckpoint(
    'keras.model',
    monitor='val_my_iou_metric_2',
    mode = 'max',
    save_best_only=True,
    verbose=1,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_my_iou_metric_2',
    mode = 'max',
    factor=0.5,
    patience=5,
    min_lr=0.00005,
    verbose=1,
)

model.compile(loss=lovasz_loss, optimizer=c, metrics=[my_iou_metric_2])
history = model.fit(
    x_train, y_train,
    validation_data=[x_valid, y_valid],
    epochs=MAX_EPOCHS,
    batch_size=BATCH_SIZE_2,
    callbacks=[model_checkpoint, reduce_lr, early_stopping],
    verbose=2,
)

In [None]:
# Reload the saved checkpoint, supplying the custom metric and loss it references.
model = load_model(
    'keras.model',
    custom_objects=dict(my_iou_metric_2=my_iou_metric_2, lovasz_loss=lovasz_loss),
)

In [None]:
# Test-time augmentation: average each validation prediction with the
# mirrored-back prediction for the left-right mirrored input.
y_valid_pred = 0.5 * (
    model.predict(x_valid)
    + np.flip(model.predict(np.flip(x_valid, axis=2)), axis=2)
)

In [None]:
def filter_image(img):
    """Replace `img` by zeros of the same shape when it sums to less than 100.

    Otherwise the input array itself is returned.
    """
    if not img.sum() < 100:
        return img
    return np.zeros(img.shape)

In [None]:
# Candidate cut-offs: 31 probabilities between 0.35 and 0.65, converted to
# logits with log(p / (1 - p)) before being compared with the predictions.
probability_grid = np.linspace(0.35, 0.65, 31)
thresholds = np.log(probability_grid / (1 - probability_grid))

# Validation score for every cut-off, after blanking out small predicted masks.
ious = np.array([
    iou_metric(y_valid, [filter_image(mask) for mask in y_valid_pred > threshold])
    for threshold in tqdm_notebook(thresholds)
])

In [None]:
# Pick the cut-off with the highest validation score.
threshold_best_index = np.argmax(ious)
threshold_best = thresholds[threshold_best_index]
iou_best = ious[threshold_best_index]

# Score as a function of the cut-off, with the best point marked.
fig, ax = plt.subplots()
ax.plot(thresholds, ious)
ax.plot(threshold_best, iou_best, "xr", label="Best threshold")
ax.set_xlabel("Threshold")
ax.set_ylabel("IoU")
ax.set_title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
ax.legend()

In [None]:
def rle_encode(im):
    """Run-length encode a mask as a space-separated 'start length ...' string.

    Pixels are read in column-major (Fortran) order and start positions are
    1-based.  A mask without any run gives an empty string.
    """
    flat = im.flatten(order = 'F')
    # Pad with zeros on both sides so runs touching either end are detected.
    padded = np.concatenate([[0], flat, [0]])
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every run's end position into the run's length.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))

In [None]:
def batch_predict(testing_ids, testing_path, batch_size=1000, threshold=0.5, model=None):
    ''' Predict batches of images to save memory.

    Parameters
    ----------
    testing_ids : sequence of str
        Ids of the images to predict; each is read from
        `<testing_path>/test/images/<id>.png`.
    testing_path : str
        Directory that contains the `test/images` folder.
    batch_size : int
        Maximum number of images loaded and predicted at once.
    threshold : float
        Cut-off applied to the averaged model output to obtain a binary mask.
    model : object with a `predict` method
        Required despite the None default.

    Returns
    -------
    pandas.DataFrame
        Columns `id` and `rle_mask`, one row per input id, in input order.

    Raises
    ------
    ValueError
        If `model` is None or `batch_size` is smaller than 1.

    NOTE(review): no small-mask filtering (see `filter_image`) is applied
    here -- confirm whether the threshold passed in was tuned with it.
    '''
    if model is None:
        raise ValueError('batch_predict requires a model')
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    testing_ids = list(testing_ids)
    if not testing_ids:
        # Nothing to predict: return an empty frame with the expected columns.
        return pd.DataFrame(columns=['id', 'rle_mask'])

    # Stepping through the ids covers a trailing partial batch and also the
    # case of fewer ids than one batch.
    batch_starts = range(0, len(testing_ids), batch_size)

    data_store = []
    for start in tqdm_notebook(batch_starts, total=len(batch_starts)):
        ids = testing_ids[start:start + batch_size]

        images = [
            np.array(load_img(os.path.join(testing_path, 'test', 'images', '{}.png'.format(image_id)),
                              grayscale=True)) / 255
            for image_id in ids
        ]
        x_batch = np.array(images).reshape(-1, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)

        # Average the prediction with the mirrored-back prediction for the
        # left-right mirrored input.
        y_pred = 0.5 * (model.predict(x_batch) + model.predict(x_batch[:, :, ::-1, :])[:, :, ::-1, :])
        masks = (y_pred > threshold).astype(np.uint8)

        data_store.append(pd.DataFrame({
            'id': ids,
            'rle_mask': [rle_encode(mask) for mask in masks],
        }))

    return pd.concat(data_store)

In [None]:
path_to_test = '../input/'


def extract_id(x):
    """Return the part of a '.png' path after the last '/' and before '.png'."""
    return x.split('.png')[0].split('/')[-1]


# Ids of all test images found under `<path_to_test>test/images`.
testing_ids = list(map(extract_id, glob.glob(path_to_test+'test/images/*.png')))

In [None]:
# Predict the test set with the best validation cut-off and write the submission file.
pred = batch_predict(
    testing_ids,
    path_to_test,
    batch_size=200,
    threshold=threshold_best,
    model=model,
)
pred.to_csv('submission.csv', index=False)