In [None]:
# Create the working directory that will hold the extracted dataset.
!mkdir train

# Extract the training images, then rename the extracted folder to train/images.
! unzip -q ../input/carvana-image-masking-challenge/train.zip -d ./train && mv train/train train/images
# Extract the training masks, then rename the extracted folder to train/masks.
! unzip -q ../input/carvana-image-masking-challenge/train_masks.zip -d ./train && mv train/train_masks train/masks

In [None]:
import cv2
from glob import glob
import os
import numpy as np
import imageio

# Collect every path under train/masks and report how many were found.
l = glob('train/masks/*')
print(len(l))
# Read a single mask (index 100) to inspect its array shape and the distinct pixel values it holds.
m = imageio.imread(l[100])
m.shape,np.unique(m)

In [None]:
import numpy as np
import cv2
import os
from glob import glob
from sklearn.model_selection import train_test_split
import tensorflow as tf

def load_dataset(dataset_path):
    """Split the image and mask paths under `dataset_path` into two subsets.

    Paths are globbed from `<dataset_path>/images/*` and `<dataset_path>/masks/*`
    and sorted, so an image and a mask are paired purely by their position in
    the two sorted lists.

    Args:
        dataset_path: directory containing the `images` and `masks` folders.

    Returns:
        Tuple `(train_x, train_y, test_x, test_y)` of path lists, where the
        `x` lists hold image paths and the `y` lists hold mask paths.
    """
    image_paths = sorted(glob(os.path.join(dataset_path, "images/*")))
    mask_paths = sorted(glob(os.path.join(dataset_path, "masks/*")))

    # Fixed random_state keeps the split reproducible between runs.
    split = train_test_split(
        image_paths,
        mask_paths,
        test_size=0.0981,
        random_state=168,
        shuffle=True,
    )
    train_x, test_x, train_y, test_y = split
    return train_x, train_y, test_x, test_y

# Split the extracted dataset; the second subset returned by load_dataset is used as validation data.
train_x, train_y, val_x, val_y = load_dataset('train')
# Sanity check: image and mask counts should match within each subset.
print(len(train_x), len(train_y), len(val_x), len(val_y))

In [None]:
# Sentinel that lets tf.data pick parallelism / prefetch sizes itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Seed handed to the dataset shuffle calls.
SEED = 42
BATCH_SIZE = 8
# The shuffle buffer holds two batches' worth of samples.
BUFFER_SIZE = 2*BATCH_SIZE
# Alternative input resolutions kept for reference.
# IMG_SIZE = (1280, 1888) 
# IMG_SIZE = (640, 944)
# Target size; consumed as (IMG_SIZE[0], IMG_SIZE[1]) by the resize calls and the model input shapes.
IMG_SIZE = (320, 480)

In [None]:
# Capture the shell listings of the mask and image folders as Python lists.
mask_temp = !ls train/masks/*
image_temp = !ls train/images/*

# Show the first entry of each listing to eyeball the file naming.
mask_temp[0],image_temp[0]

In [None]:
# NOTE(review): these two names are not read anywhere in the visible notebook; the resize calls use
# IMG_SIZE = (320, 480) instead. Confirm the intended width/height order before relying on them.
IMG_WIDTH, IMG_HEIGHT = 320 , 480

def parse_x_y(img_path,mask_path):
    """Read one image file and one mask file and decode both into tensors.

    Args:
        img_path: path of the image file; decoded with 3 channels.
        mask_path: path of the mask file; decoded with `channels=0`, i.e. the
            channel count is taken from the file itself.

    Returns:
        Dict with the decoded tensors under the keys `'image'` and
        `'segmentation_mask'`.
    """
    raw_image = tf.io.read_file(img_path)
    decoded_image = tf.io.decode_jpeg(raw_image, channels=3)
    decoded_image = tf.image.convert_image_dtype(decoded_image, tf.uint8)

    raw_mask = tf.io.read_file(mask_path)
    decoded_mask = tf.io.decode_jpeg(raw_mask, channels=0)

    return {'image': decoded_image, 'segmentation_mask': decoded_mask}

@tf.function
def normalize(input_image: tf.Tensor, input_mask: tf.Tensor) -> tuple:
    """Cast image and mask to float32 and divide both by 255.

    Args:
        input_image: image tensor.
        input_mask: mask tensor.

    Returns:
        Tuple `(image, mask)` of float32 tensors, each divided by 255.0.
    """
    scale = 255.0
    image_f32 = tf.cast(input_image, tf.float32)
    mask_f32 = tf.cast(input_mask, tf.float32)
    return image_f32 / scale, mask_f32 / scale

@tf.function
def load_image_train(datapoint: dict) -> tuple:
    """Resize and normalize one decoded sample into an (image, mask) pair.

    Args:
        datapoint: dict with the keys `'image'` and `'segmentation_mask'`.

    Returns:
        Tuple `(image, mask)`: the image resized to `IMG_SIZE` and divided by
        255, and the first channel of the resized mask (also divided by 255)
        with a trailing singleton channel axis.
    """
    target_size = (IMG_SIZE[0], IMG_SIZE[1])

    resized_image = tf.image.resize(datapoint['image'], target_size)
    # Nearest-neighbour resizing does not blend neighbouring mask values.
    resized_mask = tf.image.resize(
        datapoint['segmentation_mask'], target_size, method='nearest'
    )

    scaled_image, scaled_mask = normalize(resized_image, resized_mask)

    # Keep only channel 0 of the mask, then restore a channel axis of size 1.
    first_channel = scaled_mask[:, :, 0]
    return scaled_image, tf.expand_dims(first_channel, axis=-1)

# Build datasets of (image path, mask path) pairs and decode each pair with parse_x_y.
train_dataset = tf.data.Dataset.from_tensor_slices((train_x,train_y))
train_dataset = train_dataset.map(parse_x_y)

val_dataset = tf.data.Dataset.from_tensor_slices((val_x,val_y))
val_dataset =val_dataset.map(parse_x_y)

dataset = {"train": train_dataset, "val": val_dataset}

# Training pipeline: preprocess in parallel, shuffle, batch, prefetch.
dataset['train'] = dataset['train'].map(
    load_image_train,
    num_parallel_calls=tf.data.experimental.AUTOTUNE
).shuffle(buffer_size=BUFFER_SIZE, seed=SEED).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: same preprocessing; the shuffled elements are cached before batching.
# NOTE(review): shuffle() precedes cache() here — confirm that shuffling validation data is intended.
dataset['val'] = dataset['val'].map(
    load_image_train, 
    num_parallel_calls=tf.data.experimental.AUTOTUNE
).shuffle(buffer_size=BUFFER_SIZE, seed=SEED).cache().batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Pair each image path with its mask path as (image, mask) tuples for both subsets.
train_X_y_paths = list(zip(train_x, train_y))
val_X_y_paths = list(zip(val_x, val_y))

In [None]:
# Pull one batch from each split and print the batch shapes plus the distinct
# label values found in the first sample of that batch.
for image,label in dataset['train'].take(1):
    print("Train image: ",image.shape)
    print("Train label: ",label.shape,"\n\tunique values", np.unique(label[0]))
    
for image,label in dataset['val'].take(1):
    print("Val image: ",image.shape)
    print("Val label: ",label.shape,"\n\tunique values", np.unique(label[0]))

In [None]:
import matplotlib.pyplot as plt

def display_sample(display_list):
    """Plot the given arrays side by side in a single row.

    The panels are titled, in order, 'Input Image', 'True Mask' and
    'Predicted Mask', so `display_list` may hold at most three entries.

    Args:
        display_list: sequence of image-like arrays to show.
    """
    titles = ['Input Image', 'True Mask', 'Predicted Mask']
    n_panels = len(display_list)

    plt.figure(figsize=(7, 7))
    for idx, item in enumerate(display_list):
        plt.subplot(1, n_panels, idx + 1)
        plt.title(titles[idx])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(item))
        plt.axis('off')
    plt.show()

In [None]:
# Display the first sample of each of five training batches next to its mask.
i=0
for image, mask in dataset['train'].take(5):
    i=i+1  # batch counter; not read again within this cell
    sample_image, sample_mask = image, mask
    display_sample([sample_image[0],sample_mask[0]])

In [None]:
# %env SM_FRAMEWORK=tf.keras

# Print the installed versions of the packages this notebook depends on.
!pip freeze | grep keras
!pip freeze | grep Keras
!pip freeze | grep tensorflow
!pip freeze | grep h5py
!pip freeze | grep opencv 
!pip freeze | grep pandas

## Unet_mobilenetV2_custom

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K

def Unet_MobilenetV2(IMAGE_SIZE=IMG_SIZE, num_classes=1):
    """Build a U-Net style model on top of an ImageNet MobileNetV2 encoder.

    The decoder starts from the encoder layer `block_13_expand_relu` and runs
    four stages. Each stage upsamples by 2, concatenates an encoder skip
    tensor and applies two Conv-BN-ReLU blocks. A 1x1 convolution followed by
    a sigmoid produces the output.

    Args:
        IMAGE_SIZE: pair whose first two entries give the input's spatial size.
        num_classes: number of output channels.

    Returns:
        A Keras `Model` mapping the input image to the sigmoid output.
    """
    inputs = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3), name="input_image")
    encoder = MobileNetV2(
        input_tensor=inputs, weights="imagenet", include_top=False, alpha=1.0
    )

    # (skip layer name, decoder width), listed in the order the decoder uses them.
    decoder_stages = [
        ("block_6_expand_relu", 64),
        ("block_3_expand_relu", 48),
        ("block_1_expand_relu", 32),
        ("input_image", 16),
    ]

    x = encoder.get_layer("block_13_expand_relu").output
    for skip_name, width in decoder_stages:
        skip = encoder.get_layer(skip_name).output
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, skip])

        # Two identical Conv -> BN -> ReLU blocks per stage.
        for _ in range(2):
            x = Conv2D(width, (3, 3), padding="same")(x)
            x = BatchNormalization()(x)
            x = Activation("relu")(x)

    x = Conv2D(num_classes, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)

    return Model(inputs, x)

In [None]:
# model = Unet_MobilenetV2(IMAGE_SIZE=IMG_SIZE, num_classes=1)
# model.summary()

## Deeplabv3p_bonlime_official

In [None]:
# -*- coding: utf-8 -*-

""" Deeplabv3+ model for Keras.
This model is based on TF repo:
https://github.com/tensorflow/models/tree/master/research/deeplab
On Pascal VOC, original model gets to 84.56% mIOU
MobileNetv2 backbone is based on this repo:
https://github.com/JonathanCMitchell/mobilenet_v2_keras
# Reference
- [Encoder-Decoder with Atrous Separable Convolution
    for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
- [Xception: Deep Learning with Depthwise Separable Convolutions]
    (https://arxiv.org/abs/1610.02357)
- [Inverted Residuals and Linear Bottlenecks: Mobile Networks for
    Classification, Detection and Segmentation](https://arxiv.org/abs/1801.04381)
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.keras.models import Model
from tensorflow.python.keras import layers
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.layers import Reshape
from tensorflow.python.keras.layers import Activation
from tensorflow.python.keras.layers import Concatenate
from tensorflow.python.keras.layers import Add
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras.layers import BatchNormalization
from tensorflow.python.keras.layers import Conv2D
from tensorflow.python.keras.layers import DepthwiseConv2D
from tensorflow.python.keras.layers import ZeroPadding2D
from tensorflow.python.keras.layers import GlobalAveragePooling2D
from tensorflow.python.keras.utils.layer_utils import get_source_inputs
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.applications.imagenet_utils import preprocess_input

WEIGHTS_PATH_X = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5"
WEIGHTS_PATH_MOBILE = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5"

WEIGHTS_PATH_X_CS = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.2/deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5"
WEIGHTS_PATH_MOBILE_CS = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.2/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5"


def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activation=False, epsilon=1e-3):
    """ SepConv with BN between depthwise & pointwise. Optionally add activation after BN
        Implements right "same" padding for even kernel sizes
        Args:
            x: input tensor
            filters: num of filters in pointwise convolution
            prefix: prefix before name
            stride: stride at depthwise conv
            kernel_size: kernel size for depthwise convolution
            rate: atrous rate for depthwise convolution
            depth_activation: flag to use activation between depthwise & pointwise convs
            epsilon: epsilon to use in BN layer
        Returns:
            tensor produced by the depthwise -> BN -> pointwise -> BN chain
    """

    if stride == 1:
        depth_padding = 'same'
    else:
        # Pad explicitly and run the strided convolution with 'valid' padding.
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # ZeroPadding2D reads a flat 2-tuple as (symmetric_height_pad, symmetric_width_pad),
        # so the (begin, end) amounts must be given per axis as ((top, bottom), (left, right)).
        # For odd kernel sizes pad_beg == pad_end, so the result is unchanged.
        x = ZeroPadding2D(((pad_beg, pad_end), (pad_beg, pad_end)))(x)
        depth_padding = 'valid'

    # Without depth_activation, a single ReLU is applied before the depthwise conv instead.
    if not depth_activation:
        x = Activation(tf.nn.relu)(x)
    x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
                        padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
    x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation(tf.nn.relu)(x)
    x = Conv2D(filters, (1, 1), padding='same',
               use_bias=False, name=prefix + '_pointwise')(x)
    x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation(tf.nn.relu)(x)

    return x


def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    """Implements right 'same' padding for even kernel sizes
        Without this there is a 1 pixel drift when stride = 2
        Args:
            x: input tensor
            filters: num of filters in the convolution
            prefix: name given to the convolution layer
            stride: stride of the convolution
            kernel_size: kernel size of the convolution
            rate: atrous rate of the convolution
        Returns:
            output tensor of the (bias-free) convolution
    """
    if stride == 1:
        return Conv2D(filters,
                      (kernel_size, kernel_size),
                      strides=(stride, stride),
                      padding='same', use_bias=False,
                      dilation_rate=(rate, rate),
                      name=prefix)(x)
    else:
        # Pad explicitly and run the strided convolution with 'valid' padding.
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # ZeroPadding2D reads a flat 2-tuple as (symmetric_height_pad, symmetric_width_pad),
        # so the (begin, end) amounts must be given per axis as ((top, bottom), (left, right)).
        # For odd kernel sizes pad_beg == pad_end, so the result is unchanged.
        x = ZeroPadding2D(((pad_beg, pad_end), (pad_beg, pad_end)))(x)
        return Conv2D(filters,
                      (kernel_size, kernel_size),
                      strides=(stride, stride),
                      padding='valid', use_bias=False,
                      dilation_rate=(rate, rate),
                      name=prefix)(x)


def _xception_block(inputs, depth_list, prefix, skip_connection_type, stride,
                    rate=1, depth_activation=False, return_skip=False):
    """ Basic building block of modified Xception network
        Args:
            inputs: input tensor
            depth_list: number of filters in each SepConv layer. len(depth_list) == 3
            prefix: prefix before name
            skip_connection_type: one of {'conv','sum','none'}
            stride: stride at last depthwise conv
            rate: atrous rate for depthwise convolution
            depth_activation: flag to use activation between depthwise & pointwise convs
            return_skip: flag to return additional tensor after 2 SepConvs for decoder
            """
    residual = inputs
    for i in range(3):
        residual = SepConv_BN(residual,
                              depth_list[i],
                              prefix + '_separable_conv{}'.format(i + 1),
                              stride=stride if i == 2 else 1,
                              rate=rate,
                              depth_activation=depth_activation)
        if i == 1:
            skip = residual
    if skip_connection_type == 'conv':
        shortcut = _conv2d_same(inputs, depth_list[-1], prefix + '_shortcut',
                                kernel_size=1,
                                stride=stride)
        shortcut = BatchNormalization(name=prefix + '_shortcut_BN')(shortcut)
        outputs = layers.add([residual, shortcut])
    elif skip_connection_type == 'sum':
        outputs = layers.add([residual, inputs])
    elif skip_connection_type == 'none':
        outputs = residual
    if return_skip:
        return outputs, skip
    else:
        return outputs


def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1):
    """Inverted residual block: optional expand, then depthwise, then project.

    Args:
        inputs: input tensor.
        expansion: multiplier applied to the input channel count in the expand conv.
        stride: stride of the depthwise convolution.
        alpha: width multiplier applied to `filters`.
        filters: nominal number of output filters before `alpha` and rounding.
        block_id: index used in layer names; a falsy value skips the expand stage.
        skip_connection: when truthy, the block input is added to the block output.
        rate: dilation rate of the depthwise convolution.

    Returns:
        Output tensor of the block.
    """
    in_channels = inputs.shape[-1]
    # Scale by alpha, then round to a multiple of 8.
    pointwise_filters = _make_divisible(int(filters * alpha), 8)
    bn_kwargs = dict(epsilon=1e-3, momentum=0.999)

    x = inputs
    if block_id:
        prefix = 'expanded_conv_{}_'.format(block_id)
        # Expand
        x = Conv2D(expansion * in_channels, kernel_size=1, padding='same',
                   use_bias=False, activation=None,
                   name=prefix + 'expand')(x)
        x = BatchNormalization(name=prefix + 'expand_BN', **bn_kwargs)(x)
        x = Activation(tf.nn.relu6, name=prefix + 'expand_relu')(x)
    else:
        # Block 0 has no expand stage and uses an un-numbered name prefix.
        prefix = 'expanded_conv_'

    # Depthwise
    x = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
                        use_bias=False, padding='same', dilation_rate=(rate, rate),
                        name=prefix + 'depthwise')(x)
    x = BatchNormalization(name=prefix + 'depthwise_BN', **bn_kwargs)(x)
    x = Activation(tf.nn.relu6, name=prefix + 'depthwise_relu')(x)

    # Project
    x = Conv2D(pointwise_filters,
               kernel_size=1, padding='same', use_bias=False, activation=None,
               name=prefix + 'project')(x)
    x = BatchNormalization(name=prefix + 'project_BN', **bn_kwargs)(x)

    if not skip_connection:
        return x
    return Add(name=prefix + 'add')([inputs, x])


def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(512, 512, 3), classes=21, backbone='mobilenetv2',
              OS=16, alpha=1., activation=None):
    """ Instantiates the Deeplabv3+ architecture
    Optionally loads weights pre-trained
    on PASCAL VOC or Cityscapes. This model is available for TensorFlow only.
    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc),
            'cityscapes' (pre-trained on cityscape) or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images. None is allowed as shape/width
        classes: number of desired classes. PASCAL VOC has 21 classes, Cityscapes has 19 classes.
            If number of classes not aligned with the weights used, last layer is initialized randomly
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        activation: optional activation to add to the top of the network.
            One of 'softmax', 'sigmoid' or None
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone. Pretrained is only available for alpha=1.
    # Returns
        A Keras model instance.
    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`
    """

    # --- argument validation ---
    if not (weights in {'pascal_voc', 'cityscapes', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `pascal_voc`, or `cityscapes` '
                         '(pre-trained on PASCAL VOC)')

    if not (backbone in {'xception', 'mobilenetv2'}):
        raise ValueError('The `backbone` argument should be either '
                         '`xception`  or `mobilenetv2` ')

    # Create a fresh Input only when the caller did not supply a tensor.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # --- feature extractor ---
    if backbone == 'xception':
        # OS selects the stride of entry block 3 and the atrous rates used afterwards.
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3), strides=(2, 2),
                   name='entry_flow_conv1_1', use_bias=False, padding='same')(img_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation(tf.nn.relu)(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation(tf.nn.relu)(x)

        x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                            skip_connection_type='conv', stride=2,
                            depth_activation=False)
        # skip1 is kept for the decoder further down.
        x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                                   skip_connection_type='conv', stride=2,
                                   depth_activation=False, return_skip=True)

        x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                            skip_connection_type='conv', stride=entry_block3_stride,
                            depth_activation=False)
        # middle flow: 16 identical residual ('sum') units
        for i in range(16):
            x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum', stride=1, rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                            skip_connection_type='conv', stride=1, rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                            skip_connection_type='none', stride=1, rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        # NOTE(review): OS is reassigned here but not read again anywhere in this function.
        OS = 8
        first_block_filters = _make_divisible(32 * alpha, 8)
        # The first conv is named 'Conv' only for 3-channel inputs; any other channel count gets
        # 'Conv_'. Presumably so that by-name weight loading skips a mismatched kernel; confirm.
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2), padding='same', use_bias=False,
                   name='Conv' if input_shape[2] == 3 else 'Conv_')(img_input)
        x = BatchNormalization(
            epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
        x = Activation(tf.nn.relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                                expansion=1, block_id=0, skip_connection=False)

        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                                expansion=6, block_id=1, skip_connection=False)
        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                                expansion=6, block_id=2, skip_connection=True)

        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                                expansion=6, block_id=3, skip_connection=False)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=4, skip_connection=True)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=5, skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,  # 1!
                                expansion=6, block_id=6, skip_connection=False)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=7, skip_connection=True)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=8, skip_connection=True)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=9, skip_connection=True)

        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=10, skip_connection=False)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=11, skip_connection=True)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=12, skip_connection=True)

        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=2,  # 1!
                                expansion=6, block_id=13, skip_connection=False)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=14, skip_connection=True)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=15, skip_connection=True)

        x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=16, skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    # NOTE(review): shape_before is assigned but never read afterwards in this function.
    shape_before = tf.shape(x)
    b4 = GlobalAveragePooling2D()(x)
    b4_shape = tf.keras.backend.int_shape(b4)
    # from (b_size, channels)->(b_size, 1, 1, channels)
    b4 = Reshape((1, 1, b4_shape[1]))(b4)
    b4 = Conv2D(256, (1, 1), padding='same',
                use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation(tf.nn.relu)(b4)
    # upsample the pooled features back to the static spatial size of x with a Resizing layer
    size_before = tf.keras.backend.int_shape(x)
    b4 = tf.keras.layers.experimental.preprocessing.Resizing(
            *size_before[1:3], interpolation="bilinear"
        )(b4)
    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation(tf.nn.relu, name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x, 256, 'aspp1',
                        rate=atrous_rates[0], depth_activation=True, epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x, 256, 'aspp2',
                        rate=atrous_rates[1], depth_activation=True, epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x, 256, 'aspp3',
                        rate=atrous_rates[2], depth_activation=True, epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1), padding='same',
               use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation(tf.nn.relu)(x)
    x = Dropout(0.1)(x)
    # DeepLab v.3+ decoder

    # The decoder with the low-level skip connection is built for the xception backbone only.
    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block
        skip_size = tf.keras.backend.int_shape(skip1)
        x = tf.keras.layers.experimental.preprocessing.Resizing(
                *skip_size[1:3], interpolation="bilinear"
            )(x)
        dec_skip1 = Conv2D(48, (1, 1), padding='same',
                           use_bias=False, name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(
            name='feature_projection0_BN', epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation(tf.nn.relu)(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x, 256, 'decoder_conv0',
                       depth_activation=True, epsilon=1e-5)
        x = SepConv_BN(x, 256, 'decoder_conv1',
                       depth_activation=True, epsilon=1e-5)

    # you can use it with arbitary number of classes
    # The logits layer keeps its pretrained name only when `classes` matches the chosen weights;
    # otherwise it gets a different name, so by-name weight loading leaves it untouched.
    if (weights == 'pascal_voc' and classes == 21) or (weights == 'cityscapes' and classes == 19):
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    # Resize the logits back to the static spatial size of the input image.
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = tf.keras.layers.experimental.preprocessing.Resizing(
            *size_before3[1:3], interpolation="bilinear"
        )(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Any `activation` value other than 'softmax' / 'sigmoid' leaves the raw logits as output.
    if activation in {'softmax', 'sigmoid'}:
        x = tf.keras.layers.Activation(activation)(x)

    model = Model(inputs, x, name='deeplabv3plus')

    # load weights (fetched via get_file, then loaded by layer name)

    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH_X,
                                    cache_subdir='models')
        else:
            weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH_MOBILE,
                                    cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'cityscapes':
        if backbone == 'xception':
            weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5',
                                    WEIGHTS_PATH_X_CS,
                                    cache_subdir='models')
#             weights_path = "pretrained_weights/deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5"    
        else:
            weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5',
                                    WEIGHTS_PATH_MOBILE_CS,
                                    cache_subdir='models')
#            weights_path = "pretrained_weights/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5"
            
        model.load_weights(weights_path, by_name=True)
    return model

def preprocess_input(x):
    """Preprocesses a numpy array encoding a batch of images.
    # Arguments
        x: a 4D numpy array consists of RGB values within [0, 255].
    # Returns
        Input array scaled to [-1.,1.]
    """
    # This wrapper has the same name as the Keras helper it delegates to, so the
    # module-level name `preprocess_input` refers to this one-argument function.
    # Calling it here would re-enter this function and fail on the `mode`
    # keyword, so the Keras helper is imported under a private alias instead.
    from tensorflow.python.keras.applications.imagenet_utils import (
        preprocess_input as _keras_preprocess_input,
    )
    return _keras_preprocess_input(x, mode='tf')

In [None]:
# model = Deeplabv3(weights='cityscapes', input_shape=(*IMG_SIZE,3), classes=1, 
#                   backbone= 'mobilenetv2', #'xception'
#                   OS=16, alpha=1, activation='sigmoid')
# model.summary()

## DeeplabV3p custom

In [None]:
def convolution_block(
    block_input,
    num_filters=256,
    kernel_size=3,
    dilation_rate=1,
    padding="same",
    use_bias=False,
):
    """Separable convolution followed by batch normalization and ReLU.

    Args:
        block_input: input tensor.
        num_filters: number of output filters of the separable convolution.
        kernel_size: kernel size of the separable convolution.
        dilation_rate: dilation rate of the separable convolution.
        padding: padding mode passed to the separable convolution.
        use_bias: whether the separable convolution uses a bias term.

    Returns:
        The ReLU-activated, batch-normalized convolution output.
    """
    x = layers.SeparableConv2D(
        num_filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        # Forward the caller's choice; the default "same" keeps existing calls unchanged.
        padding=padding,
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )(block_input)
    x = layers.BatchNormalization()(x)
    return tf.nn.relu(x)


def DilatedSpatialPyramidPooling(dspp_input):
    """Combine a pooled branch and four convolution branches of `dspp_input`.

    The pooled branch average-pools over the input's full spatial extent,
    applies a 1x1 convolution block and upsamples bilinearly back to the
    input size. The other branches are convolution blocks with kernel 1 and
    with kernel 3 at dilation rates 6, 12 and 18. All five are concatenated
    on the channel axis and passed through a final 1x1 convolution block.

    Args:
        dspp_input: input feature tensor with static spatial dimensions.

    Returns:
        Output tensor of the final 1x1 convolution block.
    """
    in_shape = dspp_input.shape
    height, width = in_shape[-3], in_shape[-2]

    # Pooled branch: pool -> 1x1 conv block -> upsample back to (height, width).
    pooled = layers.AveragePooling2D(pool_size=(height, width))(dspp_input)
    pooled = convolution_block(pooled, kernel_size=1, use_bias=True)
    out_pool = layers.UpSampling2D(
        size=(height // pooled.shape[1], width // pooled.shape[2]),
        interpolation="bilinear",
    )(pooled)

    branches = [
        out_pool,
        convolution_block(dspp_input, kernel_size=1, dilation_rate=1),
    ]
    branches += [
        convolution_block(dspp_input, kernel_size=3, dilation_rate=dilation)
        for dilation in (6, 12, 18)
    ]

    merged = layers.Concatenate(axis=-1)(branches)
    return convolution_block(merged, kernel_size=1)

In [None]:
from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate,SeparableConv2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
def DeeplabV3Plus(image_size, num_classes):
    """Build a DeepLabV3+-style model on an ImageNet MobileNetV2 (alpha=0.35) encoder.

    The encoder layer `block_13_expand_relu` feeds the dilated spatial pyramid
    pooling head. Two decoder stages follow. Each applies a 1x1 convolution,
    upsamples bilinearly by 4, concatenates an encoder skip tensor and applies
    a SeparableConv-BN-ReLU block. A 1x1 convolution with sigmoid produces the
    output.

    Args:
        image_size: pair whose first two entries give the input's spatial size.
        num_classes: number of output channels.

    Returns:
        A Keras `Model` mapping the input image to the sigmoid output.
    """
    inputs = keras.Input(shape=(image_size[0], image_size[1], 3),name="input_image")
    encoder = MobileNetV2(
        input_tensor=inputs, weights="imagenet", include_top=False, alpha=0.35
    )

    x = DilatedSpatialPyramidPooling(encoder.get_layer("block_13_expand_relu").output)

    # (skip layer name, decoder width), listed in the order the decoder uses them.
    decoder_stages = (("block_3_expand_relu", 32), ("input_image", 16))
    for skip_name, width in decoder_stages:
        skip = encoder.get_layer(skip_name).output
        x = Conv2D(width, (1, 1))(x)
        x = UpSampling2D((4, 4),interpolation="bilinear")(x)
        print(x.shape)
        x = Concatenate()([x, skip])
        x = SeparableConv2D(width, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)

    x = Conv2D(num_classes, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)

    return Model(inputs, x)


# Instantiate the custom DeepLabV3+ model with a single output channel and print its layer summary.
model = DeeplabV3Plus(image_size=IMG_SIZE, num_classes=1)
model.summary()

# model = Deeplabv3(weights='cityscapes', input_shape=(*IMG_SIZE,3), classes=1, 
#                   backbone= 'mobilenetv2', #'xception'
#                   OS=16, alpha=1, activation='sigmoid')
# model.summary()

## Model training

In [None]:
# Install segmentation_models, which provides the losses and metrics imported in the next cell.
# NOTE(review): no version is pinned here; consider pinning for reproducibility.
!pip install segmentation_models

In [None]:
from segmentation_models.losses import bce_jaccard_loss, dice_loss, JaccardLoss
from segmentation_models.metrics import iou_score, f1_score, precision, recall
# Training objective: the sum of segmentation_models' dice_loss and bce_jaccard_loss.
ls = dice_loss + bce_jaccard_loss
# Metrics reported for every epoch.
metrics = [precision, recall, f1_score, iou_score] 

In [None]:
# import os, time, keras
# %env SM_FRAMEWORK=tf.keras

# import numpy as np
# import tensorflow as tf

In [None]:
import os, time, keras
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger, EarlyStopping

base_dir = 'RESULTS/Dv3p_MobilenetV2_alpha0_35_custom'
# One idempotent call creates RESULTS/, base_dir and ckpt_path. The earlier
# exists()/mkdir() pairs only created ckpt_path when base_dir itself was
# missing, so a partially created tree left the checkpoint folder absent.
os.makedirs(f"{base_dir}/ckpt_path", exist_ok=True)

# Per-epoch training history written by the CSVLogger callback.
csv_path = f"{base_dir}/history.csv"

# --- callbacks ---
root_logdir = os.path.join(os.curdir, f"{base_dir}/logs","fit","")
def get_run_logdir():
    """Return a log directory under root_logdir named after the current local time."""
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)
run_logdir = get_run_logdir()
# Taken from tf.keras so it comes from the same Keras implementation as the
# callbacks imported from tensorflow.keras.callbacks above.
tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)#, histogram_freq=1,profile_batch='10,15')

# A single file that is overwritten after every epoch (save_best_only=False),
# holding the full model rather than only its weights.
checkpoint_filepath = f'{base_dir}/'+'ckpt_path/epoch.h5'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    # monitor='val_iou_score',
    # mode='max',
    verbose = 1,
    # Replaces the deprecated `period = 1`: save once per epoch.
    save_freq = "epoch",
    save_best_only=False
    )

callbacks = [
    model_checkpoint_callback,
    # Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor="val_loss", patience=5, factor=0.1, verbose=1),
    CSVLogger(csv_path),
#     EarlyStopping(monitor="val_loss", patience=10),
    tensorboard_cb
]

# Steps per epoch: ceiling division, so a trailing partial batch still counts as one step.
train_steps = -(-len(train_x) // BATCH_SIZE)
test_steps = -(-len(val_x) // BATCH_SIZE)

print("train_steps", train_steps, "test_steps",test_steps)

# """ Model training """
# for layer in model.layers:
#     if layer.name == "global_average_pooling2d":
#         break
#     else:
#         layer.trainable = False

# for layer in model.layers:
#     print(layer.name,layer.trainable)

# Compile with the combined loss `ls` and the `metrics` list defined in the
# loss/metrics cell. The optimizer is selected by name ("adam"), so no explicit
# learning rate is passed here.
model.compile(
    loss=ls,
    optimizer= "adam", #tf.keras.optimizers.Adam(lr),
    metrics=metrics
)
# model.summary()

In [None]:
# # !pip install tensorboard
# %load_ext tensorboard
# %tensorboard --logdir ./RESULTS/MobilenetV2_alpha1/logs

In [None]:
# First training run: epochs 0-9.
# NOTE(review): this was labelled "pretrain model decoder", but the layer-freezing
# loop in the compile cell is commented out, so no layer is frozen by the visible code.
# Assumes dataset["train"] / dataset["val"] can supply train_steps / test_steps
# batches per epoch — TODO confirm against the dataset construction.
history = model.fit(
    dataset["train"],
    validation_data=dataset["val"],
    epochs=10,
    initial_epoch = 0,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=callbacks
)

In [None]:
# # pretrain model decoder
# history = model.fit(
#     dataset["train"],
#     validation_data=dataset["val"],
#     epochs= 25,
#     initial_epoch = 10,
#     steps_per_epoch=train_steps,
#     validation_steps=test_steps,
#     callbacks=callbacks
# )

## DATA Visualization
https://www.kaggle.com/vfdev5/data-visualization

## Augmentation methods
https://www.kaggle.com/gaborfodor/augmentation-methods

- **Contrast** is the difference between light and dark.
- **Saturation** is the intensity of color.
- **Brightness** refers to the relative lightness or darkness of a color.

```python
random_channel_shift(img, limit=0.05)
random_brightness(img, limit=(-0.5, 0.5), u=0.5)
random_contrast(img, limit=(-0.5, 0.5), u=0.5)
random_saturation(img, limit=(-0.5, 0.5), u=0.5)
random_gray(img, u=0.2)
random_rotate(img, mask, rotate_limit=(-20, 20), u=0.5)
random_shear(img, mask, intensity_range=(-0.3, 0.3), u=0.2)
random_flip(img, mask, u=0.3)
random_shift(img, mask, w_limit=(-0.1, 0.1), h_limit=(-0.1, 0.1), u=0.3)
random_zoom(img, mask, zoom_range=(0.8, 1), u=0.3)
```

## Predictions on Validation set

In [None]:
# Label id -> RGB color used when rendering masks and legend patches.
# Ids: 0 = background, 1 = car, 2 = slot used for the mean-IoU legend entry.
req_cmap = dict(enumerate([
    (0, 0, 0),
    (255, 0, 0),
    (255, 255, 255),
]))

# Label id -> display name, using the same ids as req_cmap.
req_mask_labels = dict(enumerate(["Background", "Car", "miou"]))

In [None]:

def convert_to_rgb(predi,_cmap={}):
    """Paint a 2-D label map as a uint8 RGB image.

    Args:
        predi: array of label values; its first two dimensions give the output size.
        _cmap: mapping from label value to an RGB triple. Every value present
            in predi must be a key, otherwise the lookup raises KeyError.

    Returns:
        Array of shape (predi.shape[0], predi.shape[1], 3), dtype uint8.
    """
    rows, cols = predi.shape[0], predi.shape[1]
    # Start from an all-white canvas, then overwrite the pixels of each label.
    rgb = np.zeros((rows, cols, 3), dtype=np.uint8)
    rgb += 255
    for label in np.unique(predi):
        rgb[predi == label] = _cmap[label]
    return rgb

def plot_imgs(i,img,mask,pred=None,cmap={},mask_labels={},label_iou={}):
    """Show input, ground truth, prediction and a blended overlay in one row.

    Args:
        i: identifier shown in the four panel titles.
        img: input image. Drawn as-is when its last dimension is 3, otherwise
            with a gray colormap.
            NOTE(review): the overlay multiplies img by 255 and casts to uint8,
            so img is presumably float in [0, 1] — confirm against the pipeline.
        mask: ground-truth image handed to imshow.
        pred: prediction image handed to imshow and blended onto img with
            weight 0.5. When omitted, a black uint8 image with img's height
            and width is used.
        cmap: label -> RGB triple (0-255), used for the legend patch colors.
        mask_labels: label -> display name for the legend.
        label_iou: label -> score; one legend entry is drawn per key, so every
            key must also exist in cmap and mask_labels.
    """
    # uint8 copy of the input, reused for the default prediction and the overlay.
    img_uint8 = np.asarray(img*255.0,dtype=np.uint8)
    if pred is None:
        # Built per call and matched to img: the previous module-level default
        # (a 1024x1024 float64 array) was allocated at definition time and had
        # a different dtype than the uint8 image it is blended with below.
        pred = np.zeros(tuple(img_uint8.shape[:2]) + (3,), dtype=np.uint8)

    fig,(ax1,ax2,ax3, ax4) = plt.subplots(1,4,figsize=(20,4))
    if img.shape[-1]==3:
        ax1.imshow(img)
    else:
        ax1.imshow(img,cmap=plt.get_cmap('gray'))
    ax1.axis('off')
    ax1.title.set_text(f"Input Image {i}") 

    ax2.imshow(mask)
    ax2.axis('off')
    ax2.title.set_text(f"Ground truth {i}")

    ax3.imshow(pred)
    ax3.axis('off')   
    ax3.title.set_text(f"Prediction {i}")  

    # Input at full weight plus the prediction at half weight.
    dst = cv2.addWeighted(img_uint8,1,pred,0.5,0)
    ax4.imshow(dst)
    ax4.axis('off')
    ax4.title.set_text(f"Overlay {i}") 

    # Legend: one colored patch per scored label, formatted "name : score".
    patches = [ mpatches.Patch(color=np.array(cmap[i])/255.0, label="{:<15}:{:2.3f} ".format(mask_labels[i],label_iou[i])) for i in label_iou.keys()]
    plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0. )

    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

    
def IoU(Yi,y_predi,mask_labels={}):
    """Compute per-class IoU and mean IoU / precision / recall / F1 / F2.

    For every key c of mask_labels, pixels are counted as
    TP (truth == c and prediction == c), FP (truth != c, prediction == c) and
    FN (truth == c, prediction != c); IoU = TP / (TP + FP + FN).
    Classes with no true positive (IoU of 0 or undefined) are left out of the
    per-class results and of every mean.

    Args:
        Yi: ground-truth label array.
        y_predi: predicted label array, comparable element-wise with Yi.
        mask_labels: mapping label -> name. Its last position
            (key len(mask_labels) - 1) is used as the slot for the mean IoU,
            so the mapping is expected to end with a placeholder entry.

    Returns:
        (labels_iou, scores) where labels_iou maps label -> IoU plus the
        mean-IoU slot, and scores is
        [mean IoU, mean precision, mean recall, mean F1, mean F2].
        All means are NaN when no class has a true positive.
    """
    def _f_beta(precision, recall, beta):
        # F-beta score for one class.
        return ((1 + beta**2) * precision * recall) / float(beta**2 * precision + recall)

    def _mean_or_nan(values):
        # Mean of the collected scores; NaN without a NumPy warning when empty.
        return np.mean(values) if values else np.float64("nan")

    IoUs, precisions, recalls, f1_scores, f2_scores = [], [], [], [], []
    labels_iou = {}
    for c in mask_labels.keys():
        TP = np.sum( (Yi == c)&(y_predi==c) )
        FP = np.sum( (Yi != c)&(y_predi==c) )
        FN = np.sum( (Yi == c)&(y_predi != c))

        # IoU > 0 exactly when TP > 0. Skipping here also avoids the 0/0
        # divisions (and their RuntimeWarnings) for classes absent from both arrays.
        if TP == 0:
            continue

        iou = TP/float(TP + FP + FN)
        precision = TP/float(TP + FP)
        recall = TP/float(TP + FN)

        labels_iou[c] = iou
        IoUs.append(iou)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(_f_beta(precision, recall, 1))
        f2_scores.append(_f_beta(precision, recall, 2))

    mIoU = _mean_or_nan(IoUs)
    # Use the mapping that was passed in, not the module-level req_mask_labels.
    labels_iou[len(mask_labels)-1] = mIoU

    return labels_iou, [mIoU, _mean_or_nan(precisions), _mean_or_nan(recalls),
                        _mean_or_nan(f1_scores), _mean_or_nan(f2_scores)]


In [None]:
from tqdm import tqdm
import matplotlib.patches as mpatches
# df = pd.DataFrame(columns=columns)
val_data = iter(dataset["val"])

# Preview loop: the trailing `break` stops after the first validation batch.
for j in tqdm(range(0,len(val_X_y_paths),BATCH_SIZE)):
    img, mask = next(val_data)                         
    y_pred = model.predict(img) 
    
    # Iterate over the samples actually in this batch; range(BATCH_SIZE)
    # raised IndexError whenever a batch held fewer than BATCH_SIZE samples.
    for i in range(len(y_pred)):
        maski = np.squeeze(mask[i], axis=-1)
        # Threshold the sigmoid output at 0.5 to get 0/1 labels.
        y_pred_converted = np.where(y_pred[i] > 0.5, 1, 0)
        y_predi = tf.image.resize(y_pred_converted,IMG_SIZE ,method='nearest')
        y_predi = np.squeeze(y_predi, axis=-1)        
        label_iou, eval_=IoU(maski, y_predi, mask_labels=req_mask_labels)
        
        input_img = tf.image.resize(img[i],IMG_SIZE ,method='nearest') 
        ground_truth = np.squeeze(convert_to_rgb(maski, req_cmap))    
        prediction = np.squeeze(convert_to_rgb(y_predi, req_cmap))
        
        # i+j+1 gives a 1-based running sample number for the panel titles.
        plot_imgs(i+j+1,input_img, ground_truth, prediction,cmap=req_cmap,mask_labels=req_mask_labels,label_iou=label_iou)
    break

## TEST RESULT

In [None]:
# Delete the extracted training images and masks before the test archive is unpacked.
!rm -r train/

In [None]:
# Extract the competition's test archive into the working directory (-q: no per-file output).
! unzip -q ../input/carvana-image-masking-challenge/test.zip -d ./

In [None]:
import gc
# Run a garbage-collection pass before the inference section starts.
gc.collect()

In [None]:
!pip install -q segmentation-models

In [None]:
from tqdm import tqdm
from tensorflow import keras
import tensorflow as tf

from segmentation_models.losses import bce_jaccard_loss, dice_loss, JaccardLoss
from segmentation_models.metrics import iou_score, f1_score, precision, recall
# Rebuild the loss and metric objects so the saved model can be deserialized.
ls = dice_loss + bce_jaccard_loss
metrics = [precision, recall, f1_score, iou_score] 

# Load the full model written by the checkpoint callback. custom_objects maps
# names stored in the file to the callables above.
# NOTE(review): the keys presumably have to match the names segmentation_models
# gave these objects at compile time (e.g. "f1-score") — confirm.
model = keras.models.load_model('./RESULTS/Dv3p_MobilenetV2_alpha0_35_custom/ckpt_path/epoch.h5',
                               custom_objects={"dice_loss_plus_binary_crossentropy_plus_jaccard_loss": ls,
                                              "precision":precision,
                                               "recall":recall,
                                               "f1-score":f1_score,
                                               "iou_score":iou_score
                                              }
                               )

In [None]:
# NOTE(review): the names read swapped relative to their use. process_img passes
# [IMG_WIDTH, IMG_HEIGHT] as the size argument of tf.image.resize, which is
# documented as (new_height, new_width), so 320 acts as the height and 480 as the width.
IMG_WIDTH, IMG_HEIGHT = 320 , 480

# Alias for tf.data's autotune constant; the pipeline code in this cell spells
# out tf.data.experimental.AUTOTUNE rather than using it.
AUTO = tf.data.experimental.AUTOTUNE
import pandas as pd
import numpy as np
def process_img(imagePath):
    """Read one JPEG file and return it as a resized float32 RGB tensor.

    Args:
        imagePath: path of the JPEG file (string or scalar string tensor).

    Returns:
        float32 tensor of size [IMG_WIDTH, IMG_HEIGHT] with 3 channels. The
        dtype conversion rescales the uint8 pixels to the [0, 1] range.
    """
    raw_bytes = tf.io.read_file(imagePath)
    # Force three color channels regardless of how the file is encoded.
    rgb_uint8 = tf.image.decode_jpeg(raw_bytes, channels=3)
    rgb_float = tf.image.convert_image_dtype(rgb_uint8, tf.float32)
    return tf.image.resize(rgb_float, [IMG_WIDTH, IMG_HEIGHT])

def resize_img2org(img, size=(1280, 1918)):
    """Scale a prediction by 255 and resize it to the original image size.

    Args:
        img: image tensor/array accepted by tf.image.resize.
            NOTE(review): presumably the model's sigmoid output in [0, 1],
            giving values in [0, 255] after scaling — confirm.
        size: target (height, width). Defaults to (1280, 1918), the size that
            was previously hard-coded.

    Returns:
        The resized float tensor (tf.image.resize's default interpolation).
    """
    return tf.image.resize(255.*img, size)

def one_hot(img):
    """Binarize an image: 1 where the value is above 127, 0 elsewhere (uint8).

    Despite the name, no one-hot encoding is produced; the result has the same
    shape as img.
    """
    above_mid = img > 127
    return np.asarray(above_mid).astype(np.uint8)

def rle_encode(img):
    """Run-length encode a binary mask.

    Args:
        img: numpy array, 1 - mask, 0 - background. It is flattened in
            row-major order before encoding.

    Returns:
        A string of space-separated "start length" pairs, where start is the
        1-based index of a run of mask pixels in the flattened array. An
        all-background mask yields an empty string.
    """
    # Zero padding on both sides makes runs touching either end produce a
    # start and an end transition like any other run.
    padded = np.concatenate([[0], img.flatten(), [0]])
    changed = padded[1:] != padded[:-1]
    runs = np.where(changed)[0] + 1

    # Even positions hold run starts, odd positions run ends (exclusive);
    # turn each end into a length by subtracting its start.
    starts = runs[:-1:2]
    runs[1::2] = runs[1::2] - starts
    return ' '.join(map(str, runs))

# shuffle=False keeps the listing order deterministic, so the path batches and
# the image batches built from this dataset below stay aligned.
filelist_ds = tf.data.Dataset.list_files('./test/*',shuffle=False)
ds_size= filelist_ds.cardinality().numpy()
print("Number of files in the dataset: ", ds_size)

print("3 samples:")
for a in filelist_ds.take(3):
    fname= a.numpy().decode("utf-8")
    print(fname)
    
BATCH_SIZE = 25
    
# Batches of file paths, consumed alongside the image batches to name each prediction.
filelist = filelist_ds.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
# Decode and resize images in parallel. Element order is unchanged because
# tf.data keeps parallel maps deterministic unless that option is turned off.
ds_test_batched = (
    filelist_ds
    .map(process_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

print("Number of batches in test: ", ds_test_batched.cardinality().numpy())


In [None]:
# Start submission.csv with only the header row; every batch below is appended to it.
df = pd.DataFrame({'img': [], 'rle_mask': []})
df.to_csv('submission.csv', mode='w', index=False, header=True)

# Two iterators advanced in lock-step: batches of file paths and the matching
# batches of images. Both are built from filelist_ds, which is listed with shuffle=False.
fl = iter(filelist)
ds = iter(ds_test_batched)

for a in tqdm(range(ds_test_batched.cardinality().numpy())):
    files = next(fl)
    fnames= list(map(lambda x:x.numpy().decode("utf-8"),files))
    
    data = next(ds)
    pred_mask = model.predict(data)
    
    # Per predicted mask: resize_img2org -> one_hot -> rle_encode.
    rle = list(map(rle_encode,map(one_hot,map(resize_img2org,pred_mask))))       
    # Keep only the text after the last "/" of each path for the img column.
    test_paths__ = list(map(lambda x: x.split("/")[-1],fnames))
    df = pd.DataFrame({'img': test_paths__, 'rle_mask': rle})
    # Append without a header so the file grows batch by batch.
    df.to_csv('submission.csv', mode='a', index=False, header=False)
    
#     break

In [21]:
# Sanity check of the written file: total rows, number of distinct image names
# (the two counts match when no image appears twice) and the last rows.
df2=pd.read_csv('./submission.csv')
len(df2), len(np.unique(df2['img'])),df2.tail()

(100064,
 100064,
                         img                                           rle_mask
 100059  fff341f26a19_12.jpg  377721 16 379624 33 381540 36 383457 37 385372...
 100060  fff341f26a19_13.jpg  379626 30 381542 33 383458 36 385373 17 385404...
 100061  fff341f26a19_14.jpg  379626 30 381541 33 383457 35 385372 23 387287...
 100062  fff341f26a19_15.jpg  377710 27 377756 2 379623 35 379672 4 381539 3...
 100063  fff341f26a19_16.jpg  377709 28 379623 34 381539 37 383455 39 385371...)

In [None]:
# df2=pd.read_csv('./submission.csv.gz')

# df2 = df2.drop_duplicates()

# !rm ./submission.csv.gz

# df2.to_csv("submission.csv.gz",compression='gzip',index=False)

In [22]:
# Compress the submission in place; gzip replaces submission.csv with submission.csv.gz.
!gzip submission.csv

In [23]:
# Delete the extracted test images now that the submission file has been written.
!rm -r test/
# !rm submission.csv