# Image Segmentation - 2nd Challenge

## Pretrained encoder (Resnet50), UNet architecture

In this notebook we build a UNet-style architecture that uses a pretrained ResNet50 as the encoder and additionally feeds the output of the first convolutional block of VGG16 (`block1_conv2`) into the last decoder stage.

We ran some trials with VGG16, VGG19 and ResNet as the encoder; ResNet performed better than the others.


Main aspects:
- Skip connections between the encoder and the decoder
- Fine-tuning the whole encoder, starting from the ImageNet-pretrained weights, with a small learning rate (1e-4)
- Dice loss implementation
- Last stage of the decoder formed by a concatenation of the upsampled decoder features, the network input and the output of the first VGG16 convolutional block


As a reference for this implementation, we have used this repo:
https://github.com/killthekitten/kaggle-carvana-2017

In [None]:
import numpy as np 
import pandas as pd 
import os
import tensorflow as tf
import numpy as np


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


# Seed TensorFlow's global random generator; SEED is reused by the data
# iterators further down.
SEED = 1234
tf.random.set_seed(SEED)

cwd = os.getcwd()


# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured identically on all GPUs.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is requested after GPU initialization.
        print(err)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Toggle for the random flip augmentation applied to images and masks.
apply_data_augmentation = True


# Images and masks must go through exactly the same generator configuration,
# otherwise the random flips would no longer line up. Both generators are
# therefore built from one shared keyword dictionary.
if apply_data_augmentation:
    # NOTE: `featurewise_center` / `featurewise_std_normalization` are
    # intentionally not set. The generators are never `.fit()` on data, so
    # Keras would only emit a warning and skip the normalization; applied to
    # the masks it would additionally corrupt the labels.
    data_gen_kwargs = dict(horizontal_flip=True,
                           vertical_flip=True,
                           rescale=1./255,
                           fill_mode="reflect",
                           validation_split=0.2)
else:
    data_gen_kwargs = dict(rescale=1./255, validation_split=0.2)

# 20% of the files are held out as the 'validation' subset.
train_img_data_gen = ImageDataGenerator(**data_gen_kwargs)
train_mask_data_gen = ImageDataGenerator(**data_gen_kwargs)

In [None]:
dataset_dir = "/kaggle/input/ann-and-dl-image-segmentation/Segmentation_Dataset"

# Batch size and the resolution every image / mask is resized to.
bs = 4
img_h = 256
img_w = 256

training_dir = os.path.join(dataset_dir, 'training')
images_dir = os.path.join(training_dir, 'images')
masks_dir = os.path.join(training_dir, 'masks')

# Options shared by all four directory iterators. Image and mask iterators
# receive the same seed, presumably so that shuffling and random flips stay
# aligned between an image and its mask.
common_flow_kwargs = dict(target_size=(img_h, img_w),
                          batch_size=bs,
                          class_mode=None,
                          interpolation='bilinear',
                          seed=SEED)


#------------------- TRAINING DATASET ---------------------

train_img_gen = train_img_data_gen.flow_from_directory(images_dir,
                                                       shuffle=True,
                                                       color_mode='rgb',
                                                       subset='training',
                                                       **common_flow_kwargs)
train_mask_gen = train_mask_data_gen.flow_from_directory(masks_dir,
                                                         shuffle=True,
                                                         color_mode='grayscale',
                                                         subset='training',
                                                         **common_flow_kwargs)
# Pair every image batch with the mask batch produced at the same step.
train_gen = zip(train_img_gen, train_mask_gen)



#------------------- VALIDATION DATASET ---------------------

valid_img_gen = train_img_data_gen.flow_from_directory(images_dir,
                                                       shuffle=False,
                                                       color_mode='rgb',
                                                       subset='validation',
                                                       **common_flow_kwargs)
valid_mask_gen = train_mask_data_gen.flow_from_directory(masks_dir,
                                                         shuffle=False,
                                                         color_mode='grayscale',
                                                         subset='validation',
                                                         **common_flow_kwargs)
valid_gen = zip(valid_img_gen, valid_mask_gen)

In [None]:
def prepare_target(x_, y_):
    """Convert the target batch to int32, leaving the input batch untouched.

    Args:
        x_: input (image) batch, returned as-is.
        y_: target (mask) batch, converted with ``tf.cast(..., tf.int32)``.

    Returns:
        The ``(x_, y_)`` pair with ``y_`` cast to ``tf.int32``.
    """
    return x_, tf.cast(y_, tf.int32)




# Element signature shared by both datasets: a float32 3-channel image batch
# and a float32 single-channel mask batch, each with a free batch dimension.
generator_output_types = (tf.float32, tf.float32)
generator_output_shapes = ([None, img_h, img_w, 3], [None, img_h, img_w, 1])


#------------------- TRAIN DATASET ---------------------
train_dataset = (
    tf.data.Dataset.from_generator(lambda: train_gen,
                                   output_types=generator_output_types,
                                   output_shapes=generator_output_shapes)
    .map(prepare_target)  # masks -> int32
    .repeat()
)




#------------------- VALIDATION DATASET ---------------------
valid_dataset = (
    tf.data.Dataset.from_generator(lambda: valid_gen,
                                   output_types=generator_output_types,
                                   output_shapes=generator_output_shapes)
    .map(prepare_target)  # masks -> int32
    .repeat()
)

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import Input
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, UpSampling2D, Conv2DTranspose
from tensorflow.keras.layers import Activation, SpatialDropout2D
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling2D



def conv_block(prevlayer, filters, prefix, strides=(1, 1)):
    """Stack a 3x3 convolution, batch normalization and ReLU on ``prevlayer``.

    Args:
        prevlayer: tensor the block is applied to.
        filters: number of output filters of the convolution.
        prefix: name prefix; the layers are named ``<prefix>_conv``,
            ``<prefix>_bn`` and ``<prefix>_activation``.
        strides: strides of the convolution.

    Returns:
        The output tensor of the ReLU activation.
    """
    conv_name = prefix + "_conv"
    bn_name = prefix + "_bn"
    act_name = prefix + "_activation"

    x = Conv2D(
        filters,
        (3, 3),
        strides=strides,
        padding="same",
        kernel_initializer="he_normal",
        name=conv_name,
    )(prevlayer)
    x = BatchNormalization(name=bn_name)(x)
    return Activation('relu', name=act_name)(x)


def resnetUnet(resnet,input_shape):
    """Build a UNet-style segmentation model on top of a ResNet encoder.

    Every layer of ``resnet`` is made trainable. Five decoder stages upsample
    the deepest encoder feature map and concatenate it with the matching
    encoder output; the last stage concatenates the network input and the
    ``block1_conv2`` output of a frozen VGG16 that shares the same input.

    Args:
        resnet: Keras model exposing the layers ``conv1_relu``,
            ``conv2_block3_out``, ``conv3_block4_out``, ``conv4_block6_out``
            and ``conv5_block3_out``.
        input_shape: input shape forwarded to the VGG16 constructor.

    Returns:
        A Keras ``Model`` mapping ``resnet.input`` to a single-channel
        sigmoid output produced by the layer named ``prediction``.
    """

    def _decoder_stage(inputs, skips, filters, prefix):
        # Upsample, concatenate with the skip tensors, then two conv blocks.
        merged = concatenate([UpSampling2D()(inputs)] + skips, axis=-1)
        out = conv_block(merged, filters, prefix + "_1")
        return conv_block(out, filters, prefix + "_2")

    # The encoder is fine-tuned: mark every one of its layers as trainable.
    for layer in resnet.layers:
        layer.trainable = True

    # Encoder outputs used as skip connections, from shallowest to deepest.
    conv1, conv2, conv3, conv4, conv5 = (
        resnet.get_layer(layer_name).output
        for layer_name in (
            'conv1_relu',
            'conv2_block3_out',
            'conv3_block4_out',
            'conv4_block6_out',
            'conv5_block3_out',
        )
    )

    #     ------ DECODER  ------
    # Stages 1-4: one (skip tensor, filter count, name prefix) triple each.
    decoder_plan = (
        (conv4, 256, "conv6"),
        (conv3, 192, "conv7"),
        (conv2, 128, "conv8"),
        (conv1, 64, "conv9"),
    )
    x = conv5
    for skip, filters, prefix in decoder_plan:
        x = _decoder_stage(x, [skip], filters, prefix)

    # VGG16 sharing the encoder's input tensor; its weights stay frozen.
    vgg = tf.keras.applications.VGG16(input_shape=input_shape, input_tensor=resnet.input, include_top=False)
    for layer in vgg.layers:
        layer.trainable = False

    # Stage 5: concatenate the network input and the VGG16 features.
    vgg_first_conv = vgg.get_layer("block1_conv2").output
    x = _decoder_stage(x, [resnet.input, vgg_first_conv], 32, "conv10")
    x = SpatialDropout2D(0.2)(x)

    # One sigmoid output channel per pixel.
    prediction = Conv2D(1, (1, 1), activation="sigmoid", name="prediction")(x)

    return Model(resnet.input, prediction)

### Implementation of Dice loss

In [None]:
from keras import backend as K

def dice_coef(y_true, y_pred, smooth=1):
    """Return the soft Dice coefficient of every sample in the batch.

    The sums run over all non-batch axes, so one coefficient is produced per
    sample. Reducing over the last axis only would, for
    ``(batch, height, width, 1)`` masks, sum over the size-1 channel axis and
    yield a per-pixel score instead of a Dice coefficient over the mask.

    Args:
        y_true: ground-truth tensor; cast to float32.
        y_pred: predicted tensor of the same shape; cast to float32.
        smooth: constant added to numerator and denominator to avoid a
            division by zero when both inputs are all zeros.

    Returns:
        Tensor of shape ``(batch,)``; a scalar when the inputs have rank
        0 or 1, or when the rank is unknown.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Reduce over every axis except the batch axis. With no separate batch
    # axis (rank <= 1) or an unknown rank, fall back to a full reduction.
    rank = y_pred.shape.rank
    reduce_axes = list(range(1, rank)) if rank is not None and rank > 1 else None

    intersection = tf.reduce_sum(tf.abs(y_true * y_pred), axis=reduce_axes)
    denominator = (tf.reduce_sum(tf.square(y_true), axis=reduce_axes)
                   + tf.reduce_sum(tf.square(y_pred), axis=reduce_axes)
                   + smooth)
    return (2. * intersection + smooth) / denominator

def dice_coef_loss(y_true, y_pred):
    """Return the Dice loss, i.e. one minus ``dice_coef(y_true, y_pred)``."""
    coefficient = dice_coef(y_true, y_pred)
    return 1 - coefficient

### Implementation of Intersection over Union metric

In [None]:
def my_IoU(y_true, y_pred):
    """Return the intersection over union of target and thresholded prediction.

    The prediction is binarized at 0.5 and the IoU is computed over the whole
    batch at once (all elements are summed together).

    Args:
        y_true: ground-truth mask tensor with values 0/1; cast to float32 so
            that integer masks can be multiplied with the float prediction.
        y_pred: predicted probabilities.

    Returns:
        Scalar float32 tensor. When both the target and the thresholded
        prediction are empty (union == 0) the result is 1.0 instead of NaN.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred > 0.5, tf.float32)

    intersection = tf.reduce_sum(y_true * y_pred)
    union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection

    # 0/0 would give NaN and poison the running metric average; an empty
    # prediction for an empty target is treated as a perfect match.
    iou = tf.math.divide_no_nan(intersection, union)
    return tf.where(tf.equal(union, 0.0), tf.ones_like(iou), iou)

### Import RESNET and create the custom NN

In [None]:
# Input shape shared by the encoder and the segmentation model.
model_input_shape = (img_h, img_w, 3)

# ResNet50 backbone without its classification head.
resnet = tf.keras.applications.ResNet50(include_top=False, input_shape=model_input_shape)

resnet.summary()


# Attach the UNet-style decoder to the backbone.
model = resnetUnet(resnet, model_input_shape)

In [None]:
# Small learning rate, since the pretrained encoder is trained as well.
lr = 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Dice loss is the training objective. Alternative kept for reference:
#loss = tf.keras.losses.BinaryCrossentropy()
loss = dice_coef_loss

metrics = [my_IoU]

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# Both datasets repeat indefinitely, so the number of batches per epoch is
# taken from the length of the underlying directory iterators.
train_steps = len(train_img_gen)
valid_steps = len(valid_img_gen)

model.fit(x=train_dataset,
          validation_data=valid_dataset,
          epochs=5,
          steps_per_epoch=train_steps,
          validation_steps=valid_steps)

In [None]:
def rle_encode(img):
    """Run-length encode a mask, scanning it in column-major order.

    Args:
        img: numpy array holding the mask (zero = background).

    Returns:
        A space-separated string of ``start length`` pairs, where ``start``
        is the 1-based position of a run in the column-major flattened mask.
        An all-zero mask yields an empty string.
    """
    # Flatten column-wise and pad with a zero on each side so that runs
    # touching the borders still produce both a start and an end transition.
    padded = np.pad(img.flatten(order="F"), 1, mode="constant")
    # 1-based positions at which the value changes: start, end, start, end...
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into the length of its run.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))

In [None]:
import os
from datetime import datetime

def create_csv(results, results_dir='./'):
    """Write the predictions to a timestamped CSV file inside ``results_dir``.

    The file is named ``results_<Mon><day>_<HH>-<MM>-<SS>.csv`` and contains
    the header ``ImageId,EncodedPixels,Width,Height`` followed by one row per
    entry of ``results``; width and height are always written as 256.

    Args:
        results: mapping from image id to its encoded mask string.
        results_dir: directory the CSV is written to; it is created when it
            does not exist yet. Defaults to the current directory.
    """
    csv_fname = 'results_' + datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'

    # Honour `results_dir` instead of always writing to the working directory.
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)
    csv_path = os.path.join(results_dir, csv_fname)

    with open(csv_path, 'w') as f:
        f.write('ImageId,EncodedPixels,Width,Height\n')
        for key, value in results.items():
            f.write(f"{key},{value},256,256\n")

In [None]:
from PIL import Image

test_dir = "/kaggle/input/ann-and-dl-image-segmentation/Segmentation_Dataset/test"
test_img_dir = os.path.join(test_dir, 'images/img')

# File names directly inside the test image directory (no recursion).
img_filenames = next(os.walk(test_img_dir))[2]
results = {}

for image_name in img_filenames:
    # Join the file name directly; formatting a '{}' placeholder into the
    # joined path would break on directory names that contain braces.
    img = Image.open(os.path.join(test_img_dir, image_name)).convert('RGB')
    # PIL's resize expects (width, height), not (height, width).
    # NOTE(review): training resized with bilinear interpolation, here PIL's
    # default resampling filter is used - confirm this mismatch is acceptable.
    img = img.resize((img_w, img_h))
    img_array = np.expand_dims(np.array(img), 0)

    # Same 1/255 rescaling as applied by the training generators.
    out = model.predict(x=img_array / 255.)

    # Binarize the sigmoid output.
    out = np.round(out)

    # Drop the batch and channel axes so the encoder receives a 2-D mask.
    predicted = rle_encode(out[0, ..., 0])

    name = os.path.splitext(image_name)[0]

    results[name] = predicted

create_csv(results)