In [None]:
# Display the value of every top-level expression statement in a cell,
# not only the last one (IPython's default is "last_expr").
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:

# os.environ["CUDA_VISIBLE_DEVICES"]="-1" 
import tensorflow as tf
import numpy as np
import os
import shutil
import random
import json
from PIL import Image
import time
from datetime import datetime
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Set the seed for random operations.
# This lets our experiments be reproducible. Python's `random` module (used
# below to shuffle the train/validation split) and NumPy are seeded together
# with TensorFlow: seeding TensorFlow alone leaves the split unseeded.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Get current working directory
cwd = os.getcwd()

# Set GPU memory growth
# Allocate only as much GPU memory as needed instead of reserving it all upfront
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

**Support Functions**

In [None]:
# Helpers: JSON file describing the train/validation split, CSV writer for the results, mask run-length encoder, IoU metric

# dictionary with the format shown in the Evaluation tab
def create_json():
    """Write ``dataset_split.json`` with the training and validation file names.

    The file names are read from the ``images/img`` folder of the module-level
    ``destination_training`` and ``destination_validation`` directories, which
    must therefore be defined before this function is called. The JSON file is
    written relative to the current working directory.
    """
    split_roots = (('training', destination_training),
                   ('validation', destination_validation))

    dataset_split = {}
    for split_name, split_root in split_roots:
        image_dir = os.path.join(split_root, 'images', 'img')
        dataset_split[split_name] = os.listdir(image_dir)

    with open('dataset_split.json', 'w') as json_file:
        json.dump(dataset_split, json_file)
        
        



#IoU Metric
def my_IoU(y_true, y_pred):
    """Intersection over Union between a binary target and thresholded predictions.

    Args:
        y_true: tensor of ground-truth labels in {0, 1}; it is cast to float32
            so integer masks can be passed directly.
        y_pred: tensor of predicted probabilities, broadcastable against
            ``y_true``. Values above 0.5 are counted as class 1.

    Returns:
        Scalar float32 tensor ``intersection / union`` computed over the whole
        batch. When the union is empty (no positive pixel in either tensor)
        the result is 0 instead of NaN.
    """
    # From probability to predicted class {0, 1}.
    # Thresholding is meant for sigmoid outputs; use argmax for softmax.
    y_pred = tf.cast(y_pred > 0.5, tf.float32)
    # Integer masks cannot be multiplied with the float32 predictions.
    y_true = tf.cast(y_true, tf.float32)

    # A and B
    intersection = tf.reduce_sum(y_true * y_pred)
    # A or B
    union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection
    # IoU. divide_no_nan returns 0 for 0/0, so a batch without any positive
    # pixel does not inject NaN into the metric.
    return tf.math.divide_no_nan(intersection, union)


#Encode mask for CSV file
def rle_encode(img, channel=0):
    """Run-length encode a binary mask, scanning it column by column.

    Args:
        img: array-like mask. Singleton dimensions are squeezed away and the
            values are rounded to integers. After squeezing it is expected to
            be either 2-D ``(H, W)`` or 3-D ``(H, W, C)``.
        channel: index of the channel to encode when the squeezed input is
            3-D. The default, 0, reproduces what the previous hard-coded
            ``pixels[:-65536]`` slice kept for a 256x256x2 prediction.

    Returns:
        A string of space-separated ``start length`` pairs, where ``start`` is
        the 1-based position of a run of non-zero pixels in the column-wise
        flattened mask. An all-zero mask gives an empty string.
    """
    mask = np.round(np.squeeze(img)).astype(np.int32)

    # Multi-channel input: keep a single channel. Slicing by channel works for
    # any image size, unlike dropping a fixed number of trailing pixels, which
    # emptied every mask that was not exactly 256x256x2.
    if mask.ndim == 3:
        mask = mask[..., channel]

    # Flatten column-wise
    pixels = mask.T.flatten()
    # Pad with a zero on both sides so runs touching the borders are detected
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions where the value changes: run starts and run ends alternate
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Turn each run end into a run length
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)





#Create CSV
def create_csv(results, results_dir='./', width=256, height=256):
    """Write the predictions to a time-stamped CSV file.

    Args:
        results: mapping from image id to its encoded mask string.
        results_dir: directory the CSV is written to; it is created when it
            does not exist. Previously this argument was ignored and the file
            always went to ``'./'``.
        width: value written in the ``Width`` column of every row.
        height: value written in the ``Height`` column of every row.

    The file is named ``results_<Mon><day>_<HH>-<MM>-<SS>.csv`` and starts with
    the header ``ImageId,EncodedPixels,Width,Height``.
    """
    csv_fname = 'results_' + datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'

    # An empty string means "current directory"; makedirs rejects it.
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    with open(os.path.join(results_dir, csv_fname), 'w') as f:
        f.write('ImageId,EncodedPixels,Width,Height\n')
        for key, value in results.items():
            f.write('{},{},{},{}\n'.format(key, value, width, height))

In [None]:
# ImageDataGenerator
# ------------------

from tensorflow.keras.preprocessing.image import ImageDataGenerator

apply_data_augmentation = False

# Images and masks need two separate generator objects, but both must be
# configured identically so that an image and its mask are transformed alike.
if apply_data_augmentation:
    train_generator_kwargs = dict(rotation_range=10,
                                  width_shift_range=10,
                                  height_shift_range=10,
                                  zoom_range=0.3,
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='constant',
                                  cval=0,
                                  rescale=1./255)
else:
    # Without augmentation the pixel values are only multiplied by 1/255
    train_generator_kwargs = dict(rescale=1./255)

# Create training ImageDataGenerator objects
train_img_data_gen = ImageDataGenerator(**train_generator_kwargs)
train_mask_data_gen = ImageDataGenerator(**train_generator_kwargs)

# Create validation and test ImageDataGenerator objects (never augmented)
(valid_img_data_gen,
 valid_mask_data_gen,
 test_img_data_gen,
 test_mask_data_gen) = (ImageDataGenerator(rescale=1./255) for _ in range(4))

In [None]:
# Create the working directory and split the training set into training and validation

# Directories
source_dir = '/kaggle/input/ann-and-dl-image-segmentation/Segmentation_Dataset'
destination_dir = '/kaggle/working/dataset'
destination_training = os.path.join(destination_dir, 'training')
destination_validation = os.path.join(destination_dir, 'validation')
destination_test = os.path.join(destination_dir, 'test')

source_training_images = os.path.join(source_dir, 'training', 'images', 'img')
source_training_masks = os.path.join(source_dir, 'training', 'masks', 'img')
source_test_images = os.path.join(source_dir, 'test', 'images', 'img')

# Always start from an empty working dataset: if the destination directory
# already exists it is deleted and rebuilt from scratch.
print("Creating main directories..")
if os.path.exists(destination_dir):
    shutil.rmtree(destination_dir)

# Create images/img and masks/img inside training and validation, and
# images/img inside test. makedirs also creates the parent directories.
print("Creating subdirectories..")
for split_root, kinds in ((destination_training, ('images', 'masks')),
                          (destination_validation, ('images', 'masks')),
                          (destination_test, ('images',))):
    for kind in kinds:
        os.makedirs(os.path.join(split_root, kind, 'img'), exist_ok=True)

# Split training set and validation set, copying them from the input
# directory to the working directory.
print("Splitting train from input into Training and Validation inside working directory...")

# Two sizes divisible by 8 were chosen; as a consequence 7 of the 7647 images
# are discarded at random (validation is about 15%).
validation_size = 1048 #1528    #int(7647 * validation_split)
train_size = 6592  #6112        #7467 - validation_size

# os.listdir returns names in arbitrary order: sort first so that the shuffle
# outcome depends only on the state of the random number generator.
files = sorted(os.listdir(source_training_images))
if len(files) < validation_size + train_size:
    # Fail with a clear message instead of a bare StopIteration half-way
    # through the copy.
    raise ValueError('Expected at least {} training images in {}, found {}'.format(
        validation_size + train_size, source_training_images, len(files)))
random.shuffle(files)

validation_files = files[:validation_size]
training_files = files[validation_size:validation_size + train_size]

# A mask has the same file name as its image, in the parallel masks folder.
for split_files, split_root in ((validation_files, destination_validation),
                                (training_files, destination_training)):
    for file in split_files:
        shutil.copy(os.path.join(source_training_images, file),
                    os.path.join(split_root, 'images', 'img'))
        shutil.copy(os.path.join(source_training_masks, file),
                    os.path.join(split_root, 'masks', 'img'))

# Copy the test files from the input directory to the working directory
print("Copying test set from input inside working directory...")
for file in os.listdir(source_test_images):
    shutil.copy(os.path.join(source_test_images, file),
                os.path.join(destination_test, 'images', 'img'))

print("creating JSON file..")
create_json()

print("Done!")

In [None]:
# Create generators to read images from dataset directory
# -------------------------------------------------------

# Batch size
bs = 4

# img shape
img_h = 256
img_w = 256

num_classes=2


def _paired_flows(img_data_gen, mask_data_gen, root_dir, shuffle):
    """Return the (image, mask) directory iterators reading from ``root_dir``.

    Both iterators share target size, batch size, shuffling flag and SEED.
    ATTENTION: the common seed is what keeps every image batch aligned with
    its mask batch when transformations/shuffling are applied.
    """
    shared_kwargs = dict(target_size=(img_h, img_w),
                         batch_size=bs,
                         class_mode=None,  # Because we have no class subfolders in this case
                         shuffle=shuffle,
                         # NOTE(review): bilinear resizing is also applied to the
                         # masks; confirm they stay binary after resizing.
                         interpolation='bilinear',
                         seed=SEED)
    img_flow = img_data_gen.flow_from_directory(os.path.join(root_dir, 'images'),
                                                **shared_kwargs)
    mask_flow = mask_data_gen.flow_from_directory(os.path.join(root_dir, 'masks'),
                                                  color_mode='grayscale',
                                                  **shared_kwargs)
    return img_flow, mask_flow


# Training
train_img_gen, train_mask_gen = _paired_flows(train_img_data_gen,
                                              train_mask_data_gen,
                                              destination_training,
                                              shuffle=True)
# Iterator pairing the i-th image batch with the i-th mask batch
train_gen = zip(train_img_gen, train_mask_gen)

# Validation
valid_img_gen, valid_mask_gen = _paired_flows(valid_img_data_gen,
                                              valid_mask_data_gen,
                                              destination_validation,
                                              shuffle=False)
valid_gen = zip(valid_img_gen, valid_mask_gen)


In [None]:
# Create Dataset objects
# ----------------------

def prepare_target(x_, y_):
    """Convert a batch of masks into integer labels, leaving the images untouched.

    The first channel of ``y_`` is kept (with a trailing channel axis) and cast
    to int32; then every positive value is decreased by one and every other
    value is increased by one.
    NOTE(review): for masks holding only 0 and 1 this swaps the two labels -
    confirm that the inversion is intended.
    """
    first_channel = tf.expand_dims(y_[..., 0], -1)
    labels = tf.cast(first_channel, tf.int32)
    remapped = tf.where(labels > 0, labels - 1, labels + 1)
    return x_, remapped


def _dataset_from_batches(batch_pairs):
    """Wrap an iterator of (image batch, mask batch) pairs into a repeated tf.data.Dataset."""
    raw_dataset = tf.data.Dataset.from_generator(
        lambda: batch_pairs,
        output_types=(tf.float32, tf.float32),
        # 1: batch size  2: height  3: width  4: channels
        output_shapes=([bs, img_h, img_w, 3], [bs, img_h, img_w, 1]))
    return raw_dataset.map(prepare_target).repeat()


# Training
# --------
train_dataset = _dataset_from_batches(train_gen)

# Validation
# ----------
valid_dataset = _dataset_from_batches(valid_gen)


In [None]:
#np.unique(target_img)


## Convolutional Neural Network (CNN)
### Encoder-Decoder

In [None]:
# Create Model
# ------------

def create_model(depth, start_f, num_classes, dynamic_input_shape):
    """Build a sequential convolutional encoder-decoder.

    Args:
        depth: number of encoder stages (Conv2D + ReLU + 2x2 max pooling) and,
            symmetrically, of decoder stages (2x bilinear upsampling + Conv2D
            + ReLU).
        start_f: number of filters of the first convolution; it doubles at
            every encoder stage and halves back at every decoder stage.
        num_classes: number of filters (output channels) of the final 1x1
            convolution, which uses a sigmoid activation.
        dynamic_input_shape: if True the model accepts inputs of shape
            ``[None, None, 3]``; otherwise the input shape is fixed to the
            module-level ``[img_h, img_w, 3]``.

    Returns:
        The (uncompiled) ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()

    # Encoder
    # -------
    for i in range(depth):
        conv_kwargs = dict(filters=start_f * 2 ** i,
                           kernel_size=(3, 3),
                           strides=(1, 1),
                           padding='same')
        # Only the first layer of a Sequential model declares the input shape;
        # the following layers infer it from the previous layer's output.
        if i == 0:
            if dynamic_input_shape:
                conv_kwargs['input_shape'] = [None, None, 3]
            else:
                conv_kwargs['input_shape'] = [img_h, img_w, 3]

        model.add(tf.keras.layers.Conv2D(**conv_kwargs))
        model.add(tf.keras.layers.ReLU())
        model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))

    # Decoder
    # -------
    # Mirror the encoder: the filters go from start_f * 2**(depth-1) down to start_f.
    for i in reversed(range(depth)):
        model.add(tf.keras.layers.UpSampling2D(2, interpolation='bilinear'))
        model.add(tf.keras.layers.Conv2D(filters=start_f * 2 ** i,
                                         kernel_size=(3, 3),
                                         strides=(1, 1),
                                         padding='same'))
        model.add(tf.keras.layers.ReLU())

    # Prediction Layer
    # ----------------
    model.add(tf.keras.layers.Conv2D(filters=num_classes,
                                     kernel_size=(1, 1),
                                     strides=(1, 1),
                                     padding='same',
                                     activation='sigmoid'))

    return model

In [None]:
# Build the encoder-decoder: 4 encoder/decoder stages, 4 filters in the first
# convolution, 2 output channels and a fixed (img_h, img_w, 3) input shape.
model = create_model(depth=4, 
                     start_f=4, 
                     num_classes=2, 
                     dynamic_input_shape=False)

# Visualize created model as a table
model.summary()

# Visualize initialized weights
# NOTE(review): this displays every weight tensor of the model in the cell
# output - consider removing it once the initialization has been checked.
model.weights

## Prepare the model for training

In [None]:
# Optimization params
# -------------------

# Loss
# Sparse Categorical Crossentropy to use integers (mask) instead of one-hot encoded labels
# from_logits=False: the loss is given the activated outputs of the model,
# not raw logits.
# NOTE(review): the prediction layer built by create_model uses a sigmoid on
# each output channel - confirm this pairing with a categorical loss is intended.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False) 
# learning rate
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# -------------------

# Validation metrics
# ------------------

# my_IoU thresholds the predictions at 0.5 before computing the overlap
metrics = [my_IoU]  #metrics='accuracy'
# ------------------

# Compile Model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)  

## Training with callbacks

In [None]:
import os
from datetime import datetime

cwd = os.getcwd()

# Every run gets its own time-stamped experiment directory:
#   segmentation_experiments/<model_name>_<timestamp>/{ckpts, tb_logs}
exps_dir = os.path.join(cwd, 'segmentation_experiments')
os.makedirs(exps_dir, exist_ok=True)

now = datetime.now().strftime('%b%d_%H-%M-%S')

model_name = 'CNN'

exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
os.makedirs(exp_dir, exist_ok=True)

callbacks = []

# Model checkpoint
# ----------------
ckpt_dir = os.path.join(exp_dir, 'ckpts')
os.makedirs(ckpt_dir, exist_ok=True)

# One checkpoint per epoch; {epoch:02d} is filled in by Keras
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'),
                                                   save_weights_only=True)  # False to save the model directly
callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
# ---------------------------------
tb_dir = os.path.join(exp_dir, 'tb_logs')
os.makedirs(tb_dir, exist_ok=True)

# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=0)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
early_stop = False
if early_stop:
    # The module is `tf.keras.callbacks` (plural); the previous
    # `tf.keras.callback` raised AttributeError as soon as early_stop was enabled.
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    callbacks.append(es_callback)


# Both datasets repeat indefinitely, so the number of batches per epoch is
# given explicitly from the length of the directory iterators.
model.fit(x=train_dataset,
          epochs=50,  #### set repeat in training dataset
          steps_per_epoch=len(train_img_gen),
          validation_data=valid_dataset,
          validation_steps=len(valid_img_gen),
          callbacks=callbacks
         )

# How to visualize Tensorboard

# 1. tensorboard --logdir EXPERIMENTS_DIR --port PORT     <- from terminal
# 2. localhost:PORT   <- in your browser

## Test model

## Compute prediction

In [None]:
import time
import matplotlib.pyplot as plt

from PIL import Image

%matplotlib notebook

# Cycle over test images

test_img_dir = os.path.join(destination_test, 'images', 'img') #test_dir

img_filenames = next(os.walk(test_img_dir))[2]


results = {}
i=0
for img_filename in img_filenames:
    
    i += 1
    mask_filename = img_filename[:-4] + '.tif' #'.png'
    
    img = Image.open(os.path.join(test_img_dir, img_filename))
    img = img.convert('RGB')
    img = img.resize((256, 256))
  
    
    
    img_arr = np.array(img)
    
    img_arr = np.expand_dims(np.array(img), 0)
    
    pred = model.predict(x=img_arr / 255., batch_size = 8)
   
    pred= (pred >0.5).astype(np.uint8)
    

    # Get predicted class as the index corresponding to the maximum value in the vector probability
    
    #predicted_class = tf.argmax(out_softmax, -1)
    mask = pred[0]
  
    
    
    mask_pred = rle_encode(mask)
    mask_name = os.path.splitext(img_filename)[0]
    results[mask_name] = mask_pred   
    #if i >4: 
      #  break
    
    #time.sleep(1000)
create_csv(results)