# Computer vision Final project

Import (most of) required package first. 

**it may be required to restart Runtime because of tensorflow addons**

In [0]:
!sudo apt-get install megatools

In [0]:
import os
import cv2
import tarfile
import zipfile
import shutil

import xml.etree.ElementTree as ET

import numpy as np
# from numpy.testing import assert_array_almost_equal

import random
import logging

from urllib import request
from socket import timeout
from urllib.error import HTTPError, URLError

from google.colab import drive
from google.colab.patches import cv2_imshow

from distutils.dir_util import copy_tree

from math import sqrt

from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
%matplotlib inline

import seaborn as sns
import pandas as pd

from scipy.interpolate import RectBivariateSpline
from scipy.linalg import svd as scipy_linalg_svd
from scipy import ndimage, misc


import tensorflow as tf
import tensorflow_addons as tfa
# from tensorflow.keras import tf.keras.layers, models
# !pip install cloud-tpu-client

import sklearn
from sklearn.metrics import multilabel_confusion_matrix, confusion_matrix


import pickle


# Root logger at DEBUG level: every message of the notebook is displayed.
logging.basicConfig(level=logging.DEBUG)

# Raise the threshold of the matplotlib logger to WARNING ...
mpl_logger = logging.getLogger("matplotlib")
mpl_logger.setLevel(logging.WARNING)

# ... and the threshold of the PIL.Image logger to ERROR.
pil_logger = logging.getLogger("PIL.Image")
pil_logger.setLevel(logging.ERROR)
# Alternative root configuration, kept for reference (errors only, timestamped):
# logging.basicConfig(level=logging.ERROR # show only error msgs,
#                     format='%(asctime)s - %(message)s',
#                     datefmt='%Y-%m-%d %H:%M:%S')

# Shorthand for the tf.data auto-tuning constant.
AUTOTUNE = tf.data.experimental.AUTOTUNE


# !pip install tfa-nightly


In [0]:
print(tf.__version__)
print(tfa.__version__)
!pip list | grep tensorflow


In [0]:
# from google.colab import drive
# drive.mount('/content/drive')

In [0]:
# Both flags are exported as environment variables because they are read by
# the %%bash cells below.
# LOAD_FROM_GITHUB: "TRUE" selects the GitHub copy of the dataset, any other
# value selects the official Oxford server.
os.environ['LOAD_FROM_GITHUB'] = "TRUE"

# The dataset has to be fetched only when its folder does not exist yet.
need_to_download = not os.path.isdir('/content/VOCdevkit')
os.environ['NEED_TO_DOWNLOAD'] = "TRUE" if need_to_download else "FALSE"
print(need_to_download)

In [0]:
%%bash
# Dry run of the download decision: only prints which source would be used.
# The variables are quoted so that an unset or empty value is a plain
# mismatch instead of a "[: =: unary operator expected" error.
if [ "$NEED_TO_DOWNLOAD" = "TRUE" ]
then
    if [ "$LOAD_FROM_GITHUB" = "TRUE" ]
    then
        echo "github"

    else
        echo "oxford"
    fi
else
    echo "no need to download"
fi

#### Parameters used

In [0]:
# Seed of the TensorFlow random generator.
# NOTE(review): only TensorFlow is seeded here; `random` is seeded inside
# get_ids and numpy is not seeded in this part of the notebook.
tf.random.set_seed(426473)
# Root folder of the extracted VOC2009 dataset.
voc_root_folder = "/content/VOCdevkit/VOC2009"
sq_size = 224 # images are resized to sq_size x sq_size pixels
# None means "use every id" in get_images / get_class_labels_str.
# NOTE(review): the calls visible in this part pass n_samples=None literally
# rather than this variable - confirm it is used further down.
n_samples = None
num_classes = 20  # equals len(classes_names)
classification_type = "multilabel" # {"single", "multilabel"}

# Batch size, available under two names.
BS = 32
BATCH_SIZE = BS

EPOCHS = 100

# Folder of the JPEG images; image paths are built as folder + id + '.jpg'.
path_image_folder = voc_root_folder + r'/JPEGImages/'
# Class names; the order defines the columns of the binarized labels.
classes_names = ('aeroplane','bicycle', 'bird','boat','bottle','bus','car',
                'cat', 'chair','cow','diningtable','dog','horse','motorbike',
                'person','pottedplant','sheep','sofa','train','tvmonitor')

# True: the in-memory images go through dataset_normalize (mean image removed).
use_mean_subtraction = False
# True: the first 2000 shuffled validation ids are moved to the training set.
enlarged_training_set = True

# filenames
# Every path below is a base folder concatenated with a file name:
# '*.h5' files for models/weights, '*_hist.sav' files for training histories.
# base_folder = r'/content/drive/My Drive/ComputerVision/models/'
base_folder_class = r'/content/weights_class_224/'
base_folder_seg = r'/content/'

# Classification
model_v1_save_filename                  = base_folder_class + r'model_v1_class_from_scratch_weights.h5'
model_v1_hist_save_filename             = base_folder_class + r'model_v1_class_from_scratch_weights_hist.sav'

model_v2_save_filename                  = base_folder_class + r'model_v2_class_from_scratch_weights.h5'
model_v2_hist_save_filename             = base_folder_class + r'model_v2_class_from_scratch_weights_hist.sav'

model_v3_save_filename                  = base_folder_class + r'model_v3_class_from_scratch_weights.h5'
model_v3_hist_save_filename             = base_folder_class + r'model_v3_class_from_scratch_weights_hist.sav'

class_tl_save_filename                  = base_folder_class + r'class_transfer_learning_model.h5'
class_tl_hist_save_filename             = base_folder_class + r'class_transfer_learning_model_hist.sav'

class_tl_finetuned_save_filename        = base_folder_class + r'class_transfer_learning_model_fine_tuned.h5'
# NOTE(review): this history file ends in '.h5' while all the other history
# files end in '.sav'. Left untouched because the downloaded archive may
# contain the file under this exact name - confirm before renaming.
class_tl_finetuned_hist_save_filename   = base_folder_class + r'class_transfer_learning_model_fine_tuned_hist.h5'

single_model_save_filename              = base_folder_class + r'single_model_save_filename.h5'
single_model_hist_save_filename         = base_folder_class + r'single_model_save_filename_hist.sav'

# Segmentation
model_seg_tl_save_filename              = base_folder_seg + r'model_seg_tl_weights.h5'
model_seg_tl_hist_save_filename         = base_folder_seg + r'model_seg_tl_hist.sav'

model_seg_fs_save_filename              = base_folder_seg + r'model_seg_fs_weights.h5'
model_seg_fs_hist_save_filename         = base_folder_seg + r'model_seg_fs_hist.sav'

model_ah_save_filename                  = base_folder_seg + r'model_ah_weights.h5'
model_ah_hist_save_filename             = base_folder_seg + r'model_ah_weights_hist.sav'

model_seg_fs_ah_save_filename           = base_folder_seg + r'model_seg_fs_ah_weights.h5'
model_seg_fs_ah_hist_save_filename      = base_folder_seg + r'model_seg_fs_ah_hist.sav'

# Empty dictionary filled later in the notebook.
# NOTE(review): presumably maps a model name to its training history - confirm.
histories = {}

# One switch per model.
# NOTE(review): presumably True retrains the model while False reuses the
# downloaded weights/history files - confirm in the training cells.
train_class_model_v1        = False
train_class_model_v2        = False
train_class_model_v3        = False
train_class_model_tl        = False
train_class_model_tl_ft     = False and False # should be False, unless really sure conditions are met ! (note: another parameter block it at the appropriate cell)
train_class_model_sp        = False

train_seg_model_tl          = False
train_seg_model_fs          = False
train_class_ah              = False
train_seg_fs_ah             = False


Fetch data

Based on the `LOAD_FROM_GITHUB` environment variable (set above), the data are fetched either from the official website or from a *personally controlled* GitHub repository. 

The goal of this manoeuvre is to save time every time the notebook is restarted from scratch and the session is lost, in which case the data need to be downloaded anew:
* official download and untar: ~3.05 minutes
* github download and unzip: ~0.55 minutes

Whichever solution is chosen, in the end the data are stored at the exact same location. 

In [0]:
%%time
%%bash
#!/bin/bash
# Fetch and extract the VOC2009 train/val data, either from the GitHub copy
# (split zip archive) or from the official Oxford server (tar archive).
# The variables are quoted so that an unset or empty value is a plain
# mismatch instead of a "[: =: unary operator expected" error.
if [ "$NEED_TO_DOWNLOAD" = "TRUE" ]
then
    if [ "$LOAD_FROM_GITHUB" = "TRUE" ]
    then
        # The archive is stored as 44 parts: final_project_db.zip.001 ... .044
        # (-nc: parts already present are not downloaded again).
        for i in {001..044};
            do
            wget -nc -nv https://raw.githubusercontent.com/gherbin/cv_gr8_finalProject/master/final_project_db.zip.$i 
            done 
        # Extraction is started from the first part.
        7z x final_project_db.zip.001 
        mv /content/final_project_db/* /content/

    else
        wget -nc http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar
        tar -xf VOCtrainval_11-May-2009.tar --totals
        # wget -nc http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2009test.tar
        # tar -xf VOC2009test.tar --totals
    fi
else
    echo "No need to download"
fi

In [0]:
%%bash
# Remove the downloaded archives once they have been extracted.
# The variables are quoted so that an unset or empty value is a plain
# mismatch instead of a "[: =: unary operator expected" error.
if [ "$NEED_TO_DOWNLOAD" = "TRUE" ]
then
    if [ "$LOAD_FROM_GITHUB" = "TRUE" ]
    then
        rm -rf final_project_db*
    else
        rm -rf VOCtrainval_11-May-2009.tar*
    fi
fi

### Download weights and histories


In [0]:
need_to_download_weights = not os.path.isdir('/content/weights_class_224') 
if need_to_download_weights:
    try:
        print("Trying to download weights and histories")
        !megadl 'https://mega.nz/#!gZh2SIgQ!2DfCFh_W2js8QbXKnCU015ZreGgilkb85AURuBkM1G0'
        !megadl 'https://mega.nz/#!0dA3WICS!rzGUdyHE5-liTE4M-gPjuxMJUaIU9vqQGug9WHt64aU'
        !7z x weights_class_224-20200531T094420Z-001.zip 
        !7z x weights_seg_224-20200601T110727Z-001.zip
    except:
        print("Error while Downloading and extracting weights. \n Please make sure that all files mentioned in the first snippet are located in /content/")
else:
    print("Weights already downloaded")



# Data Preparation

In this very first part, I build some tools and knowledge on the VOC dataset. 
The details of the code are given in the comments.

## Split the dataset


<!--In ImageSets
- layout -> person layout taster task
- main -> {class}_trainval.txt; {class}_train.txt; {class}_val.txt ==> in trainval.txt: 7054 images
- segmentation -> trainval.txt: 1499 images

=> idea: from the 7054 images in "ImageSets/main/trainval.txt" => x % -> train, y% val, z% test; 
and from the 1499 images for "ImageSets/segmentation/trainval.txt" => x% for train, y% for validation, z% for test sets.

In total, 6 tests, produced using a seed so that it's repeatable (pseudo-random)
-->
[COMMENTS]
In order to split the data, different ways are possible. In this work, I simply use the VOC2009 suggested training and validation set. 

- classification:
    - training: 3473
    - validation: 3581
- segmentation:
    - training: 749
    - validation: 750

[VOC DB stats](http://host.robots.ox.ac.uk/pascal/VOC/voc2009/dbstats.html)


## Get to know the data


#### get the ids of the images
The following cell creates the lists of identifiers that are used for the training and validation sets. The identifiers are strings, usually of the form "year_xxxxxxx". An extract is printed (first two elements of each set).

In [0]:
def get_ids(dataset_type, task_type):
    '''
    Return the shuffled list of image identifiers of a VOC image set.

    dataset_type: "training" (reads train.txt) or "validation" (reads val.txt);
                  any other value raises a ValueError.
    task_type:    "classification" reads from ImageSets/Main, any other value
                  reads from ImageSets/Segmentation.

    The shuffle is reproducible: the global `random` generator is re-seeded
    with a string built from a fixed number, dataset_type and task_type.
    '''
    split_stems = {"training": "train", "validation": "val"}
    if dataset_type not in split_stems:
        raise ValueError("dataset_type unknown")

    if task_type == "classification":
        set_folder = r'/ImageSets/Main/'
    else:
        set_folder = r'/ImageSets/Segmentation/'

    listing_path = voc_root_folder + set_folder + split_stems[dataset_type] + r'.txt'
    with open(listing_path, "r") as listing:
        # one identifier per line
        ids = [line.strip() for line in listing]

    # deterministic shuffle (note: this re-seeds the module-level generator)
    random.seed(str(426473) + dataset_type + task_type)
    random.shuffle(ids)

    return ids


A parameter `enlarged_training_set` can set up experiment with an enlarged training set. To enlarge, 2000 out of the ~3500 items from the validation set are transfered to the training ids set. This eventually gives:
- training set: ~77 %
- validation set: ~23 %

The goal is to observe the change in behavior when enlarging the training set. The remaining validation set should be large enough to draw conclusions.

We can use the ids using the function just defined. A summary of this loading is printed 

In [0]:
# Identifiers of the official VOC2009 classification split (already shuffled).
training_ids = get_ids("training", "classification")
val_ids = get_ids("validation", "classification")

if enlarged_training_set:
    # Move the first `n_moved_to_training` validation ids to the training set.
    # Both slices share the same boundary so that every id ends up in exactly
    # one set (the former `val_ids[2001:]` dropped the id at index 2000).
    n_moved_to_training = 2000
    training_ids = training_ids + val_ids[:n_moved_to_training]
    val_ids = val_ids[n_moved_to_training:]

print("Classification task:")
print("training set,   #elements = ", len(training_ids))
print("validation set, #elements = ", len(val_ids))

# extract: first two identifiers of each set
print(training_ids[0:2])
print(val_ids[0:2])

#### Helper functions

Helper functions:
- `my_reshape`
- `plot_matrix`
- `get_images`
- `get_class_labels_str`

In [0]:
from sklearn import preprocessing

# # Load in the images
# for filepath in os.listdir(path_image_folder):
def my_reshape(image_vector, sq_size, color):
    '''
    Return an image in a layout that matplotlib can display.

    - Input with more than one dimension (array or Tensor): returned as is,
      whatever the value of color.
    - Flattened input (1 dimension) with color=True: reshaped to
      (sq_size, sq_size, 3) and cast to uint8.
    - Flattened input with color=False: reshaped to (sq_size, sq_size),
      dtype unchanged.
    '''
    if image_vector.ndim != 1:
        # already an image: nothing to do
        return image_vector

    if not color:
        return np.reshape(image_vector, (sq_size, sq_size))

    return np.reshape(image_vector, (sq_size, sq_size, 3)).astype('uint8')

def plot_matrix(images_matrix, labels = None, sq_size = 32, color=True, my_color_map="viridis", h=8, w=5, transpose = False, return_figure = False, scale = 1):
    '''
    Plot the images contained in images_matrix on a grid, one subplot each.

    images_matrix: iterable over its first axis; each element is handed to
                   my_reshape(element, sq_size, color) before being displayed.
    labels:        optional sequence; labels[i] is written under image i.
                   Without labels, the index of the image is written instead.
    sq_size, color: forwarded to my_reshape.
    my_color_map:  matplotlib colormap given to imshow.
    h, w:          number of rows / columns of the grid. When both are left at
                   their default values (8 and 5), a square grid of side
                   ceil(sqrt(images_matrix.shape[0])) is used instead.
    transpose:     iterate over images_matrix.T instead of images_matrix.
    return_figure: when True, the matplotlib figure is returned (after having
                   been shown); otherwise None is returned.
    scale:         multiplier applied to the figure size (w*scale, h*scale).
    '''
    if h == 8 and w == 5:
        # default values act as "auto": smallest square grid holding all images
        h = w = int(np.ceil(np.sqrt(images_matrix.shape[0])))
    else:
        h, w = int(h), int(w)
    # h and w are integers from here on: np.ceil returns a float, and
    # add_subplot requires an integer number of rows and columns.

    fig = plt.figure(figsize=(w*scale,h*scale)) 
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.25, wspace=0.05) 

    images_matrix_used = images_matrix.T if transpose else images_matrix

    for i, img_vector in enumerate(images_matrix_used):
        ax = fig.add_subplot(h, w, i+1, xticks=[], yticks=[]) 
        ax.imshow(my_reshape(img_vector, sq_size, color), cmap = my_color_map, interpolation='nearest') 
        if labels is None:
            ax.set_xlabel('label =' + str(i), color = 'r')
        else:
            ax.set_xlabel(labels[i], color = 'w', backgroundcolor = "k")
    plt.show()

    if return_figure:
        return fig

def get_images(ids, path_image_folder, width=5, height=5, channel=3, n_samples = None):
    '''
    Load, resize and stack the images designated by ids.

    ids:               sequence of image identifiers (file names without '.jpg').
    path_image_folder: folder of the images; the path of an image is
                       path_image_folder + id + '.jpg'.
    width, height:     size of the resized images, in pixels.
    channel:           size of the last axis of the returned array; images are
                       read with the default cv2.imread flag.
    n_samples:         number of ids to load, starting from the first one;
                       None loads them all.

    Returns a uint8 array of shape (n_samples, height, width, channel), in RGB
    channel order.
    Raises FileNotFoundError when an image cannot be read.
    '''
    if n_samples is None:
        n_samples = len(ids)

    images = np.empty((n_samples, height, width, channel), dtype=np.uint8)
    for i in range(n_samples):
        image_path = path_image_folder + ids[i] + r'.jpg'
        # images are loaded in uint8, BGR order
        src_img = cv2.imread(image_path)
        if src_img is None:
            # cv2.imread does not raise on a missing/corrupted file, it returns None
            raise FileNotFoundError("Could not read image: " + image_path)
        # cv2.resize expects dsize as (width, height); the result then has
        # shape (height, width, 3), matching the layout of `images`
        src_img_resized = cv2.resize(src_img, (width, height))
        images[i] = cv2.cvtColor(src_img_resized, cv2.COLOR_BGR2RGB)   # BGR -> RGB

    return images

def get_class_labels_str(ids, unique=True, remove_difficult=True, classification_type="multilabel", n_samples = None):
    '''
    Read the class names of the objects annotated in the images designated by ids.

    For each id, the file <voc_root_folder>/Annotations/<id>.xml is parsed and
    the <name> of each <object> is collected, in document order.

    unique:              keep only the first occurrence of each class name
                         (ex: 2 bikes => 'bicycle' appears once).
    remove_difficult:    skip the objects whose <difficult> tag is "1".
    classification_type: "multilabel" keeps all the collected names,
                         "single" keeps only the first one;
                         any other value raises a ValueError.
    n_samples:           number of ids to process, starting from the first one;
                         None processes them all.

    Returns a tuple, ordered as ids, containing one tuple of class names per image.
    '''
    if n_samples is None:
        n_samples = len(ids)

    labels_per_image = []
    for idx in range(n_samples):
        annotation = ET.parse(voc_root_folder + r'/Annotations/' + ids[idx] + r'.xml').getroot()

        names = []
        for annotated_object in annotation.findall('object'):
            class_name = annotated_object.find('name').text
            is_difficult = annotated_object.find('difficult').text == "1"

            if remove_difficult and is_difficult:
                continue
            if unique and class_name in names:
                continue
            names.append(class_name)

        if classification_type == "single":
            labels_per_image.append((names[0],))
        elif classification_type == "multilabel":
            labels_per_image.append(tuple(names))
        else:
            raise ValueError('classification_type unknown value => ' + str(classification_type))

    return tuple(labels_per_image)

The following code cell sets up appropriately the training and validation sets, based on the identifiers. 

At this point, the images returned are not !! flattened, but are resized.

The labels (multiple labels per image) are stored in a tuple of tuples; the order is the one defined by ids.



#### One hot encoding
multilabels : 
[sklearn multilabel binarization](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MultiLabelBinarizer.html)

In [0]:
# Labels (class names) of the training and validation sets, in the order of
# the ids obtained from get_ids. Objects flagged as difficult are kept.
training_labels_str = get_class_labels_str(
    training_ids,
    classification_type=classification_type,
    remove_difficult=False,
    n_samples=None,
)
val_labels_str = get_class_labels_str(
    val_ids,
    classification_type=classification_type,
    remove_difficult=False,
    n_samples=None,
)

# Binarizer used to encode the labels as multi-hot vectors; the columns follow
# the order of classes_names.
multiLabelBinarizer = preprocessing.MultiLabelBinarizer(classes=classes_names)
multiLabelBinarizer.fit(classes_names)


Based on the training_ids, load all images in memory. 
Note: this is not used by the tf_generator principle !!!

#### Load all images in memory

In [0]:
# Load the images designated by the ids, resized to sq_size x sq_size.
training_images = get_images(
    training_ids,
    path_image_folder,
    width=sq_size,
    height=sq_size,
    n_samples=None,
)
val_images = get_images(
    val_ids,
    path_image_folder,
    width=sq_size,
    height=sq_size,
    n_samples=None,
)

# Encode the labels (class names) with the binarizer: one column per class.
training_labels_binarized = multiLabelBinarizer.fit_transform(training_labels_str)
val_labels_binarized = multiLabelBinarizer.fit_transform(val_labels_str)


At this point, images are loaded in ram, and corresponding labels are available (both in string and one hot encoded format). We verify the images/labels in the next snippet. 

In [0]:
# Type and value range of the first training image.
print("Training")
print("Type = ", training_images[0].dtype, sep="")
print("min = ", np.min(training_images[0:1,:]), sep="")
print("max = ", np.max(training_images[0:1,:]), sep="")

# At this point the images are still the uint8 arrays built by get_images,
# which can be displayed directly. Show the first 16 images of each set
# together with their labels.
plot_matrix(training_images[0:16,:], labels=training_labels_str[0:16], sq_size=sq_size, scale=2)

print("Validation")
plot_matrix(val_images[0:16,:], labels=val_labels_str[0:16], sq_size=sq_size, scale=2)

we check also the matching of the labels in string and binary formats

In [0]:
# Compare, for one training sample and one validation sample, the identifier,
# the labels as class names and the labels in binarized form.
index_training = 14
index_val = 12

print(training_ids[index_training])
print(training_labels_str[index_training])
print(training_labels_binarized[index_training])
print("- "*20)
# the validation sample is designated by index_val (index_training was used
# here by mistake, so the id printed did not match the labels printed below)
print(val_ids[index_val])
print(val_labels_str[index_val])
print(val_labels_binarized[index_val])

# Every sample must have at least one class set to 1.
print("--"*20)
print("Are all Training Labels well binarized: ", np.all(np.any(training_labels_binarized == 1, axis=1)))
print("Are all Validation Labels well Binarized: " , np.all(np.any(val_labels_binarized == 1, axis=1)))

We can confirm the shapes of the different datastructures

In [0]:
# Summary of the data structures built so far: shape and dtype of the image
# arrays, number of label tuples (one tuple per image).
print("Data Structures shapes:")
print("--"*13 + "\n")
print("Training images shape         : ", training_images.shape)
print("Training images type         : ", training_images[0].dtype)
print("Training class labels length  :" , len(training_labels_str))
print("--"*40)
print("Validation images shape       : ", val_images.shape)
print("Validation images type         : ", val_images[0].dtype)
print("Validation class labels length:" , len(val_labels_str))


Have the color values between 0 and 1 (or -1 -> 1)

In [0]:
# Rescale the images by 1/255 (float32). The test on the maximum makes the
# cell safe to run twice: images that were already rescaled are left untouched.
if np.max(training_images) > 1:
    training_images = np.divide(training_images, 255.0, dtype=np.float32)
    val_images = np.divide(val_images, 255.0, dtype=np.float32)

# Mean image of the training set, with a leading axis of size 1 so that it
# broadcasts against a batch of images, and global standard deviation (scalar).
MEAN_TRAIN_IMAGE = np.expand_dims(
    np.mean(training_images, axis=0, dtype=np.float32),
    0,
)
STD_TRAIN_IMAGE = np.std(training_images, dtype=np.float32)

print("shape MEAN TRAIN IMG = ", MEAN_TRAIN_IMAGE.shape)
print("Type MEAN TRAIN IMG = ", MEAN_TRAIN_IMAGE.dtype)
print("max MEAN TRAIN IMG = ", np.max(MEAN_TRAIN_IMAGE))
print("min MEAN TRAIN IMG = ", np.min(MEAN_TRAIN_IMAGE))

print("Type STD TRAIN IMG = ", STD_TRAIN_IMAGE.dtype)
print("STD_TRAIN_IMAGE  = ", STD_TRAIN_IMAGE)
plot_matrix(MEAN_TRAIN_IMAGE, ["mean"])


In [0]:
def dataset_remove_mean(dataset, mean = MEAN_TRAIN_IMAGE):
    '''
    Subtract the mean image (by default the mean image of the training set)
    from every image of dataset.
    Returns a np.float32 array.
    '''
    centered = dataset - mean
    return centered.astype(np.float32)
def dataset_normalize(dataset, mean = MEAN_TRAIN_IMAGE):
    '''
    Remove the mean image from every image of dataset, then map the result
    from [-1, 1] back to [0, 1] with (x + 1) / 2.

    @param dataset: should be np.float32 type, limited in [0, 1]
    @return a np.float32 dataset with values in [0, 1]
    '''
    centered = dataset_remove_mean(dataset, mean)
    rescaled = (centered + 1) / 2.0
    return rescaled.astype(np.float32)

def dataset_add_mean(dataset,mean = MEAN_TRAIN_IMAGE):
    '''
    Add the mean image (by default the mean image of the training set)
    to every image of dataset.
    Returns a np.float32 array.
    '''
    shifted = dataset + mean
    return shifted.astype(np.float32)

def dataset_denormalize(dataset, mean = MEAN_TRAIN_IMAGE):
    '''
    Inverse of dataset_normalize: map the values with 2*x - 1, add the mean
    image back and clip the result to [0, 1].

    @param dataset: should be np.float32 type, limited in [0, 1]
    @return a np.float32 dataset with values in [0, 1]
    typ. usage : plot_matrix(dataset_denormalize(dataset), labels, sq_size, scale = 1)
    '''
    centered = dataset*2.0 - 1
    restored = dataset_add_mean(centered, mean).astype(np.float32)
    return np.clip(restored, 0, 1.0)




# Copies of the image arrays as they are before the optional normalization.
backup_train = training_images.copy()
backup_val = val_images.copy()


if use_mean_subtraction:
    print("Training before normalization")
    print("Type = ", training_images[0].dtype, sep="")
    print("min = ", np.min(training_images[0:1,:]), sep="")
    print("max = ", np.max(training_images[0:1,:]), sep="")

    # both sets are normalized with the mean image of the TRAINING set
    training_images = dataset_normalize(training_images, MEAN_TRAIN_IMAGE)
    val_images = dataset_normalize(val_images, MEAN_TRAIN_IMAGE)

    print("Training after normalization")
    print("Type = ", training_images[0].dtype, sep="")
    print("min = ", np.min(training_images[0:1,:]), sep="")
    print("max = ", np.max(training_images[0:1,:]), sep="")

    # sanity check of the inverse transformation
    print("Training after denormalization")
    print("Type = ", dataset_denormalize(training_images[0]).dtype, sep="")
    print("min = ", np.min(dataset_denormalize(training_images[0:1,:])), sep="")
    print("max = ", np.max(dataset_denormalize(training_images[0:1,:])), sep="")

    # images must be denormalized before being displayed
    plot_matrix(dataset_denormalize(training_images[0:12,:]), labels=training_labels_str[0:12], sq_size=sq_size, scale=1)
    plot_matrix(dataset_denormalize(val_images[0:12,:]), labels=val_labels_str[0:12], sq_size=sq_size, scale=1)


### Data distribution analysis

In [0]:
# Class distribution of the official VOC split, i.e. without the enlargement
# of the training set: ids are read again with get_ids.
local_training_ids = get_ids('training', 'classification')
local_validation_ids = get_ids('validation', 'classification')

local_training_labels = multiLabelBinarizer.fit_transform(
    get_class_labels_str(local_training_ids, remove_difficult=False)
)
local_val_labels = multiLabelBinarizer.fit_transform(
    get_class_labels_str(local_validation_ids, remove_difficult=False)
)

print("VOC training ids (not enlarged) = ", len(local_training_ids))
print("VOC validation ids (not enlarged) = ", len(local_validation_ids))

# number of images per class (sum over the images of the binarized labels)
local_hist_train = np.sum(local_training_labels, axis=0)
local_hist_val = np.sum(local_val_labels, axis=0)

# fig, axes = plt.subplots(2,2,sharex = True)

In [0]:
def _as_percentages(class_counts):
    '''Express per-class counts as percentages of their total.'''
    return 100*class_counts/np.sum(class_counts)


# Share of each class (in %) in the original split and in the split actually used.
local_df = pd.DataFrame({
    "class": classes_names,
    "training_original": _as_percentages(local_hist_train),
    "training_used": _as_percentages(np.sum(training_labels_binarized, axis=0)),
    "validation_original": _as_percentages(local_hist_val),
    "validation_used": _as_percentages(np.sum(val_labels_binarized, axis=0)),
})
print(local_df.to_string())

fig, ax = plt.subplots(1,1, figsize = (12,6))
local_df.set_index( local_df["class"] ).plot.bar(ax = ax);

plt.show()


> How many are multilabels ?
> 

In [0]:
def _multilabel_proportion(images_per_label_count):
    '''
    Proportion of images having at least two labels.

    images_per_label_count: 1-D array where entry k is the number of images
                            having exactly k labels (output of np.bincount).

    Only the entries from index 2 on are counted, so images without any label
    are not reported as multilabeled, and an array with fewer than two entries
    yields 0 instead of an IndexError.
    '''
    return np.sum(images_per_label_count[2:]) / np.sum(images_per_label_count)


# number of labels of each image, then number of images for each label count
sum_training_labels_binarized = np.sum(training_labels_binarized, axis=-1)
sum_training_labels_binarized_count=np.bincount(sum_training_labels_binarized)

sum_val_labels_binarized = np.sum(val_labels_binarized, axis=-1)
sum_val_labels_binarized_count=np.bincount(sum_val_labels_binarized)

print("Proportion of multilabeled images in Classification training:\n" + str( _multilabel_proportion(sum_training_labels_binarized_count) ))
print("Proportion of multilabeled images in Classification validation:\n" + str( _multilabel_proportion(sum_val_labels_binarized_count) ))

Classes balance discussion



In [0]:
# Weight of each class in the training set used: number of training images
# containing the class, divided by the number of training images.
images_per_class = np.count_nonzero(training_labels_binarized == 1, axis=0)
classes_weights = images_per_class / training_labels_binarized.shape[0]
print(classes_weights)


# Data Preparation - tensorflow & Dataframe

In order to ease the manipulation of the data, it appears important to be able to:
- easily get the images from particular classes only
- generate batches of images in an efficient way: loading images can take a lot of time and, as training requires several tens (or hundreds) of epochs, the data must be loaded efficiently. A key exercise has been to optimize the loading of the data, in an efficient and convenient way, without completely refactoring the structure:
    * including data augmentation
    * including caching if possible
    * including pre-fetching

To build the input pipeline, I choose first to gather all the relevant information that have been extracted so far into a dataframe (from Pandas). A df can be later used by different generators according to the needs.


#### Selecting subset of classes

If the problem appears unbalanced, it may be good to analyze the behavior for only a subset of classes. The few code snippets below give that opportunity. 

$\Rightarrow$ It results in a dataframe containing only (identifiers of) images from subset of classes, with the labels modified accordingly

In short we:
* need to remove from the labels the classes not used (so that binarization can happen well)
* need functions to properly select images based on the selected classes


In [0]:
''' 
returns the tuple given as input cleared out from undesired classes. 
'''
def _keep_classes_only(tuple_of_classes, tuple_of_classes_to_keep):
    return tuple([ x for x in tuple_of_classes if x in tuple_of_classes_to_keep])

'''
example of used
'''
print(_keep_classes_only(('bicycle', 'car', 'cat', 'dog'), ('cat', 'dog')))
print(_keep_classes_only(('bicycle', 'horse', 'cat', 'boat', 'aeroplane'), ('cat', 'dog')))

In [0]:
'''
gets a dataframe containing only filepath and classes for given tuple of classes
'''
def get_dataframe_from_classes(my_df, tuple_of_classes):
    '''
    Return a copy of my_df restricted to the rows of the desired classes.

    my_df:            dataframe with a "class" column holding, for each row,
                      a tuple of class names.
    tuple_of_classes: tuple of class names (strings) to retain.

    A row is retained when it has at least one class of tuple_of_classes that
    is known to multiLabelBinarizer. In the returned dataframe, the "class"
    tuples are cleared out from the classes not listed in tuple_of_classes.

    The rows are selected with a boolean mask, so the function does not
    depend on the index of my_df being the default 0..n-1 range.
    '''
    # classes unknown to the binarizer never select a row
    known_classes = set(multiLabelBinarizer.classes_)
    classes_of_interest = set(tuple_of_classes) & known_classes

    rows_to_retain = [
        any(class_name in classes_of_interest for class_name in row_classes)
        for row_classes in my_df["class"]
    ]
    df_sliced = my_df.loc[rows_to_retain, :].copy()
    df_sliced["class"] = [ _keep_classes_only(x, tuple_of_classes) for x in df_sliced['class']]
    return df_sliced


Toy example of use:

In [0]:
# Toy dataframe: 8 files covering the combinations of three classes, plus one
# file of a fourth class.
toy_filenames = ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"]
toy_classes = [
    ('aeroplane',),
    ('bicycle',),
    ('aeroplane', 'bicycle'),
    ('bird',),
    ('aeroplane', 'bird'),
    ('bicycle', 'bird'),
    ('aeroplane', 'bicycle', 'bird'),
    ('boat',),
]
dico = {"filename": toy_filenames, "class": toy_classes}
df_dico = pd.DataFrame(data=dico)
print("Original dataframe:\n")
print(df_dico)

# keep only the rows (and the labels) related to 'aeroplane' and 'bicycle'
print("\n\nOutput dataframe:\n")
df_sliced = get_dataframe_from_classes(df_dico, ('aeroplane','bicycle'))
print(df_sliced)

#### Create base dictionary

At this point, we have the tools to build the dataframe. The next code snippets:  
1. Build a dictionary
2. Build a pandas Dataframe from this dictionary
3. Build the tensorflow efficient data pipeline


The name of the variable is kept as explicit as possible

>`name` | type | description
>---|---|---
>`d_class_train` | dictionary | training dictionary for classification
>`d_class_val` | dictionary | validation dictionary for classification
>`df_class_train` | dataframe | training dataframe for classification
>`df_class_val` | dataframe | validation dataframe for classification


The two dataframes are the reference and the very input of the data pipeline preparation. 



In [0]:
# Classification: one dictionary per set, associating the file name of each
# image (identifier + '.jpg') with its tuple of class names.
d_class_train = {
    'filename': [image_id + '.jpg' for image_id in training_ids],
    'class': training_labels_str,
}
d_class_val = {
    'filename': [image_id + '.jpg' for image_id in val_ids],
    'class': val_labels_str,
}

df_class_train = pd.DataFrame(d_class_train)
df_class_val = pd.DataFrame(d_class_val)

# Subsets restricted to the images (and labels) of the 'cat' and 'dog' classes.
df_class_train_cat_dog = get_dataframe_from_classes(df_class_train, ('cat','dog'))
df_class_val_cat_dog = get_dataframe_from_classes(df_class_val, ('cat','dog'))

# df_class_train
# df_class_val
# df_class_train_cat_dog
# df_class_val_cat_dog

'''
Dataframe for segmentation isn't exactly the same format
suggestion TODO => change : one dictionary for all...

'''



Final step: definition of the dataframe that are used in the training

In [0]:
'''
classification part
'''
# Dataframes used from here on. These are references to the full dataframes,
# not copies; assign the *_cat_dog dataframes instead to work on that subset.
df_class_train_to_generate = df_class_train #df_class_train_cat_dog
df_class_val_to_generate = df_class_val # df_class_val_cat_dog



#### Input pipeline

So far, we have loaded images in RAM, and we have structures containing information about where is what. 

Considering the resources available, we want an input pipeline as efficient as possible. We need:
- to provide to training (later) the images and the labels
- to augment the data, as preliminary tests indicated huge overfitting, very fast. the database being rather small after all, we need to make it larger
- to do it FAST! considering the resources available (human and machine)

Two alternatives are built and benchmarked (the benchmark tests are skipped in the nominal version, results are in the presentation)

1. tf.keras.ImageDataGenerator API
2. tf.Data API

This last is the fastest, by far, and is selected (although harder to get acquainted with...)

##### From tf.Data

In [0]:
# https://www.tensorflow.org/tutorials/load_data/images

def parse_function(filename, label):
    '''
    Load one image of the JPEGImages folder.

    filename: name of the JPEG file, relative to <voc_root_folder>/JPEGImages/
    label:    label of the image, returned untouched

    Returns (image, label), where image is a float32 tensor with 3 channels,
    scaled by 1/255 and resized to sq_size x sq_size.
    '''
    jpeg_bytes = tf.io.read_file(voc_root_folder + r'/JPEGImages/' + filename)
    decoded = tf.image.decode_jpeg(jpeg_bytes, channels=3)

    # float32 values between 0 and 1, then resize
    scaled = tf.cast(decoded, tf.float32) / 255.0
    resized = tf.image.resize(scaled, [sq_size, sq_size])

    return resized, label

def classification_normalize(input_image, input_label):
    '''
    SKIPPED - does not seem to help!

    Center an image on the training-set mean while keeping it in [0.0, 1.0].

    pre: MEAN_TRAIN_IMAGE shall be available in the environment

    input:  - input_image: tensor image, dtype tf.float32, values between 0.0 and 1.0
            - input_label: passed through untouched
    returns: - ((input_image - MEAN_TRAIN_IMAGE) + 1) / 2, first element along axis 0
             - the input_label (wo transformation)

    NOTE(review): the trailing [0] presumably drops a leading batch axis
    brought in by broadcasting against MEAN_TRAIN_IMAGE - confirm its shape.
    '''
    # the difference lies in [-1, 1]; shifting by 1 and halving maps it back to [0, 1]
    centered = input_image - MEAN_TRAIN_IMAGE
    rescaled = (centered + 1) / 2.0

    return rescaled[0], input_label



def classification_denormalize(input_image, input_label):
    '''
    Inverse of classification_normalize: add the training-set mean back.

    pre: MEAN_TRAIN_IMAGE shall be available in the environment

    input:  - input_image: tensor image, dtype tf.float32, values between 0.0 and 1.0
            - input_label: passed through untouched
    returns: - (input_image * 2 - 1) + MEAN_TRAIN_IMAGE, clipped to [0.0, 1.0],
               first element along axis 0
             - the input_label (wo transformation)

    NOTE(review): the trailing [0] presumably drops a leading batch axis
    brought in by broadcasting against MEAN_TRAIN_IMAGE - confirm its shape.
    '''
    restored = tf.clip_by_value(input_image * 2.0 - 1 + MEAN_TRAIN_IMAGE, 0.0, 1.0)
    return restored[0], input_label



def train_classification_preprocess(image, label):
    '''
    Random data augmentation applied to one training image.

    input:  - a tensor image, dtype float32, values between 0.0 and 1.0
            - the corresponding label
    returns a preprocessed image (values clipped between 0.0 and 1.0) and
    the corresponding label (wo transformation).

    The preprocessing steps are, in this order:
    - random horizontal flip
    - random brightness change (max_delta 0.3)
    - random saturation change (factor in [0.8, 1.2])
    - random contrast change (factor in [0.85, 1.15])
    - when a uniform draw exceeds 0.5: pad to (sq_size+25) and randomly
      crop back to (sq_size, sq_size, 3), i.e. a random translation
    - only when the minor version of tensorflow_addons is > 8: random rotation
      with an angle in [-pi/7, pi/7], followed by a 70% central crop and
      a resize back to (sq_size, sq_size)
    '''

    image = tf.image.random_flip_left_right(image)
    
    
    # photometric changes; values may leave [0, 1] here, they are clipped at the end
    image = tf.image.random_brightness(image, max_delta=0.3)
    image = tf.image.random_saturation(image, lower=0.8, upper=1.2)
    image = tf.image.random_contrast(image, 0.85, 1.15)

    
    # NOTE(review): this Python `if` on a tensor presumably relies on AutoGraph
    # when the function is used through Dataset.map - both branches keep the
    # shape (sq_size, sq_size, 3)
    if tf.random.uniform(()) > 0.5:
        image = tf.image.resize_with_crop_or_pad(image, sq_size+25, sq_size+25) # pad to sq_size+25: 25 extra pixels per axis in total
        image = tf.image.random_crop(image, size=[sq_size, sq_size, 3])
       
    # randomly rotate image
    # Requires tfa nightly built: only the minor version number is checked
    if int(tfa.__version__.split('.')[1]) > 8:
        angle=tf.random.uniform(shape=[], minval=-np.pi/7, maxval=np.pi/7)
        image = tfa.image.rotate(image, angle)
        # keep the central 70% of the rotated image, then go back to the network input size
        image = tf.image.central_crop(image, 0.70)
        image = tf.image.resize(image,  size = [sq_size, sq_size])


    #Make sure the image is still in [0, 1]
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image, label



def get_classification_generator(dataframe, multiLabelBinarizer, to_augment = True, cache=True, model = None):
    '''
    Build the tf.data input pipeline used for classification.

    input:  - dataframe: contains a column "filename" and a column "class" (see above)
            - multiLabelBinarizer: converts the labels into their binary encoding
            - to_augment: apply train_classification_preprocess to every image
                          (training: True, validation: False)
            - cache: if a string, path of the cache file; any other value caches in RAM
            - model: not used

    returns a tf.data.Dataset yielding batches of BATCH_SIZE (image, label)
    pairs, shuffled, repeated indefinitely and prefetched.

    NOTE(review): fit_transform re-fits the binarizer on every call, so the
    label columns follow the classes of the dataframe passed in. Training and
    validation encodings only match when both dataframes hold the same set of
    classes - confirm, or fit once and use transform() here.
    '''
    image_names = list(dataframe["filename"])
    encoded_labels = multiLabelBinarizer.fit_transform(tuple(dataframe["class"]))

    dataset = tf.data.Dataset.from_tensor_slices((image_names, encoded_labels))
    dataset = dataset.map(parse_function, num_parallel_calls=AUTOTUNE)

    # cache after parse_function and before augmentation, so cached images can
    # still receive different random transforms on later passes
    dataset = dataset.cache(cache) if isinstance(cache, str) else dataset.cache()

    if to_augment:
        dataset = dataset.map(train_classification_preprocess, num_parallel_calls=AUTOTUNE)

    # the shuffle buffer covers the whole dataset
    return (dataset
            .shuffle(len(image_names), seed = 426473)
            .repeat()
            .batch(BATCH_SIZE)
            .prefetch(buffer_size = AUTOTUNE))



In [0]:
# Effectively build the generators: augmentation for training only.
training_set_class_tf_generator = get_classification_generator(
    df_class_train_to_generate,
    multiLabelBinarizer,
    to_augment=True,
)
val_set_class_tf_generator = get_classification_generator(
    df_class_val_to_generate,
    multiLabelBinarizer,
    to_augment=False,
)


Toy test of the pipeline to make sure steps are well respected. This is purely local and is skipped in the nominal run.

In [0]:
'''
test the classification pipeline (specifically normalization and denormalization)
'''
# Manual walk-through of parse_function + normalize + denormalize on the first
# training image, printing the min / max pixel value after every step.
# Disabled by default: set the flag to True to run it.
test_the_classification_pipeline = False
if test_the_classification_pipeline:
    _filenames = list(df_class_train_to_generate["filename"])
    _labels = multiLabelBinarizer.fit_transform(tuple(df_class_train_to_generate["class"]))
    filename = _filenames[0]
    filename = voc_root_folder+r'/JPEGImages/'+filename
    label = _labels[0]
    image_string = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    tf.print("original")
    tf.print("min = " + str(np.min(image.numpy())))
    tf.print("max = " + str(np.max(image.numpy())))
    # shape of the raw decoded image
    print(image.shape)

    # cast to float32 and divide by 255
    image = tf.cast(image, tf.float32)/255.0
    tf.print("after convert to float32")
    tf.print("min = " + str(np.min(image.numpy())))
    tf.print("max = " + str(np.max(image.numpy())))

    image = tf.image.resize(image, [sq_size, sq_size])
    image, label = classification_normalize(image, label) # ((image - MEAN_TRAIN_IMAGE) + 1) / 2, first element along axis 0
    tf.print("after normalization")
    tf.print("min = " + str(np.min(image.numpy())))
    tf.print("max = " + str(np.max(image.numpy())))

    image, label = classification_denormalize(image, label) # (image * 2 - 1) + MEAN_TRAIN_IMAGE, clipped to [0, 1], first element along axis 0
    tf.print("after denormalization")
    tf.print("min = " + str(np.min(image.numpy())))
    tf.print("max = " + str(np.max(image.numpy())))


##### From Keras API
Simpler but slower method to build a data generator with augmentation.
This is not used anymore.

In [0]:
# Alternative using the simpler but slower ImageDataGenerator.

# Training: rescale by 1/255 and apply random geometric augmentation.
images_generator_training = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    featurewise_center=False,
    featurewise_std_normalization=False,
    rotation_range=20,
    width_shift_range=.2,
    height_shift_range=.2,
    horizontal_flip=True,
    zoom_range=0.2,
)

# Validation: rescaling only, no augmentation.
images_generator_validation = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    featurewise_center=False,
    featurewise_std_normalization=False,
)

# Featurewise normalization is switched off above. To use it, the quantities
# it needs (std, mean, and principal components if ZCA whitening is applied)
# would first have to be computed with:
# images_generator_training.fit(training_images)
# images_generator_validation.fit(training_images)

def get_classification_keras_generator(dataframe, mode, seed = 426473, BS = BS, sq_size = sq_size):
    '''
    Build a Keras dataframe iterator over the JPEGImages folder.

    input:  - dataframe: contains a column "filename" and a column "class"
            - mode: 'training' (augmenting generator) or 'validation' (rescaling only)
            - seed: seed used for shuffling
            - BS: batch size
            - sq_size: side of the square images produced
    returns the iterator created by flow_from_dataframe (categorical class
    mode, shuffled) on the generator matching `mode`.
    raises ValueError when mode is neither 'training' nor 'validation'.

    NOTE(review): the defaults BS and sq_size are read when this cell runs, so
    both must already be defined at that point - confirm on a fresh kernel.
    '''
    if mode == 'training':
        image_generator = images_generator_training
    elif mode == 'validation':
        image_generator = images_generator_validation
    else:
        raise ValueError('mode unknown')

    # both modes share the same flow settings; only the generator differs
    return image_generator.flow_from_dataframe(
        dataframe,
        directory = voc_root_folder+r'/JPEGImages/',
        x_col='filename',
        y_col='class',
        class_mode='categorical',
        target_size = (sq_size, sq_size),
        batch_size = BS,
        shuffle = True,
        seed = seed,
    )

# Keras-API counterparts of the tf.data generators, built on the same dataframes
train_img_gen = get_classification_keras_generator(df_class_train_to_generate, mode = 'training')
val_img_gen = get_classification_keras_generator(df_class_val_to_generate, mode = 'validation')


#### Benchmarking the two APIs
Benchmark of `tf.Data` and `tf.keras.preprocessing.image.ImageDataGenerator`

In [0]:
# Set to True to time the tf.data and Keras generators against each other (skipped in the nominal run)
perform_benchmark = False

In [0]:
import time
# number of batches pulled by timeit() when `steps` is not given
default_timeit_steps = 1000

def timeit(ds, steps=default_timeit_steps, batch_size=None):
    '''
    Credit: https://www.tensorflow.org/tutorials/load_data/images

    Measure how fast batches can be pulled out of a dataset / generator.

    input:  - ds: any iterable yielding batches
            - steps: number of batches to pull
            - batch_size: number of images per batch, used for the throughput
                          figure; when None, the global BATCH_SIZE is used
    prints one dot every 10 batches, then the elapsed time and the images/s.
    returns nothing.
    '''
    if batch_size is None:
        batch_size = BATCH_SIZE

    # perf_counter is monotonic and high resolution, unlike time.time()
    start = time.perf_counter()
    it = iter(ds)
    for i in range(steps):
        next(it)
        if i%10 == 0:
            print('.',end='')
    duration = time.perf_counter() - start
    print()

    # guard against a zero duration (e.g. steps == 0 on a coarse clock)
    rate = batch_size*steps/duration if duration > 0 else float('inf')
    print("{} batches: {} s".format(steps, duration))
    print("{:0.5f} Images/s".format(rate))

In [0]:
# Optional benchmark: first the two tf.data generators, then the two Keras ones
if perform_benchmark:
    timeit(training_set_class_tf_generator)
    timeit(val_set_class_tf_generator)
    print("==="*25)
    timeit(train_img_gen)
    timeit(val_img_gen)

# Classification Tasks


### Data checkup

In [0]:
'''
For each classification sets (training and validation), show one batch
'''

print("Training Set")
# the loop body is empty on purpose: it only binds the first batch to the loop variables
for image_train_batch, label_train_batch in training_set_class_tf_generator.take(1):
    pass

# back from the binary label matrix to tuples of class names, used as plot titles
label_train_batch_str = multiLabelBinarizer.inverse_transform(np.array(label_train_batch))

plot_matrix(image_train_batch, label_train_batch_str, scale = 2)
# plot_matrix(dataset_denormalize(image_train_batch.numpy()), label_train_batch_str, scale = 2)

print("=="*30)

print("Validaton Set")
# same procedure for the validation generator
for image_val_batch, label_val_batch in val_set_class_tf_generator.take(1):
    pass

label_val_batch_str = multiLabelBinarizer.inverse_transform(np.array(label_val_batch))

plot_matrix(image_val_batch, label_val_batch_str, scale = 2)
# plot_matrix(dataset_denormalize(image_val_batch.numpy()), label_val_batch_str, scale = 2)


In [0]:
# Dimensions of the model input: square images of side sq_size with 3 channels
height = sq_size
width = sq_size
channel = 3
print(height)
print(width)
print(channel)

In [0]:
'''
This cell is a sandbox
'''
# scratch cell: no effect on the rest of the notebook
print("sandbox")



Based on the probabilities of each class (training set), we should define an initializer for the bias of the last layer. 


In [0]:
class MyBiasInitializer(tf.keras.initializers.Initializer):
    '''
    Bias initializer for the output layer, based on per-class probabilities.

    For a probability p_k of class k, the bias is
        b_k = -log((1 - p_k) / p_k)
    so that sigmoid(b_k) == p_k.

    input:  - init_weights: optional array-like of per-class probabilities.
              When None (default, same behaviour as before), the global
              `classes_weights` is read each time the initializer is used.
              NOTE(review): `classes_weights` is presumably the vector of
              class probabilities of the training set - confirm.
    '''

    def __init__(self, init_weights=None):
        # accepting init_weights makes from_config(get_config()) work
        self.init_weights = init_weights

    def _weights(self):
        '''Return the probabilities in use as a float64 numpy array.'''
        source = classes_weights if self.init_weights is None else self.init_weights
        return np.asarray(source, dtype='float64')

    #- np.log( (np.ones(classes_weights.shape) - classes_weights) / classes_weights)
    def __call__(self, shape, dtype=None, **kwargs):
        '''
        Return the bias tensor. `shape` is not used: the result has the shape
        of the probabilities. `dtype` defaults to float32 when not given.
        '''
        # keep probabilities away from 0 and 1, which would give infinite biases
        probabilities = np.clip(self._weights(), 1e-7, 1.0 - 1e-7)
        bias = - np.log((1.0 - probabilities) / probabilities)
        return tf.convert_to_tensor(bias, dtype='float32' if dtype is None else dtype)

    def get_config(self):  # To support serialization
        # plain list: JSON-serializable, and accepted back by __init__
        return {'init_weights' : self._weights().tolist()}

### Model Class
To ease the model retrieval, we encapsulate the building of the network in a class, using static method. 
 (note: this would need to be refactored as a class inheriting from Model to improve clarity and modularity, and ease further development)

In [0]:
class Gr8ClassNet:
    '''
    Factory of the convolutional classification networks of the project.

    Both builders are static methods returning a (non compiled)
    tf.keras.models.Sequential model:
    - build:        VGG-like stack ending with Flatten + Dense(1024)
    - build_second: strided first convolution, ending with global max pooling

    NOTE: the default value of `classification_type` in both signatures is the
    value of the module-level `classification_type` at the time this class
    definition is executed.
    '''

    @staticmethod
    def build(width, height, channel, 
              dropout = False, 
              batch_normalization = False, 
              extra_dense = False, 
              extra_convolution_batch = False, 
              l2_reg = (0,0), classification_type=classification_type, num_classes = 20):
        '''
        Build the first network.

        input:  - width, height, channel: input dimensions; the input shape is
                  (height, width, channel)
                - dropout: add a Dropout(0.3) after every pooling stage and
                  before the output layer
                - batch_normalization: add a BatchNormalization before every
                  ReLU of the base network
                - extra_dense: add a second Dense(1024) block
                - extra_convolution_batch: add two more stages of two 192-filter
                  convolutions; these stages ALWAYS use batch normalization,
                  whatever the value of `batch_normalization`
                - l2_reg: (lambda for the dense layers, lambda for the convolutions)
                - classification_type: "single" (softmax output) or
                  "multilabel" (sigmoid output)
                - num_classes: number of output units
        returns the Sequential model.
        raises ValueError when classification_type is unknown.
        '''
        tf.print(classification_type)
        inputShape = (height, width, channel)

        # l2_reg[0] regularizes the dense kernels, l2_reg[1] the convolution kernels
        lambda_fc = l2_reg[0]
        lambda_conv = l2_reg[1]

        '''
        Classification type used
        '''
        output_activation_function = "unknown"
        if classification_type == "single":
            output_activation_function = "softmax"
        elif classification_type == "multilabel":
            output_activation_function = "sigmoid"
        else:
            raise ValueError("classification_type value unknown")


        model = tf.keras.models.Sequential()


        # Stage 1: CONV(32) => [BN] => RELU => POOL
        model.add(tf.keras.layers.Conv2D(32, 
                                         (3, 3), 
                                         padding="same", 
                                         strides = (1,1),
                                         input_shape=inputShape,
                                         kernel_regularizer=tf.keras.regularizers.l2(lambda_conv),
                                         name = 'conv1',
                                         ))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())

        model.add(tf.keras.layers.Activation("relu"))
        
        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))  

        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))

        # Stage 2: (CONV(64) => [BN] => RELU) * 2 => POOL
        ######################################################################
        model.add(tf.keras.layers.Conv2D(64, 
                                        (3, 3), 
                                        padding="same",
                                        kernel_regularizer=tf.keras.regularizers.l2(lambda_conv),
                                        name = 'conv2',) )
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))


        model.add(tf.keras.layers.Conv2D(64, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))

        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))

        # Stage 3: (CONV(128) => [BN] => RELU) * 2 => POOL
        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))

        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))

        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))
  
        if extra_convolution_batch:
            # Optional stages 4 and 5: (CONV(192) => BN => RELU) * 2 => POOL, twice.
            # BatchNormalization is unconditional here (batch_normalization is not checked).
            ######################################################################
            model.add(tf.keras.layers.Conv2D(192, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            model.add(tf.keras.layers.Conv2D(192, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
            if dropout:
                model.add(tf.keras.layers.Dropout(0.3))
            
            
            model.add(tf.keras.layers.Conv2D(192, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            model.add(tf.keras.layers.Conv2D(192, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
            if dropout:
                model.add(tf.keras.layers.Dropout(0.3))
            

        # Classifier head: FLATTEN => FC(1024) => [BN] => RELU (twice when extra_dense)
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(1024, kernel_regularizer=tf.keras.regularizers.l2(lambda_fc)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))

        if extra_dense:
            model.add(tf.keras.layers.Dense(1024, kernel_regularizer=tf.keras.regularizers.l2(lambda_fc)))
            if batch_normalization:
                model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))
        
        # use a *softmax* activation for single-label classification
        # and *sigmoid* activation for multi-label classification
        model.add(tf.keras.layers.Dense(num_classes,  kernel_regularizer=tf.keras.regularizers.l2(lambda_fc))) #bias_initializer = MyBiasInitializer(),
        model.add(tf.keras.layers.Activation(output_activation_function))
        return model
    
    @staticmethod
    def build_second(width, height, channel, 
                    dropout = False, 
                    batch_normalization = False, 
                    extra_dense = False, 
                    extra_convolution_batch = False, 
                    l2_reg = (0,0), classification_type=classification_type, num_classes = 20):
        '''
        Build the second network (global max pooling head).

        input:  - width, height, channel: input dimensions; the input shape is
                  (height, width, channel)
                - dropout: add Dropout(0.3) layers after the convolution stages
                - batch_normalization: add a BatchNormalization on the input and
                  before every ReLU of the base network
                - extra_dense: add a Dense(512) block before the output layer
                - extra_convolution_batch: add two POOL => CONV(256) => BN => RELU
                  stages; these ALWAYS use batch normalization
                - l2_reg: (lambda for the dense layers, lambda for the convolutions)
                - classification_type: IGNORED, the method always builds a
                  multilabel network with a sigmoid output
                - num_classes: number of output units
        returns the Sequential model.
        '''

        inputShape = (height, width, channel)

        # l2_reg[0] regularizes the dense kernels, l2_reg[1] the convolution kernels
        lambda_fc = l2_reg[0]
        lambda_conv = l2_reg[1]
        # the argument is overridden: this network is multilabel only
        classification_type = "multilabel"
        output_activation_function = "sigmoid"
        model = tf.keras.models.Sequential()
        
        # optional normalization of the raw input
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization( input_shape=inputShape,))

        # Stage 1: CONV(64, 5x5, stride 2) => [BN] => RELU => CONV(64) => [BN] => RELU => POOL
        model.add(tf.keras.layers.Conv2D(64, 
                                         (5, 5), 
                                         padding="same", 
                                         strides = (2,2),
                                         input_shape=inputShape,
                                         kernel_regularizer=tf.keras.regularizers.l2(lambda_conv),
                                         name = 'conv1'))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization(input_shape=inputShape,))

        model.add(tf.keras.layers.Activation("relu"))
        model.add(tf.keras.layers.Conv2D(64, 
                                         (3, 3), 
                                         padding="same", 
                                         strides = (1,1),
                                        #  input_shape=inputShape,
                                         kernel_regularizer=tf.keras.regularizers.l2(lambda_conv),
                                         name = 'conv1bis'))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization(input_shape=inputShape,))

        model.add(tf.keras.layers.Activation("relu"))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)))  

        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))

        # Stage 2: (CONV(128) => [BN] => RELU) * 2 => POOL
        ######################################################################
        model.add(tf.keras.layers.Conv2D(128, 
                                        (3, 3),
                                        strides = (1,1), 
                                        padding="same",
                                        kernel_regularizer=tf.keras.regularizers.l2(lambda_conv),
                                        name = 'conv2') )
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))


        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))

        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2)))
        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))

        # Stage 3: (CONV(128) => [BN] => RELU) * 2, without pooling
        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))

        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Activation("relu"))
        if dropout:
            model.add(tf.keras.layers.Dropout(0.3))
  
        if extra_convolution_batch:
            # Optional stages: (POOL => CONV(256) => BN => RELU) * 2.
            # BatchNormalization is unconditional here (batch_normalization is not checked).
            ######################################################################
            model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)))

            model.add(tf.keras.layers.Conv2D(256, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)))

            model.add(tf.keras.layers.Conv2D(256, (3, 3), padding="same",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            # model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)))

            # model.add(tf.keras.layers.Conv2D(512, (3, 3), padding="valid",kernel_regularizer=tf.keras.regularizers.l2(lambda_conv)))
            # model.add(tf.keras.layers.BatchNormalization())
            # model.add(tf.keras.layers.Activation("relu"))
            
            if dropout:
                model.add(tf.keras.layers.Dropout(0.3))

        # Head: global max pooling over the feature maps (no Flatten / mandatory dense layer)
        model.add(tf.keras.layers.GlobalMaxPooling2D())
        # model.add(tf.keras.layers.Dense(512, kernel_regularizer=tf.keras.regularizers.l2(lambda_fc)))
        # model.add(tf.keras.layers.Activation("relu"))
        
        if extra_dense:
            model.add(tf.keras.layers.Dense(512, kernel_regularizer=tf.keras.regularizers.l2(lambda_fc)))
            if batch_normalization:
                model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Activation("relu"))

            # model.add(tf.keras.layers.Dense(512, kernel_regularizer=tf.keras.regularizers.l2(lambda_fc)))
            # if batch_normalization:
            #     model.add(tf.keras.layers.BatchNormalization())
            # model.add(tf.keras.layers.Activation("relu"))

        
        # output layer: always a *sigmoid* activation here (multi-label classification)
        model.add(tf.keras.layers.Dense(num_classes, kernel_regularizer=tf.keras.regularizers.l2(lambda_fc))) #bias_initializer = MyBiasInitializer,
        model.add(tf.keras.layers.Activation(output_activation_function))
        return model
    


Definition of a dictionary to contain the training histories of all the models

In [0]:
# try:
#     histories = pickle.load(open(histories_filename, "rb")
# except: 
#     print("No histories path found, creating a new one instead")
#     

In [0]:
# Training hyper-parameters
INIT_LR = 1e-4          # initial learning rate
BS = 32                 # batch size
STEPS_PER_EPOCH = 100   # number of batches per epoch

# Inverse time decay in staircase mode:
#   lr = INIT_LR / (1 + decay_rate * floor(step / decay_steps))
# with decay_steps equal to 100 epochs' worth of steps
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(INIT_LR,  
                                                             decay_steps=STEPS_PER_EPOCH*100,  
                                                             decay_rate=1,  
                                                             staircase=True)

def get_optimizer():
    '''
    returns a new Adam optimizer whose learning rate follows the global lr_schedule
    '''
    optimizer = tf.keras.optimizers.Adam(lr_schedule)
    return optimizer

In [0]:
# Visual check of the learning rate schedule over the first 100000 steps
# (np.linspace default: 50 sample points)
step = np.linspace(0,100000)
lr = lr_schedule(step)
plt.figure(figsize = (8,6))
# x axis converted from optimizer steps to epochs
plt.plot(step/STEPS_PER_EPOCH, lr)
# start the y axis at 0, keep the automatic upper limit
plt.ylim([0,max(plt.ylim())])
plt.xlabel('Epoch')
_ = plt.ylabel('Learning Rate')

In [0]:
# model.save_weights(filename)
# model.load_weights(filename)

### Callbacks 
 Here, I create a custom callback class (following [Callbacks](https://www.tensorflow.org/guide/keras/custom_callback#introduction_to_keras_callbacks)) in order to retrieve information during training, and implement early stopping. 
=> not needed; it exists natively in TensorFlow: [Early Stopping](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/callbacks/EarlyStopping)

 

In [0]:
%load_ext tensorboard
!mkdir logs
logdir = r'logs/'

Early Stopping callbacks, on the two losses.

In [0]:
# Stop training when the validation loss has not improved for 10 epochs ...
callback_earlyStopping_val = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
# ... or when the training loss has not improved for 5 epochs
callback_earlyStopping_train = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
# TensorBoard logging; created here but NOT part of my_callbacks below
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

# callbacks handed to training: the two early-stopping criteria only
my_callbacks = [callback_earlyStopping_train, callback_earlyStopping_val, ] #tensorboard_callback

### Metrics

In the multilabel case, binary accuracy may be used as a metric to monitor the training. Another interesting metric is the Jaccard index. The snippet below computes the Jaccard index, and a toy example then illustrates why it is of interest.

Interestingly, it is usually used in the context of semantic segmentation (see next task) but we may as well use it for multilabel classification, see [Sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.jaccard_score.html)

In [0]:
def jaccard_index(y_true, y_pred):
    """
    >CREDIT : https://github.com/keras-team/keras-contrib/blob/master/keras_contrib/losses/jaccard.py

    Soft Jaccard index (intersection over union), computed along the last axis.
    Adapted from the Jaccard loss of the source above, but this function
    returns the index itself, not the loss.

    Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
            = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
    # Arguments
        y_true: The ground truth tensor.
        y_pred: The predicted tensor (not rounded: raw scores are used).
    # Returns
        The Jaccard index between the two tensors, one value per entry
        of the leading axes.
    # References
        - [What is a good evaluation measure for semantic segmentation?](
           http://www.bmva.org/bmvc/2013/Papers/paper0032/paper0032.pdf)
    """
    overlap = tf.math.reduce_sum(tf.math.abs(y_true * y_pred), axis=-1)
    total = tf.math.reduce_sum(tf.math.abs(y_true) + tf.math.abs(y_pred), axis=-1)
    # the small constant avoids a division by zero when both tensors are all zeros
    union = total - overlap + 1e-8
    return overlap / union

def jaccard_loss(y_true, y_pred):
    """Return the Jaccard loss, i.e. one minus `jaccard_index(y_true, y_pred)`."""
    index = jaccard_index(y_true, y_pred)
    return 1 - index


In [0]:
# Toy problem: one sample, three classes. The first two predictions are fixed
# and correct; the third one, 'k', is swept from 0 to 1 to see how each metric
# reacts to a single drifting output.
print("Comparison between several metrics for a toy problem, with 3 possible classes (multilabel)")

N_POINTS = 20  # number of values taken by the swept prediction 'k'

y0_true = np.array([[1, 0, 0]])
y0_pred = np.array([[1, 0, 0.4]])

y0_true_tf = tf.convert_to_tensor(y0_true, dtype=tf.float32)
y0_pred_tf = tf.convert_to_tensor(y0_pred, dtype=tf.float32)

print("True labels: ")
tf.print(y0_true_tf)
print("\nPredictions:")
tf.print(y0_pred_tf)
tf.print("=="*30)

# One row per value of k: [1, 0, k]
v = np.linspace(0, 1, N_POINTS)
print(v)
y0_pred_v = np.tile(y0_pred, (N_POINTS, 1))
y0_pred_v[:, 2] = v
y0_pred_tf_v = tf.convert_to_tensor(y0_pred_v, dtype=tf.float32)

# Column order of `res` matches `curve_labels`.
# The Hamming curves plot 1 - loss, so that higher is better for every curve.
hamming_thresholds = (0.25, 0.50, 0.75)
curve_labels = (["Jaccard index"]
                + ["1 - hamming loss (th={:.2f})".format(th) for th in hamming_thresholds]
                + ["Binary accuracy", "Categorical accuracy"])

res = np.zeros((N_POINTS, len(curve_labels)))
print(res.shape)
res[:, 0] = jaccard_index(y0_true_tf, y0_pred_tf_v)
for col, th in enumerate(hamming_thresholds, start=1):
    res[:, col] = 1 - tfa.metrics.hamming.hamming_loss_fn(y0_true_tf, y0_pred_tf_v,
                                                         threshold=th, mode='multilabel')
res[:, 4] = tf.keras.metrics.binary_accuracy(y0_true_tf, y0_pred_tf_v)
res[:, 5] = tf.keras.metrics.categorical_accuracy(y0_true_tf, y0_pred_tf_v)

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
for col in range(res.shape[1]):
    ax.plot(v, res[:, col], '.-')

ax.legend(curve_labels, loc="lower left")
ax.set_title("Variation of metrics with the variation of one prediction output")
ax.set_xlabel(" value of \'k\' prediction")
ax.set_ylabel(" metric score")
ax.set_ylim(0.45, 1.05)
ax.annotate("pred = [1,0, k ]\ntrue = [1, 0, 0]", (0, 0.9))

plt.show()

# Categorical accuracy only compares the argmax of both vectors, so it is not
# suited to multilabel targets; the value is shown as the cell output.
tf.keras.metrics.categorical_accuracy(tf.convert_to_tensor(np.array([1, 1, 0, 1])),
                                      tf.convert_to_tensor(np.array([1, 0, 0, 1])))


###Model_v1
"simple" as only convolution and pooling layers
- CONVOLUTION: it learns "K" spatial filters, of a size F defined (2 parameters, usually square), applied spatially with a defined strides. The filters apply on the whole volume. As one filter is applied (= convolved) on the input volume, it builds a 2 dimensional activation map, as the response of the convolution applied on the input volume. 
In [CS231n](https://cs231n.github.io/convolutional-networks/),  several animation shows exactly how the computation are done.
The more conv layers, the more complicated the features that can be learned, but the more complicated the training is. 


- POOLING: Usually max pooling or average pooling. Their goal is to decrease dimension, by keeping only the max (or average, typ.) of a spatial area. 
Pooling is sometimes unpopular because it induces a loss of information. Typically, certain architectures either remove pooling layers, using other mechanisms to decrease dimensions, or implement some "skip" connections, as we'll see in the segmentation task.

- FC layers: Fully Connected layer, similarly to "regular" shallow feedforward networks, the FC layers are the head of the network, performing the classification based on the features learned already by the convolution part. 
There too, some research favors going fully convolutional and removing the FC part, as these layers contain the majority of the weights (the connection matrices are huge compared to CONV layers). 



In [0]:
# [DOES NOT WORK] opt =  tf.keras.optimizers.Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
# [DOES NOT WORK] opt =  tf.keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# Multilabel problem: binary cross-entropy is used as the loss.
my_loss = tf.keras.losses.binary_crossentropy

# First architecture: no regularisation, 20 output classes
model_v1 = Gr8ClassNet.build(width, height, channel,
                             l2_reg=(0, 0),
                             num_classes=20)
model_v1.summary()

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model_v1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                 loss=my_loss,
                 metrics=[jaccard_index, 'accuracy'])

In [0]:
# %tensorboard --logdir logs

In [0]:
# Train model_v1 and persist weights + history, or reload both from disk.
if train_class_model_v1:
    history_model_v1 = model_v1.fit(train_img_gen,  # training_set_class_tf_generator,
                                    steps_per_epoch=len(training_ids) // BS,
                                    epochs=EPOCHS,
                                    callbacks=my_callbacks,
                                    validation_data=val_set_class_tf_generator,
                                    validation_steps=len(val_ids) // BS)
    model_v1.save_weights(model_v1_save_filename)
    histories["model_v1"] = history_model_v1.history
    # Context manager: the file is flushed and closed even if pickling fails
    with open(model_v1_hist_save_filename, 'wb') as hist_file:
        pickle.dump(histories["model_v1"], hist_file)
else:
    model_v1.load_weights(model_v1_save_filename)
    # NOTE: pickle can execute arbitrary code - only load history files you trust
    with open(model_v1_hist_save_filename, 'rb') as hist_file:
        histories['model_v1'] = pickle.load(hist_file)

In [0]:
# Learning curves of model_v1: metrics on the top panel, loss on the bottom one.
try:
    history_v1 = histories["model_v1"]
    fig, axes = plt.subplots(2, 1, figsize=(8, 8))

    for curve in ('jaccard_index', 'val_jaccard_index', 'accuracy', 'val_accuracy'):
        axes[0].plot(history_v1[curve], label=curve)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.0, 1])
    axes[0].legend(loc='upper right')

    for curve in ('loss', 'val_loss'):
        axes[1].plot(history_v1[curve], label=curve)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Binary Cross-entropy')
    axes[1].set_ylim([0.0, 1])
    axes[1].legend(loc='upper right')

    fig.suptitle('Model From Scratch - First architecture')
except Exception as err:
    # Plotting stays best-effort (e.g. a reloaded history may lack a key),
    # but the actual cause is reported instead of being silently swallowed.
    print("There was a problem when attempting to plot the curve.")
    print(repr(err))

# The evaluation on the validation set runs whether or not the plot succeeded.
print(len(val_images))
scores = model_v1.evaluate(val_images, val_labels_binarized, verbose=1)


In [0]:
# --- First-layer filters of a freshly initialised (untrained) network ---
random_model = Gr8ClassNet.build(width, height, channel)
first_layer = random_model.get_layer(name = 'conv1')
# Read the kernel from the 'conv1' layer itself: taking element 0 of the
# weights of the whole model only works if conv1 happens to come first.
filters = first_layer.get_weights()[0]

# Rescale to [0, 1] so that the kernels can be displayed as images
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)

# Move the last axis (indexed as the filter axis) to the front: one small image per filter
filters_matrix = np.zeros((filters.shape[3], filters.shape[0], filters.shape[1], filters.shape[2]), dtype=np.float32)
for i in range(filters.shape[3]):
    filters_matrix[i,:,:,:] = filters[:,:,:,i]
plot_matrix(filters_matrix[:,:,:,:], sq_size=filters.shape[0], h = 2, w = 16)

# --- Same filters after training (model_v1) ---
print("=="*50)
first_layer = model_v1.get_layer(name = 'conv1')
filters = first_layer.get_weights()[0]

f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)
filters_matrix = np.zeros((filters.shape[3], filters.shape[0], filters.shape[1], filters.shape[2]), dtype=np.float32)
for i in range(filters.shape[3]):
    filters_matrix[i,:,:,:] = filters[:,:,:,i]
plot_matrix(filters_matrix[:,:,:,:], sq_size=filters.shape[0], h = 2, w = 16)

In [0]:
# Predict the whole validation set and threshold the outputs at 0.5
y_hat = model_v1.predict(val_images)
y_hat_round = np.round(y_hat)

print("Are all the classes at least predicted once ?")
print(y_hat_round.any(axis=0))
print("All classes: ")
print(classes_names)

# For two validation samples: predicted scores, true labels, then the image
with np.printoptions(precision=1, linewidth=100):
    for sample in (1, 4):
        one_sample = slice(sample, sample + 1)
        print(np.round(y_hat[one_sample, :], 1))
        print(np.round(val_labels_binarized[one_sample, :], 1))
        plot_matrix(val_images[one_sample, :, :, :], val_labels_str[one_sample], scale = 3)


It shows that, considering this (complicated) sigmoid multilabel problem:
- it does not always predict the same result (it's good)
- it indeed is able to predict two labels for a single sample, hence the multilabel (it's good)
- sometimes, it does not predict anything, for instance on sample 0 (it's bad)
This characteristic is intrinsically linked to the multilabel problem: with the sigmoid output activation function, the labels are independent, and the network is not forced to output one of the classes - it can output none. This is not desirable, but it can happen in this case. 

In [0]:
# Take one batch from the validation pipeline and compare, for samples 4..9,
# the true label names with the ones predicted by model_v1.
val_batch, label_val_batch = next(iter(val_set_class_tf_generator))

y_pred = model_v1.predict(val_batch[4:10])

true_names = multiLabelBinarizer.inverse_transform(np.array(label_val_batch[4:10]))
print(true_names)

predicted_names = multiLabelBinarizer.inverse_transform(np.array(np.round(y_pred)))
print(predicted_names)

<!-- Score above isn't great, and moreover, it indicates a large gap between training and validation scores. Let's tackle this issue - overfitting - by two differents means:
1- data augmentation 
2- dropout -->

#### local sandbox 
*Test* on augmentation data

In [0]:
# The training images are already rescaled by 1/255 (to be confirmed by plotting).
# Switch to True to run the augmentation sandbox below.
test_generators = False


def plotImages(images_arr):
    """
    Display images on a single row of five axes.

    Each image is converted with cv2.COLOR_BGR2RGB before being shown, and
    the axes are hidden. Images beyond the fifth are ignored.
    """
    fig, axes = plt.subplots(1, 5, figsize=(20,20))
    row_of_axes = axes.flatten()
    for img, ax in zip(images_arr, row_of_axes):
        rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ax.imshow(rgb_image)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# Sandbox (disabled unless `test_generators` is True): visualise three kinds of
# augmentation, each with its own generator. `rescale = 1./1` is a no-op scaling factor.
if test_generators:
    image_generator_horflip = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./1, horizontal_flip = True)
    image_generator_rot     = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./1, rotation_range=45)
    image_generator_zoom    = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./1, zoom_range=0.5) # zoom_range from 0 - 1 where 1 = 100%.


    # One iterator per augmentation over the in-memory training arrays, each with a fixed seed
    train_data_gen_horflip  = image_generator_horflip.flow( training_images, 
                                                            training_labels_binarized,
                                                            seed = 426473)
    train_data_gen_rot  = image_generator_rot.flow( training_images, 
                                                    training_labels_binarized,
                                                    seed = 374624)

    train_data_gen_zoom = image_generator_zoom.flow( training_images, 
                                                    training_labels_binarized, 
                                                    seed = 0)


    # [0][0][0] = first batch -> images -> first image, evaluated five times.
    # NOTE(review): presumably each evaluation re-applies a fresh random
    # transform, giving five variants of one image - TODO confirm.
    augmented_images_horflip = [train_data_gen_horflip[0][0][0] for i in range(5)]
    augmented_images_rot     = [train_data_gen_rot[0][0][0] for i in range(5)]
    augmented_images_zoom    = [train_data_gen_zoom[0][0][0] for i in range(5)]

    # plotImages shows each list on one row of five columns.

    plotImages(augmented_images_horflip)
    plotImages(augmented_images_rot)
    plotImages(augmented_images_zoom)
# plot_matrix(np.array(augmented_images_zoom)*255.0, sq_size = sq_size)

Before setting up the training: time decay learning rate

### Model v2
**Add** drop out, batch normalization and weight regularization

Although drop out and batch normalization don't (always) get along:
see [Understanding the Disharmony between Dropout and Batch Normalization by Variance Shift](https://arxiv.org/abs/1801.05134)

In [0]:
# Second architecture: same builder, with dropout, batch normalization and
# L2 weight regularisation switched on (no extra convolution / dense block).
model_v2 = Gr8ClassNet.build(
    width, height, channel,
    dropout=True,
    batch_normalization=True,
    extra_convolution_batch=False,
    extra_dense=False,
    l2_reg=(1e-3, 1e-3),
)
model_v2.summary()

In [0]:
# Loss matching the kind of problem. An unknown `classification_type` is
# rejected instead of silently re-using whatever `my_loss` was set before.
if classification_type == "multilabel":
    my_loss = tf.keras.losses.binary_crossentropy
elif classification_type == "single":
    my_loss = tf.keras.losses.categorical_crossentropy
else:
    raise ValueError(classification_type)

model_v2.compile(optimizer=tf.keras.optimizers.Adam(0.0001),
                 loss=my_loss,
                 metrics=[jaccard_index, 'accuracy'])

# Train model_v2 and persist weights + history, or reload both from disk.
if train_class_model_v2:
    history_model_v2 = model_v2.fit(train_img_gen,  # training_set_class_tf_generator,
                                    steps_per_epoch=len(df_class_train_to_generate["filename"]) // BS,
                                    epochs=EPOCHS,
                                    # `my_callbacks` is already a list: pass it as-is,
                                    # not wrapped in another list
                                    callbacks=my_callbacks,
                                    validation_data=val_set_class_tf_generator,  # val_img_gen,
                                    validation_steps=len(df_class_val_to_generate["filename"]) // BS)
    model_v2.save_weights(model_v2_save_filename)
    histories["model_v2"] = history_model_v2.history
    # Context manager: the file is flushed and closed even if pickling fails
    with open(model_v2_hist_save_filename, 'wb') as hist_file:
        pickle.dump(histories["model_v2"], hist_file)
else:
    model_v2.load_weights(model_v2_save_filename)
    # NOTE: pickle can execute arbitrary code - only load history files you trust
    with open(model_v2_hist_save_filename, 'rb') as hist_file:
        histories['model_v2'] = pickle.load(hist_file)


In [0]:
# Learning curves of model_v2: metrics on the top panel, loss on the bottom one.
try:
    history_v2 = histories["model_v2"]
    fig, axes = plt.subplots(2, 1, figsize=(8, 8))

    for curve in ('jaccard_index', 'val_jaccard_index', 'accuracy', 'val_accuracy'):
        axes[0].plot(history_v2[curve], label=curve)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.0, 1])
    axes[0].legend(loc='upper right')

    for curve in ('loss', 'val_loss'):
        axes[1].plot(history_v2[curve], label=curve)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Binary Cross-entropy')
    axes[1].set_ylim([0.0, 1])
    axes[1].legend(loc='upper right')

    fig.suptitle('Model v2')
except Exception as err:
    # Plotting stays best-effort, but the actual cause is reported instead of
    # being silently swallowed.
    print("There was an error during the plot")
    print(repr(err))

# The evaluation on the validation set runs whether or not the plot succeeded.
scores = model_v2.evaluate(val_images, val_labels_binarized, verbose=1)


In [0]:
# --- Filters of the first convolution of model_v2 ---
first_layer = model_v2.get_layer(name = 'conv1')
filters = first_layer.get_weights()[0]
print(" Shape of filters - conv1" + str(filters.shape))

# Rescale to [0, 1] before showing
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)
# Move the last axis (indexed as the filter axis) to the front: one small image per filter
filters_matrix = np.zeros((filters.shape[3], filters.shape[0], filters.shape[1], filters.shape[2]), dtype=np.float32)
for i in range(filters.shape[3]):
    filters_matrix[i,:,:,:] = filters[:,:,:,i]
plot_matrix(filters_matrix[0:20,:,:,:], sq_size=filters.shape[0], h=2, w=10)
print("===" * 30)


# --- Filters of the second convolution: one 2-D slice per (input, output) pair ---
second_layer = model_v2.get_layer(name = 'conv2')
filters = second_layer.get_weights()[0]
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)
print(" Shape of filters - conv2" + str(filters.shape))

n_in, n_out = filters.shape[2], filters.shape[3]
filters_matrix = np.zeros((n_in * n_out, filters.shape[0], filters.shape[1]), dtype=np.float32)
for j in range(n_out):
    for i in range(n_in):
        # Row-major flat index of the (j, i) pair. The previous `j*i` made
        # different pairs collide and left most rows filled with zeros.
        filters_matrix[j * n_in + i,:,:] = filters[:,:,i,j]
print(filters_matrix.shape)
plot_matrix(filters_matrix[0:20,:], sq_size=filters.shape[0], h = 2, w = 10)

In [0]:
# Predict the whole validation set and threshold the outputs at 0.5.
# `y_hat_round` must be recomputed here: otherwise the check below would
# still report the predictions of the previous model.
y_hat = model_v2.predict(val_images)
y_hat_round = np.round(y_hat)

print("Are all the classes at least predicted once ?")
print(y_hat_round.any(axis=0))
print("All classes: ")
print(classes_names)


# For two validation samples: predicted scores, true labels, then the image
with np.printoptions(precision=1,linewidth=100):
    print(np.round(y_hat[1:2,:],1))
    print(np.round(val_labels_binarized[1:2,:],1))
    plot_matrix(val_images[1:2, :, :, :], val_labels_str[1:2], scale = 3)


    print(np.round(y_hat[4:5,:],1))
    print(np.round(val_labels_binarized[4:5,:],1))
    plot_matrix(val_images[4:5, :, :, :], val_labels_str[4:5], scale = 3)


### Model v3
Add extra convolution

In [0]:
# model_v3 = tf.keras.Sequential()

# model_v3.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5),  input_shape=(width,height,3)))
# model_v3.add(tf.keras.layers.Activation('relu'))
# model_v3.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
# model_v3.add(tf.keras.layers.Dropout(0.25))
# model_v3.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3),))
# # model_v3.add(tf.keras.layers.BatchNormalization())
# model_v3.add(tf.keras.layers.Activation('relu'))


# model_v3.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
# model_v3.add(tf.keras.layers.Dropout(0.25))


# model_v3.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), ))
# # model_v3.add(tf.keras.layers.BatchNormalization())
# model_v3.add(tf.keras.layers.Activation('relu'))
# model_v3.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
# model_v3.add(tf.keras.layers.Dropout(0.25))
# model_v3.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),))
# # model_v3.add(tf.keras.layers.BatchNormalization())
# model_v3.add(tf.keras.layers.Activation('relu'))
# model_v3.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

# # model_v3.add(tf.keras.layers.GlobalMaxPool2D())
# model_v3.add(tf.keras.layers.Dropout(0.25))


# model_v3.add(tf.keras.layers.Flatten())
# model_v3.add(tf.keras.layers.Dense(512))
# model_v3.add(tf.keras.layers.Activation('relu'))

# model_v3.add(tf.keras.layers.Dropout(0.5))
# # model_v3.add(tf.keras.layers.Dense(128, activation='relu'))
# # model_v3.add(tf.keras.layers.Dropout(0.5))
# model_v3.add(tf.keras.layers.Dense(20, activation='sigmoid'))

In [0]:
# Third architecture, built with `build_second`: batch normalization and an
# extra convolution block, no dropout, lighter L2 regularisation.
model_v3 = Gr8ClassNet.build_second(
    width, height, channel,
    dropout=False,
    batch_normalization=True,
    extra_convolution_batch=True,
    extra_dense=False,
    l2_reg=(0.0001, 0.00005),
)
model_v3.summary()
# train_model = False

In [0]:
# Loss matching the kind of problem. An unknown `classification_type` is
# rejected instead of silently re-using whatever `my_loss` was set before.
if classification_type == "multilabel":
    my_loss = tf.keras.losses.binary_crossentropy
elif classification_type == "single":
    my_loss = tf.keras.losses.categorical_crossentropy
else:
    raise ValueError(classification_type)

model_v3.compile(optimizer=tf.keras.optimizers.Adam(0.0001),
                 loss=my_loss,
                 metrics=[jaccard_index, 'accuracy'])

# Train model_v3 and persist weights + history, or reload both from disk.
if train_class_model_v3:
    history_model_v3 = model_v3.fit(train_img_gen,  # training_set_class_tf_generator
                                    steps_per_epoch=len(df_class_train_to_generate["filename"]) // BS,
                                    epochs=EPOCHS,
                                    # `my_callbacks` is already a list: pass it as-is,
                                    # not wrapped in another list
                                    callbacks=my_callbacks,
                                    validation_data=val_set_class_tf_generator,  # val_img_gen,
                                    validation_steps=len(df_class_val_to_generate["filename"]) // BS)
    model_v3.save_weights(model_v3_save_filename)
    histories["model_v3"] = history_model_v3.history
    # Context manager: the file is flushed and closed even if pickling fails
    with open(model_v3_hist_save_filename, 'wb') as hist_file:
        pickle.dump(histories["model_v3"], hist_file)
else:
    model_v3.load_weights(model_v3_save_filename)
    # NOTE: pickle can execute arbitrary code - only load history files you trust
    with open(model_v3_hist_save_filename, 'rb') as hist_file:
        histories['model_v3'] = pickle.load(hist_file)


# Table of the layers (object, name, output shape), shown as the cell output
layers_model_v3 = [(layer, layer.name, layer.output_shape) for layer in model_v3.layers]
pd.DataFrame(layers_model_v3, columns=['Layer Type', 'Layer Name', 'Layer output shape'])

In [0]:
# Learning curves of model_v3: metrics on the top panel, loss on the bottom one.
try:
    history_v3 = histories["model_v3"]
    fig, axes = plt.subplots(2, 1, figsize=(8, 8))

    for curve in ('jaccard_index', 'val_jaccard_index', 'accuracy', 'val_accuracy'):
        axes[0].plot(history_v3[curve], label=curve)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.0, 1])
    axes[0].legend(loc='upper right')

    for curve in ('loss', 'val_loss'):
        axes[1].plot(history_v3[curve], label=curve)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Binary Cross-entropy')
    axes[1].set_ylim([0.0, 1])
    axes[1].legend(loc='upper right')

    fig.suptitle('Model v3')
except Exception as err:
    # Plotting stays best-effort, but the actual cause is reported instead of
    # being silently swallowed.
    print("There was an error during the plot")
    print(repr(err))

# The evaluation on the validation set runs whether or not the plot succeeded.
scores = model_v3.evaluate(val_images, val_labels_binarized, verbose=1)

As previously, we plot filters of first and second layers

With this (slightly) deeper network, the same kind of results are reached - maybe a bit better.  

Clearly, there is overfitting. After several rounds of trial and error, it seems difficult, considering the limited amount of original data, to learn enough to train that many classifiers - multilabel classification essentially amounts to as many classifiers as there are different classes to predict.


In [0]:
# Print the layer-by-layer summary of model_v3 again, as a reminder of the
# layer names used by the filter plots below.
model_v3.summary()

In [0]:
print("\n================================First Layer================================\n")

first_layer = model_v3.get_layer(name = 'conv1')
filters = first_layer.get_weights()[0]

# Rescale to [0, 1] so that the kernels can be displayed as images
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)
# Move the last axis (indexed as the filter axis) to the front: one small image per filter
filters_matrix = np.zeros((filters.shape[3], filters.shape[0], filters.shape[1], filters.shape[2]))
for i in range(filters.shape[3]):
    filters_matrix[i,:,:,:] = filters[:,:,:,i]
# Each slice along the last axis separately, then all of them together
plot_matrix(filters_matrix[0:32,:,:,0], sq_size=filters.shape[0], h=2, w=16)
plot_matrix(filters_matrix[0:32,:,:,1], sq_size=filters.shape[0], h=2, w=16)
plot_matrix(filters_matrix[0:32,:,:,2], sq_size=filters.shape[0], h=2, w=16)
plot_matrix(filters_matrix[0:32,:,:,:], sq_size=filters.shape[0], h=2, w=16)

print("\n===============================Second Layer================================\n")
second_layer = model_v3.get_layer(name = 'conv2')
filters, biases = second_layer.get_weights()
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)
print(filters.shape)

# One 2-D slice per (input, output) pair
n_in, n_out = filters.shape[2], filters.shape[3]
filters_matrix = np.zeros((n_in * n_out, filters.shape[0], filters.shape[1]), np.float32)
for j in range(n_out):
    for i in range(n_in):
        # Row-major flat index of the (j, i) pair. The previous `j*i` made
        # different pairs collide and left most rows filled with zeros.
        filters_matrix[j * n_in + i,:,:] = filters[:,:,i,j]
print(filters_matrix.shape)
plot_matrix(filters_matrix[0:32,:], sq_size=filters.shape[0], h=2, w=16)


#### Analysis using confusion matrix 
Multilabel, Multiclass => consider the labels independently - and simple confusion matrix per label

``` 
           |   Predictions |
           |___-___|___+___|
   T | Neg |  TN   |   FP  |
   R |_____|_ _ _ _|_ _ _ _|
   U | Pos |  FN   |   TP  |
   E |_____|_______|_______|
 ```
A confusion matrix is a nice tool in order to observe the behaviour of the classifier on the validation set, and in particular the behavior between classes




In [0]:
# One 2x2 confusion matrix per label (multilabel => labels treated independently).
# credit: https://stackoverflow.com/questions/53886370/multi-class-multi-label-confusion-matrix-with-sklearn

# Dictionaries that will contain the confusion matrices, keyed by class name
conf_mat_dict = {}
conf_mat_dict_norm = {}
conf_mat_dict_prefect_norm = {}  # kept for compatibility; not filled in this cell

# For clarity, rename classes_names => labels
labels = classes_names

# Predict the labels for the images of the validation set (threshold 0.5)
y_pred = model_v3.predict(val_images)
y_pred = np.round(y_pred)

# Gather true values
y_true = val_labels_binarized

# Check consistency of shapes
print("y_true shape =" + str(y_true.shape))
print("y_pred shape = " + str(y_pred.shape))

# Compute all the confusion matrices, one per class.
# `labels=[0, 1]` forces a 2x2 matrix even when a class is never present nor
# predicted; without it sklearn returns a 1x1 matrix and the plot below fails.
for label_col, label_name in enumerate(labels):
    y_true_label = y_true[:, label_col]
    y_pred_label = y_pred[:, label_col]
    conf_mat_dict[label_name] = sklearn.metrics.confusion_matrix(
        y_true=y_true_label, y_pred=y_pred_label, labels=[0, 1])
    conf_mat_dict_norm[label_name] = sklearn.metrics.confusion_matrix(
        y_true=y_true_label, y_pred=y_pred_label, labels=[0, 1], normalize='true')

# Plot the row-normalised matrices on a 4x5 grid, one panel per class.
# Cells are annotated TN / FP / FN / TP, matching sklearn's layout
# (rows = true class, columns = predicted class).
group_names = ['TN', 'FP', 'FN', 'TP']
fig, axes = plt.subplots(4, 5, figsize=(15, 12))
for ax, label in zip(axes.flat, labels):
    cf_matrix = conf_mat_dict_norm[label]
    group_counts = ["{0:0.3f}".format(value) for value in cf_matrix.flatten()]
    local_labels = [f"{v1}\n{v2}" for v1, v2 in zip(group_names, group_counts)]
    local_labels = np.asarray(local_labels).reshape(2, 2)
    sns.heatmap(cf_matrix, annot=local_labels, fmt='', ax=ax)
    ax.set_ylabel("True")
    ax.set_xlabel("Predicted")
    ax.set_title(str(label), fontsize=14)


## Classification using Transfer Learning

the approach followed relies on [tensorflow documentation](https://www.tensorflow.org/tutorials/images/transfer_learning)

For this part, I reuse the input data pipelines already built. 






### Pre-trained CNN model as Feature Extractor
- load a model (MobileNetV2). Attempts were made with ResNet (much slower) and VGG16 (worse results)
- freeze convolution blocks

We can add a flattening layer in order to feed the head part. The bottom part is therefore already trained, and is the feature extraction part. The second (last) part, the head, is the classifier that we can train based on our dataset.

The plan is:
1. Get the base_model, already trained
2. Complete the base with the classifier, the head
3. With the base_model set as non trainable (!!), train the complete model based on our data. The goal is to find a set of weights that achieve a good accuracy
4. IF there is no overfitting, we can relax the non-trainability of the base_model, lower the learning rate, and fine-tune the weights to try and find a better optimum. This is risky as we might as well "unlearn". It is therefore important that there is no overfitting at that point, and that the learning rate is kept small.


The main interest of Transfer Learning is the re-use of an already trained feature extractor. It is interesting to observe the behavior of such a network beforehand.

In [0]:
# Parameters of the classifier head: number of classes and the output
# activation matching the kind of classification problem.
num_classes = 20

if classification_type == "multilabel":
    # independent yes/no decision per class
    output_activation = 'sigmoid'
elif classification_type == "single":
    # exactly one class per image
    output_activation = 'softmax'
else:
    raise ValueError(classification_type)

####Get the base model

In [0]:
# Input resolution handed to the pre-trained network
print(f"({height},{width}, {channel})")

# Base model: MobileNetV2 with ImageNet weights, without its classification top.
# Alternatives that were tried (same arguments):
#   tf.keras.applications.vgg16.VGG16
#   tf.keras.applications.ResNet152V2
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(height, width, channel),
    include_top=False,
    weights='imagenet',
)

# Freeze the base model first: the whole model, then every layer explicitly
base_model.trainable = False
for layer in base_model.layers:
    layer.trainable = False

# Table of the layers (object, name, trainable flag, output shape)
layers_mobile_net = []
for layer in base_model.layers:
    layers_mobile_net.append((layer, layer.name, layer.trainable, layer.output_shape))
df_tmp = pd.DataFrame(
    layers_mobile_net,
    columns=['Layer Type', 'Layer Name', 'Layer Trainable', 'Layer output shape'],
)



As an example, we take two images from the (augmented) training set, and we show the bottleneck features: the features at the output of the pre-trained model. 

- First, we show the corresponding training image
- Second, we show the dimension of the layer, 
- Third, we show the first 25 features. 

We repeat that for two images, to observe the difference. 

Those bottleneck features constitute what goes in the head, the classifier. This is an "automatically learnt" feature, in contrast with HOG or PCA, from previous assignment, that were handcrafted. In the "From scratch" problem, the goal was to obtain also this feature extraction part: here, we reuse a base network already trained on a gigantic set of images. 

In [0]:
# Draw one batch from the training generator and show, for two of its images
# (index 0 and index 4), the image itself and the features that the frozen
# base model outputs for it.
img_, label_ = next(train_img_gen)

print("First image of the bacth, than we give as input to the MobileNet model")
plot_matrix(img_[0:1], multiLabelBinarizer.inverse_transform(np.array( label_[0:1])), scale = 4)

# plot_matrix(dataset_denormalize(img_[0:1].numpy()), multiLabelBinarizer.inverse_transform(np.array( label_[0:1])), scale = 4)

print("The Model computes the features. The output is of size: (before Fully connected layer)")
# base_model was built with include_top=False: this is the output of the feature extractor only
bottleneck_feature_example = base_model.predict(img_[0:1])
print(bottleneck_feature_example.shape)
# print(bottleneck_feature_example[0].shape)
print("\nFinally, we can observe some of the filters as the output of the MobileNet feature extraction part")
# Keep the first 25 feature maps; .T reverses the axes so that the map index comes first
plot_matrix(bottleneck_feature_example[0][:,:,0:25].T, h=2, w=13)

# Same steps for the image at index 4 of the batch
print("For another image: (before Fully connected layer)")
plot_matrix(img_[4:5], multiLabelBinarizer.inverse_transform(np.array( label_[4:5])), scale = 4)
bottleneck_feature_example = base_model.predict(img_[4:5])
print(bottleneck_feature_example.shape)
# print(bottleneck_feature_example[0].shape)
print("\nFinally, we can observe some of the filters as the output of the MobileNet feature extraction part")
plot_matrix(bottleneck_feature_example[0][:,:,0:25].T, h=2, w=13)

####Filters of first Convolutional layer

In [0]:
# Kernel of the first convolution of MobileNetV2 (layer named 'Conv1')
first_layer = base_model.get_layer(name = 'Conv1')
filters = first_layer.get_weights()[0]

# Rescale to [0, 1] so that the kernels can be displayed as images
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)

# Move the last axis (indexed as the filter axis) to the front: one small image per filter
kernel_h, kernel_w, n_channels, n_filters = filters.shape
filters_matrix = np.zeros((n_filters, kernel_h, kernel_w, n_channels), np.float32)
for i in range(n_filters):
    filters_matrix[i] = filters[..., i]

# Each slice along the last axis on its own ...
print("CHANNEL 0\n")
plot_matrix(filters_matrix[..., 0], sq_size=filters.shape[0], h = 2, w = 16)

separator = "\n" + "=="*30
print(separator + "\nCHANNEL 1\n")
plot_matrix(filters_matrix[..., 1], sq_size=filters.shape[0], h = 2, w = 16)
print(separator + "\nCHANNEL 2\n")
plot_matrix(filters_matrix[..., 2], sq_size=filters.shape[0], h = 2, w = 16)

# ... then all of them together
print(separator + "\nALL channels composed:\n")
plot_matrix(filters_matrix[:,:,:,], sq_size=filters.shape[0], h = 2, w = 16)


####Addition of the head of the model (classifier)
Traditionally, this is a (stack of) fully connected layer(s). Here, we use another (functional) API rather than `Sequential` in order to build up the model. There is no difference in the end result. 



In [0]:
# Head of the classifier, built with the functional API on top of the last
# layer of the base model.
output_base_model = base_model.layers[-1].output

# The layers are instantiated in the order they are applied.
gap_layer = tf.keras.layers.GlobalAveragePooling2D(name = "gap")
hidden_layer = tf.keras.layers.Dense(1024, activation = 'relu')
dropout_layer = tf.keras.layers.Dropout(0.35)
# One sigmoid unit per class: the outputs are independent (multilabel problem).
classification_layer = tf.keras.layers.Dense(num_classes, activation='sigmoid')

x = gap_layer(output_base_model)
x = hidden_layer(x)
x = dropout_layer(x)
output = classification_layer(x)

# Complete model: base model input -> per-class outputs.
class_transfer_learning_model = tf.keras.Model(base_model.input, output)

Summary of the complete model

In [0]:
# Print the layer-by-layer summary of the complete model (base + head).
class_transfer_learning_model.summary()

####Compilation
- use of ADAM optimizer
- binary cross entropy, as we are still in the multilabel problem
- we record the jaccard_index, and the binary accuracy


In [0]:
# Small learning rate for the newly added classification head.
INIT_LR = 0.00005

# Use the `learning_rate` keyword: `lr` is only a deprecated alias of it.
opt = tf.keras.optimizers.Adam(learning_rate = INIT_LR)

# Binary cross-entropy because each class is an independent yes/no output.
# jaccard_index is the custom metric defined elsewhere in this notebook.
class_transfer_learning_model.compile(loss = 'binary_crossentropy',
                                      optimizer = opt,
                                      metrics = [jaccard_index, 'accuracy'])

####Training

In [0]:
# train_model = False

In [0]:
# Either train the transfer-learning model and persist weights + history,
# or reload both from a previous run.
if train_class_model_tl:
    # NOTE(review): the Keras documentation advises against passing
    # `batch_size` when the input is a generator/dataset that already yields
    # batches; it is kept here to preserve the original call.
    history_model_tl = class_transfer_learning_model.fit(
        train_img_gen, # training_set_class_tf_generator,
        batch_size = BS,
        steps_per_epoch = len(df_class_train_to_generate["filename"]) // BS,
        callbacks = [my_callbacks],
        epochs = EPOCHS,
        validation_data = val_set_class_tf_generator,
        validation_steps = len(df_class_val_to_generate["filename"]) // BS,
    )
    class_transfer_learning_model.save_weights(class_tl_save_filename)
    histories["class_tansfer_learning_model"] = history_model_tl.history
    # Context manager so the file is flushed and closed deterministically.
    with open(class_tl_hist_save_filename, 'wb') as history_file:
        pickle.dump(histories["class_tansfer_learning_model"], history_file)

else:
    class_transfer_learning_model.load_weights(class_tl_save_filename)
    # pickle must only be used on files produced by this notebook itself.
    with open(class_tl_hist_save_filename, 'rb') as history_file:
        histories['class_tansfer_learning_model'] = pickle.load(history_file)

- MobileNetV2: ~70 % on the validation set (overfitting)
- VGG16: ~25 % at most after 50 epochs, and it does not really improve
- ResNet: noticeably slower than MobileNet

In [0]:
# Pull a single element from the training generator; after the loop `im` and
# `lab` hold that element (presumably one batch of images and labels --
# TODO confirm against the generator definition).
for im, lab in training_set_class_tf_generator.take(1):
    pass

# Print the value range of the images, as a sanity check of the input scaling.
tf.print(np.max(im))
tf.print(np.min(im))

In [0]:
# Evaluate on the in-memory validation arrays; `score` holds the loss followed
# by the metrics given at compile time.
score = class_transfer_learning_model.evaluate(val_images, val_labels_binarized)
print(score)

In [0]:
# Training curves (Jaccard index, accuracy, loss) of the recorded histories.
for key_ in ["class_tansfer_learning_model",]: #histories.keys():
    print("key = " + str(key_))
    try:
        run_history = histories[key_]

        # One entry per panel:
        # (metric key, y-label, training legend, validation legend, legend position, fix y-range to [0, 1])
        panel_specs = [
            ('jaccard_index', 'Jaccard Index', 'Training jaccard_index', 'Validation jaccard_index', 'lower right', True),
            ('accuracy', 'Accuracy', 'Accuracy', 'Validation accuracy', 'lower right', True),
            ('loss', 'Cross Entropy', 'Training Loss', 'Validation Loss', 'upper right', False),
        ]

        # Read every series before creating the figure, so that a missing
        # metric is reported without leaving a half-drawn figure behind.
        panel_curves = [(run_history[spec[0]], run_history['val_' + spec[0]]) for spec in panel_specs]

        fig, axes = plt.subplots(3,1, figsize=(8, 12), sharex=True)
        for panel_ax, panel_spec, (train_curve, val_curve) in zip(axes, panel_specs, panel_curves):
            metric_key, y_label, train_legend, val_legend, legend_loc, unit_range = panel_spec
            panel_ax.plot(train_curve, label=train_legend)
            panel_ax.plot(val_curve, label=val_legend)
            panel_ax.set_ylabel(y_label)
            panel_ax.legend(loc=legend_loc)
            if unit_range:
                panel_ax.set_ylim([0,1])

        # The x axis is shared between the panels.
        axes[2].set_xlim([0,100])
        axes[2].set_xlabel('epoch')
        plt.show()
    except KeyError:
        # History (or one of its metrics) not recorded: skip this entry.
        print("Key error -> pass")

After several trials with different hyperparameters, a score of about 60% to 65% on the validation set seems to be the maximum reachable. Most likely, to improve the score on the same problem:
- more data and/or better data augmentation is needed,
- the hyperparameters should be carefully assessed,
- the optimizer and objective function could be revised. 


###Fine Tuning

The following part is coded (and works ;-)) but is skipped: there is overfitting in the model, and before this is settled, unlocking the training of the base_model may only 'untrain' it. 

In [0]:
'''
Define parameters
'''

# Master switch for the fine-tuning section; disabled here (False).
fine_tune_class_transfer_learning_model = False
# Number of epochs completed during the previous training phase.
# NOTE(review): only the assert below reads this value in the visible cells;
# confirm whether the fit() call of the fine-tuning cell should use it.
epoch_restart = 0 # this shall be the number of epochs realized during previous training
if fine_tune_class_transfer_learning_model:
    # Fail early when fine-tuning is requested without a restart epoch.
    assert epoch_restart > 0, "The epoch_restart number must be larger than 0"

In [0]:
# Optional fine-tuning: unfreeze the upper part of the base model and continue
# training the whole network with a much smaller learning rate.
if fine_tune_class_transfer_learning_model:
    base_model.trainable = True
    # Re-freeze the first 100 layers, in order to not forget everything.
    for layer in base_model.layers[:100]:
        layer.trainable = False

    # The model must be re-compiled for the new trainable flags to be used.
    lr = 1e-6
    opt = tf.keras.optimizers.Adam(lr)
    class_transfer_learning_model.compile(loss='binary_crossentropy',
                                          optimizer=opt,
                                          metrics=[jaccard_index, 'accuracy'])

    # Fine-tune for as many epochs as the first training phase.
    fine_tune_epochs = EPOCHS
    total_epochs = EPOCHS + fine_tune_epochs

    if train_class_model_tl_ft:
        # initial_epoch=EPOCHS makes the epoch counter resume after the EPOCHS
        # epochs scheduled for the first phase.
        # NOTE(review): `epoch_restart` from the parameter cell is not used
        # here; confirm whether it should replace EPOCHS as initial_epoch.
        history_model_tl_ft = class_transfer_learning_model.fit(
            training_set_class_tf_generator,
            batch_size = BS,
            steps_per_epoch = len(df_class_train_to_generate["filename"]) // BS,
            callbacks = [my_callbacks],
            epochs = total_epochs,
            initial_epoch = EPOCHS,
            validation_data = val_set_class_tf_generator,
            validation_steps = len(df_class_val_to_generate["filename"]) // BS,
        )
        class_transfer_learning_model.save_weights(class_tl_finetuned_save_filename)
        histories["tansfer_learning_model_fine_tuning"] = history_model_tl_ft.history
        # Context manager so the file is flushed and closed deterministically.
        with open(class_tl_finetuned_hist_save_filename, 'wb') as history_file:
            pickle.dump(histories["tansfer_learning_model_fine_tuning"], history_file)

    else:
        class_transfer_learning_model.load_weights(class_tl_finetuned_save_filename)
        # pickle must only be used on files produced by this notebook itself.
        with open(class_tl_finetuned_hist_save_filename, 'rb') as history_file:
            histories['tansfer_learning_model_fine_tuning'] = pickle.load(history_file)

As before, we can show the training curves, for the training still registered. 

In [0]:
# Training curves for the initial training and, when recorded, the fine-tuning.
for key_ in ["class_tansfer_learning_model","tansfer_learning_model_fine_tuning",]: #histories.keys():
    print("key = " + str(key_))
    try:
        recorded = histories[key_]
        jacc, val_jacc = recorded['jaccard_index'], recorded['val_jaccard_index']
        acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
        loss, val_loss = recorded['loss'], recorded['val_loss']

        fig, axes = plt.subplots(3,1, figsize=(8, 12), sharex=True)
        ax_jaccard, ax_accuracy, ax_loss = axes

        # Top panel: Jaccard index.
        ax_jaccard.plot(jacc, label='Training jaccard_index')
        ax_jaccard.plot(val_jacc, label='Validation jaccard_index')
        ax_jaccard.set_ylabel('Jaccard Index')
        ax_jaccard.set_ylim([0,1])
        ax_jaccard.legend(loc='lower right')

        # Middle panel: accuracy.
        ax_accuracy.plot(acc, label='Accuracy')
        ax_accuracy.plot(val_acc, label='Validation accuracy')
        ax_accuracy.set_ylabel('Accuracy')
        ax_accuracy.set_ylim([0,1])
        ax_accuracy.legend(loc='lower right')

        # Bottom panel: loss; it also carries the shared x axis settings.
        ax_loss.plot(loss, label='Training Loss')
        ax_loss.plot(val_loss, label='Validation Loss')
        ax_loss.set_ylabel('Cross Entropy')
        ax_loss.legend(loc='upper right')
        ax_loss.set_xlim([0,100])
        ax_loss.set_xlabel('epoch')

        plt.show()
    except KeyError:
        # History (or one of its metrics) not recorded: skip this entry.
        print("Key error -> pass")

Yet another transfer learning

Compute the predictions for the full validation set (~3500) items, and compute the jaccard index. 

In [0]:
# print(val_images[0,:][0:5, 0:5, 0])
# file_of_weights = 'class_transfer_learning_model.h5'
# class_transfer_learning_model.load_weights(file_of_weights)
# y_hat_tl = class_transfer_learning_model.predict(val_images)
# y_hat_tl_round = tf.round(y_hat_tl)
# y_true_tl = tf.convert_to_tensor(val_labels_binarized, dtype='float32')

# jac_index = jaccard_index(y_true_tl, y_hat_tl)

# print("jac index: " + str(jac_index))
# esults = class_transfer_learning_model.evaluate(x_test, y_test)

### Analysis using confusion matrix

In [0]:
# Dictionaries that will contain the confusion matrices, keyed by class name.
conf_mat_dict = {}
conf_mat_dict_norm = {}
conf_mat_dict_prefect_norm = {}  # not filled in this cell; kept in case other cells refer to it

# For clarity, rename classes_names => labels.
labels = classes_names

# Predict the labels for the images of the validation set and threshold the
# sigmoid outputs at 0.5 (np.round) to obtain hard 0/1 decisions.
y_pred = class_transfer_learning_model.predict(val_images)
y_pred = np.round(y_pred)

# Gather true values.
y_true = val_labels_binarized

# Check consistency of shapes.
print("y_true shape =" + str(y_true.shape))
print("y_pred shape = " + str(y_pred.shape))

# Compute one binary confusion matrix per class. labels=[0, 1] guarantees a
# 2x2 matrix even when a class has neither true nor predicted positives
# (otherwise sklearn returns a 1x1 matrix and the 2x2 annotation below fails).
for label_col, label_name in enumerate(labels):
    y_true_label = y_true[:, label_col]
    y_pred_label = y_pred[:, label_col]
    conf_mat_dict[label_name] = sklearn.metrics.confusion_matrix(
        y_true=y_true_label, y_pred=y_pred_label, labels=[0, 1])
    # normalize='true': each row (true class) sums to 1.
    conf_mat_dict_norm[label_name] = sklearn.metrics.confusion_matrix(
        y_true=y_true_label, y_pred=y_pred_label, labels=[0, 1], normalize='true')

# Plot the normalized matrices on a 4x5 grid, one panel per class.
fig, axes = plt.subplots(4,5, figsize = (15,12))
group_names = ['TN','FP','FN','TP']
for panel_ax, label in zip(axes.flat, labels):
    cf_matrix = conf_mat_dict_norm[label]
    group_counts = ["{0:0.3f}".format(value) for value in cf_matrix.flatten()]
    # Each cell shows its name (TN/FP/FN/TP) above its normalized value.
    local_labels = [f"{v1}\n{v2}" for v1, v2 in zip(group_names,group_counts)]
    local_labels = np.asarray(local_labels).reshape(2,2)
    sns.heatmap(cf_matrix, annot=local_labels, fmt='', ax=panel_ax)
    panel_ax.set_ylabel("True")
    panel_ax.set_xlabel("Predicted")
    panel_ax.set_title(str(label), fontsize = 14)



From the confusion map, we can see the variety of behavior considering the input. Recognizing a sheep seems dramatically more complicated than a horse. Similarly, recognizing a train seems easier than a potted plant. 

From the matrices (normalized per True value), our classifier behaved differently according to the class. 

Another complementary view is to show the precision-recall curve, that is shown below

In [0]:
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

# Precision-recall analysis needs the raw (non-thresholded) sigmoid scores.
# `y_pred` was rounded to 0/1 for the confusion matrices, which would reduce
# every curve to a single operating point, so the scores are predicted again.
y_score = class_transfer_learning_model.predict(val_images)

precision = {}
recall = {}
average_precision = {}

print(y_score.shape)
print(y_true.shape)

# One curve and one average-precision value per class.
for i in range(num_classes):
    precision[i], recall[i], _thresholds = precision_recall_curve(y_true[:,i], y_score[:,i])
    average_precision[i] = average_precision_score(y_true[:,i], y_score[:,i])

# A "micro-average": quantifying score on all classes jointly
precision["micro"], recall["micro"], _thresholds = precision_recall_curve(y_true.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(y_true, y_score, average="micro")
print('Average precision score, micro-averaged over all classes: {0:0.2f}'
      .format(average_precision["micro"]))



In [0]:
# Precision-recall curves of every class, the micro-average, and iso-F1 guides.
fig = plt.figure(figsize=(12, 12))
lines = []
labels = []

# Iso-F1 guide curves: precision as a function of recall for a fixed F1.
for f_score in np.linspace(0.2, 0.8, num=4):
    recall_grid = np.linspace(0.01, 1)
    iso_precision = f_score * recall_grid / (2 * recall_grid - f_score)
    drawable = iso_precision >= 0
    curve_handle, = plt.plot(recall_grid[drawable], iso_precision[drawable], color='gray', alpha=0.2)
    plt.annotate(f'f1={f_score:0.1f}', xy=(0.9, iso_precision[45] + 0.02))

# A single legend entry (the last guide drawn) stands for all iso-F1 curves.
lines.append(curve_handle)
labels.append('iso-f1 curves')

# Micro-averaged curve over all classes.
curve_handle, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
lines.append(curve_handle)
labels.append(f'micro-average Precision-recall (area = {average_precision["micro"]:0.2f})')

# One curve per class.
for class_idx in range(num_classes):
    curve_handle, = plt.plot(recall[class_idx], precision[class_idx], lw=2)
    lines.append(curve_handle)
    labels.append(f'P-R {classes_names[class_idx]} (area = {average_precision[class_idx]:0.2f})')

fig.subplots_adjust(bottom=0.25)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Extension of Precision-Recall curve to multi-class')
# Legend placed outside the axes, on the right.
plt.legend(lines, labels, bbox_to_anchor=(1.04,1), loc="upper left", prop=dict(size=14))


plt.show()

Judging by the per-class results, the classes seem to fall into two groups of roughly ten classes each:
- aeroplane, bicycle, bird, boat, bus, cat, dog, horse, motorbike, person and train, which are not that badly recognized yet,
- bottle, car, chair, cow, diningtable, pottedplant, sheep, sofa and tvmonitor, which are not properly recognized at all.

We can analyze that result in more details, by taking 2 samples of an aeroplane, and 2 samples of a cow.

In [0]:
# Two sample images of a well recognized class and two of a poorly recognized one.
ids_aeroplane = [ '2007_000256', '2007_000738' ]
ids_cow = ['2007_000464', '2007_000491']

images_test_aeroplane = get_images(ids_aeroplane, path_image_folder,width=sq_size, height=sq_size,)
images_test_cow = get_images(ids_cow, path_image_folder,width=sq_size, height=sq_size,)


label_test_aeroplane = get_class_labels_str(ids_aeroplane)
label_test_cow = get_class_labels_str(ids_cow)
# Use transform(), not fit_transform(): the binarizer is shared with the rest
# of the notebook, and re-fitting it on two samples could replace its class
# set with only the classes present in these few labels.
label_test_aeroplane_binarized = multiLabelBinarizer.transform(label_test_aeroplane)
label_test_cow_binarized = multiLabelBinarizer.transform(label_test_cow)


plot_matrix(images_test_aeroplane, label_test_aeroplane, scale = 3)
plot_matrix(images_test_cow, label_test_cow,  scale = 3)

In [0]:
# Predict the class scores of the four sample images. The images are divided
# by 255 before prediction (presumably to match the scaling used during
# training -- TODO confirm against the generators).
y_aeroplane_pred = class_transfer_learning_model.predict(images_test_aeroplane/255.0)
y_cow_pred = class_transfer_learning_model.predict(images_test_cow/255.0)

# Temporarily widen numpy's line width so each score vector prints on one line.
with np.printoptions(linewidth=500): # np.set_printoptions(precision=1,linewidth=100)
    print("Y Prediction aeroplane (should be [1, 0, ...., 0])\n" + str(np.round(y_aeroplane_pred,2)))
    print("Y Prediction cow (should be [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0])\n" + str(np.round(y_cow_pred,2)))


In [0]:
# Names of the classes at indices 9, 12 and 14 of classes_names.
for class_idx in (9, 12, 14):
    print(classes_names[class_idx])

___
*The results are based on a training with sq_size = 128! other sq_size may induce different results, of course*
___

Digging the cow situation, we analyze the results of those two class predictions:


* the first one has only 0.44 coef of being a cow, but 0.45 of being a person and 0.12 of being a horse,
* the second one has 0.26 coef of being a cow, but even higher chance of being a person

Of course, as currently trained as a multilabel problem: for the classifier, there *could* be a cow and a person on this image. Using sigmoid activation, the sum of the 20 classifier's outputs isn't one. However, those results show that in those two cases, an image of a cow alone has activated features of horse, and human (mostly).

It's interesting as
- a horse is visually close to a cow, even for a human (mammal of comparable size, comparable colors, comparable shape, ...)
- "person" is the class that is much more represented in the dataset. It does sound right that the system has learned more features about a person, than others, features that are yet activated by a cow. 

As confirmed by those two samples, the aeroplanes are well recognized, but not the cows. 

### Digging in the network


The following part was possible thanks to https://towardsdatascience.com/visualizing-intermediate-activation-in-convolutional-neural-networks-with-keras-260b36d60d0

Similarly to what was done with the bottleneck features before, it's possible - and interesting ! - to look at how the network was activated  by the image input. we can visualize the different (convolution) layers and their filters. 

In [0]:
# Build a model that exposes the output of every layer, so that
# activations[k] is the output of class_transfer_learning_model.layers[k].
layer_outputs = [layer.output for layer in class_transfer_learning_model.layers]
activation_model = tf.keras.models.Model(inputs=class_transfer_learning_model.input, outputs=layer_outputs)

activations = activation_model.predict(images_test_cow[1:2]/255)
print(activations[0].shape)

# Only the Conv2D layers and the 'gap' layer are displayed.
layers_to_plot = [l for l in class_transfer_learning_model.layers
                  if (l.__class__.__name__ == 'Conv2D') or (l.name == 'gap')]
layer_names = [layer.name for layer in layers_to_plot]
print(layer_names)

# Index the activations by layer name. Zipping `layer_names` (a filtered list)
# directly with `activations` (all layers) would pair each name with the
# activation of an unrelated layer.
activation_by_layer_name = {
    layer.name: layer_activation
    for layer, layer_activation in zip(class_transfer_learning_model.layers, activations)
}

images_per_row = 10
layers_not_plotted = ['Conv1', 'expanded_conv_project']


def _to_uint8_image(values):
    """Standardize `values` and map them to uint8 grey levels centred on 128.

    Works on a copy, so the activation array itself is left untouched.
    """
    values = values - values.mean()
    spread = values.std()
    if spread > 0:
        values = values / spread
    return np.clip(values * 64 + 128, 0, 255).astype('uint8')


for layer_name in layer_names: # Displays the feature maps
    if layer_name in layers_not_plotted:
        continue
    layer_activation = activation_by_layer_name[layer_name]
    n_features = layer_activation.shape[-1] # Number of features in the feature map
    n_cols = n_features // images_per_row # Number of tile rows in the display grid
    if n_cols == 0:
        continue # fewer channels than one full row: nothing to tile

    if layer_activation.ndim == 4:
        # Feature maps of shape (1, size, size, n_features): tile one image per channel.
        size = layer_activation.shape[1]
        display_grid = np.ones((size * n_cols+1, images_per_row * size+1))
        for col in range(n_cols):
            for row in range(images_per_row):
                channel_image = _to_uint8_image(
                    layer_activation[0, :, :, col * images_per_row + row])
                display_grid[col * size : (col + 1) * size,
                             row * size : (row + 1) * size] = channel_image
        scale = 1. / size
        figure_size = (int(scale * display_grid.shape[1])+1,
                       int(scale * display_grid.shape[0])+1)
    else:
        # Vector output of shape (1, n_features), e.g. the pooling layer:
        # one pixel per feature, `images_per_row` features per row.
        feature_vector = layer_activation[0, :n_cols * images_per_row]
        display_grid = _to_uint8_image(feature_vector).reshape(n_cols, images_per_row)
        figure_size = (images_per_row + 1, 8)

    plt.figure(figsize=figure_size)
    plt.title(layer_name)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')
    plt.grid(False)
    plt.show()

    # 

The deeper we go into the network, the fewer visual clues remain, and the more abstract the features become. 
In the end, we show the activation of the global average pooling layer, before the activation. It is remarkable to observe how, after the MobileNet v2, the features from the aeroplanes can look similar.



In [0]:
# for z in [0, 1]:
#     activations = activation_model.predict(images_test_aeroplane[z:z+1]/255)
#     layer_names = []
#     layers_to_plot = [ l for l in class_transfer_learning_model.layers if (l.__class__.__name__ == 'GlobalAveragePooling2D')]
#     for layer in layers_to_plot:
#         layer_names.append(layer.name) # Names of the layers, so you can have them as part of your plot
#     images_per_row = 10

#     layer_name = layers_to_plot[0].name
#     layer_activation = activations[155]

#     # for layer_name, layer_activation in zip(layer_names, activations): # Displays the feature maps
#     n_features = layer_activation.shape[-1] # Number of features in the feature map
#     size = layer_activation.shape[1] #The feature map has shape (1, size, size, n_features).
#     n_cols = n_features // images_per_row # Tiles the activation channels in this matrix
#     display_grid = np.ones((size * n_cols, images_per_row * size))
#     for col in range(n_cols): # Tiles each filter into a big horizontal grid
#         for row in range(images_per_row):
#             channel_image = layer_activation[0,
#                                             :, :,
#                                             col * images_per_row + row]
#             channel_image -= channel_image.mean() # Post-processes the feature to make it visually palatable
#             if channel_image.std() > 0:
#                 channel_image /= channel_image.std()
#             channel_image *= 64
#             channel_image += 128
#             channel_image = np.clip(channel_image, 0, 255).astype('uint8')
#             display_grid[col * size : (col + 1) * size, # Displays the grid
#                         row * size : (row + 1) * size] = channel_image

#     scale = 1. / size
#     # plt.figure(figsize=(int(scale * display_grid.shape[1])+1,
#     #                 int(scale * display_grid.shape[0])+1))
#     plt.figure()
#     plt.title(layer_name)
#     plt.imshow(display_grid, aspect='auto', cmap='viridis')
#     plt.grid(False)
#     plt.show()

## Classification - single label

So far, I tried to tackle the complete problem of multilabel, multiclass.

At the beginning of this notebook, we computed the proportion of such multilabel cases : about 40 % cases.
- Around 60% of samples (both in training and validation) are single label instance, 
- The Multilabels are often limited to a few

Following those observations on the data, we may want to change the problem, and tackle a single label / multi-class classification. 

<u>Consequences: </u>

- as shown before, the ratio between each class is not constant, and may vary a lot between some classes (e.g. human and sheep): I want to avoid that, and therefore change the distribution of the training input. 

This is disputable, but my goal is to train an "as general" as possible classifier.

- The loss function and activation function used insofar are not adapted anymore, and need to change:
    * Loss function: Categorical Cross Entropy
    * Activation: 'softmax', as the sum of all output probability should equal to 1
    * Metric: accuracy

- A new model is built based on those new settings, using Transfer Learning.

TL seems particularly well suited, as class learning has already been done behind the scenes. I "just" need to specialize this base model (already trained) to the specific classes I have from the VOC dataset. It appears quite appealing.

<u>Plan summary: </u>
1. get training and validation sets
2. build a network -- I go for transfer learning.
3. number of output class = 20
4. output activation = 'softmax'
5. loss function = categorical cross-entropy
6. metric = categorical accuracy

####Data input

In [0]:
# First step: from the dataframes used for the generation, keep only the
# items that carry fewer than two labels.
def _keep_single_label_rows(df_to_generate):
    """Return a copy of `df_to_generate` restricted to rows with fewer than two classes.

    A "nb_classes" column (length of the "class" entry) is added, and the
    index is reset (the previous index is kept as a column).
    """
    df_single = df_to_generate.copy()
    df_single["nb_classes"] = df_single["class"].apply(len)
    return df_single[df_single["nb_classes"]<2].reset_index()


# training
sp_df_class_train_to_generate = _keep_single_label_rows(df_class_train_to_generate)
# validation
sp_df_class_val_to_generate = _keep_single_label_rows(df_class_val_to_generate)

print(" #items in training set: " + str(len(sp_df_class_train_to_generate["class"])))
print(" #items in validation set: " + str(len(sp_df_class_val_to_generate["class"])))



# Because the classes are unbalanced, extra steps are needed to
# - randomly select items from the original set
# - with a weight inversely proportional to the original class weight.
# The goal is to have an as flat as possible distribution.
# ==> drawback: a lot of samples are thrown away.
sp_training_labels = multiLabelBinarizer.transform(sp_df_class_train_to_generate["class"])
# Fraction of the single-label training items belonging to each class.
sp_classes_weights = np.sum(sp_training_labels == 1, axis = 0) / sp_training_labels.shape[0]
print("Weights of the different classes, in the Single Label context:" + str(sp_classes_weights))

# inverted weights (normalized to sum to 1), indicating the weights for sampling
sp_inverted_classes_weights = (1/sp_classes_weights)/ np.sum(1/sp_classes_weights)
print(sp_inverted_classes_weights)


def _loc(df):
    """Return a copy of `df` with a 'sampling_rate' column.

    Rows whose 'class' entry is the 1-tuple of a class name receive the
    inverted weight of that class; all other rows keep a rate of 0.
    Assumes the weights are ordered like `classes_names` -- TODO confirm
    against the binarizer's class order.
    """
    # Start from a float column: the weights assigned below are floats.
    df = df.assign(sampling_rate=0.0)
    for i, class_name in enumerate(classes_names):
        df.loc[df['class']==(class_name,), 'sampling_rate'] = sp_inverted_classes_weights[i]
    return df


# The validation set is re-weighted with the weights computed on the training set.
sp_df_class_train_to_generate = _loc(sp_df_class_train_to_generate)
sp_df_class_val_to_generate = _loc(sp_df_class_val_to_generate)


# Weighted sampling (without replacement) from the single-label dataframes:
# 2200 items for training and 500 for validation, with fixed seeds.
SP_TRAIN_SAMPLE_SIZE = 2200
SP_VAL_SAMPLE_SIZE = 500

sp_df_class_train_to_generate = sp_df_class_train_to_generate.sample(SP_TRAIN_SAMPLE_SIZE, weights = sp_df_class_train_to_generate['sampling_rate'], random_state=1000)
print("\nNew single-label dataframe for training (5 first elem):")
print(sp_df_class_train_to_generate.head(5))


sp_df_class_val_to_generate = sp_df_class_val_to_generate.sample(SP_VAL_SAMPLE_SIZE, weights = sp_df_class_val_to_generate['sampling_rate'], random_state=500)
print("\nNew single-label dataframe for validation (5 first elem):")
print(sp_df_class_val_to_generate.head(5))

# Check the distribution of the classes after the weighted sampling.
def _class_share_percent(df_single_label):
    """Return, for each class of `classes_names`, its share (in %) of the rows
    of `df_single_label` whose 'class' entry is exactly that single class."""
    class_counts = np.zeros((num_classes,))
    for position, class_name in enumerate(classes_names):
        class_counts[position] = (df_single_label['class'] == (class_name,)).sum()
    return class_counts / sum(class_counts) * 100


sp_counts_training = _class_share_percent(sp_df_class_train_to_generate)
sp_counts_validation = _class_share_percent(sp_df_class_val_to_generate)

# add these proportions to the dataframe containing the original ones
local_df["sp_training"] = list(sp_counts_training)
local_df["sp_validation"] = list(sp_counts_validation)

# compare to original training/validation
print("\n\nProportion of each class associated to a specific set")
print(local_df.to_string())


In [0]:
'''
based on the dataframe, create tf generator
'''
# Generators built with the notebook's own helper; augmentation is requested
# for the training set only.
sp_train_set_class_tf_generator         = get_classification_generator(sp_df_class_train_to_generate, multiLabelBinarizer, to_augment = True)
sp_val_set_class_tf_generator           = get_classification_generator(sp_df_class_val_to_generate, multiLabelBinarizer, to_augment = False)
# Second pair of generators, built with the Keras-based helper, because the
# Keras API seems to give better results at this point.
sp_train_class_keras_generator          = get_classification_keras_generator(sp_df_class_train_to_generate, 'training')
sp_val_set_class_keras_generator        = get_classification_keras_generator(sp_df_class_val_to_generate, 'validation')

'''
Plot to see the check the inputs of the training models
'''

# Fetch the first element of the training generator and display it with the
# class names decoded from the binarized labels.
sp_img_local, sp_labels_local = next(iter(sp_train_class_keras_generator))
plot_matrix(sp_img_local, multiLabelBinarizer.inverse_transform(np.array(sp_labels_local)), scale = 2)

# Fetch the first element of the validation generator as well; its display is
# disabled, so this only checks that the generator yields data.
sp_img_local, sp_labels_local = next(iter(sp_val_set_class_keras_generator))
# plot_matrix(sp_img_local, multiLabelBinarizer.inverse_transform(np.array(sp_labels_local)), scale = 2)


### Creation of the model

In [0]:
'''
# Create the base model from the pre-trained model MobileNet V2, for single label
case.
credit: https://www.tensorflow.org/tutorials/images/transfer_learning

'''
# Echo the input shape the base model is going to be built with
print('('+str(height) + "," + str(width) +', ' + str(channel) + ")")

'''
Inspect a batch of data
'''
# The loop body is empty on purpose: it only binds image_batch/label_batch to the first batch
for image_batch, label_batch in sp_train_set_class_tf_generator.take(1):
   pass

# NOTE(review): this bare expression is not the last one of the cell, so nothing is displayed for it
image_batch.shape

'''
Creation of the Pre-trained model => MobileNet
'''
# include_top=False drops the ImageNet classifier so that only the convolutional feature extractor is kept
sp_base_model = tf.keras.applications.MobileNetV2(input_shape=(height, width, channel),
                                               include_top=False,
                                               weights='imagenet')

# Freeze the pre-trained weights: only the layers added below are trained
sp_base_model.trainable = False

# Run the sample batch through the frozen base to check the feature-map shape
feature_batch = sp_base_model(image_batch)
print(feature_batch.shape)

'''
Add the classification head => here we want softmax as 1 label only
'''
# output_base_model = sp_base_model.layers[-1].output
# Average every feature map over its spatial dimensions -> one vector per image
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)


# L2-regularised dense layer with one softmax output per class
prediction_layer   = tf.keras.layers.Dense(num_classes,kernel_regularizer=tf.keras.regularizers.l2(0.001), activation='softmax')
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

'''
build the model
# note: we used another API than before, in order to get acquainted with this tk.keras library
'''
# Stack: frozen base -> global average pooling -> softmax head
model_sp = tf.keras.Sequential([
                                 sp_base_model,
                                 global_average_layer,
                                 prediction_layer])



###Compilation

In [0]:
'''
Compile
'''
# Compile the model
# Adam with a learning rate of 1e-4; categorical cross-entropy pairs with the softmax head and one-hot labels
model_sp.compile(optimizer=  tf.keras.optimizers.Adam(0.0001), #get_optimizer(), #
                                    loss=tf.keras.losses.categorical_crossentropy, # to explain
                                    metrics=['accuracy']) #categorical accuracy chosen


# Print the layer-by-layer summary of the assembled model
model_sp.summary()

Based on this model, we can evaluate directly the loss and accuracy, on several batches. We get something close to random => 1/20 = 5% accuracy. This is expected.

In [0]:
# Baseline before any training: evaluate the untrained head on 100 validation batches
loss0,accuracy0 = model_sp.evaluate(sp_val_set_class_tf_generator.take(100), steps = 100)
print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))

###Training

In [0]:
# train_model = True
#' + str(int(time.time())) + '
# Either train the single-label model and persist its weights + history,
# or restore both from disk when training is switched off.
if train_class_model_sp:
    # NOTE(review): training reads the Keras generator while validation reads the
    # tf.data generator -- confirm that this mix is intended.
    history_model_sp = model_sp.fit(   sp_train_class_keras_generator, #  sp_train_set_class_tf_generator
                                                steps_per_epoch= len(sp_df_class_train_to_generate["filename"]) // BS,
                                                epochs=EPOCHS, 
                                                callbacks=[my_callbacks],
                                                validation_data= sp_val_set_class_tf_generator, #val_img_gen, #
                                                validation_steps = len(sp_df_class_val_to_generate["filename"]) // BS,
        )
    model_sp.save_weights(single_model_save_filename)
    histories['model_sp'] = history_model_sp.history
    # Context manager guarantees that the history file is flushed and closed
    with open(single_model_hist_save_filename, 'wb') as hist_file:
        pickle.dump(histories["model_sp"], hist_file)
else:
    model_sp.load_weights(single_model_save_filename)
    # Only load history files produced by this notebook: unpickling untrusted data can execute code
    with open(single_model_hist_save_filename, 'rb') as hist_file:
        histories['model_sp'] = pickle.load(hist_file)

In [0]:
# Plot the accuracy and loss curves (training vs validation) stored in `histories`
for key_ in  ["model_sp"]: #histories.keys():
    print("key = " + str(key_))
    try:
        # All four series are read before any figure is created, so a missing key leaves no half-drawn figure
        acc = histories[key_]['accuracy']
        val_acc = histories[key_]['val_accuracy']

        loss = histories[key_]['loss']
        val_loss = histories[key_]['val_loss']

        plt.figure(figsize=(8, 8))
        # Top panel: accuracy
        plt.subplot(2, 1, 1)

        plt.plot(acc, label='Training accuracy')
        plt.plot(val_acc, label='Validation accuracy')
        plt.legend(loc='lower right')
        plt.ylabel('Accuracy')
        plt.ylim([0,1])
        plt.title('Training and Validation accuracy')

        # Bottom panel: loss
        plt.subplot(2, 1, 2)
        plt.plot(loss, label='Training Loss')
        plt.plot(val_loss, label='Validation Loss')
        plt.legend(loc='upper right')
        plt.ylabel('Cross Entropy')
        # plt.ylim([0,1.0])
        plt.title('Training and Validation Loss')
        plt.xlabel('epoch')
        plt.show()
    except KeyError:
        # The history lacks one of the expected series (or the key itself): skip the plot
        print("Key error -> pass")
        pass

###Analysis
With this model, the confusion matrix isn't binary anymore but multi-class.

In [0]:
'''
Get the validation data ready for prediction
'''
# retrieve the ids from the dataframe
# (the id is the part of the filename before the first '.')
sp_val_ids = list(sp_df_class_val_to_generate["filename"].apply(lambda x: x.split('.')[0]))

# load the images in RAM
sp_val_images = get_images(sp_val_ids, path_image_folder,width=sq_size, height=sq_size,)
# Rescale to [0, 1] only when the pixel values are not already normalised
if np.max(np.max(sp_val_images)) > 1:
    sp_val_images = np.divide(sp_val_images,255.0, dtype = np.float32)

# retrieve the labels from the dataframe
sp_val_labels_binarized = multiLabelBinarizer.transform(sp_df_class_val_to_generate["class"])
sp_val_labels_str = list(sp_df_class_val_to_generate["class"])
y_true = sp_val_labels_binarized


'''
Predict
'''
y_pred = model_sp.predict(sp_val_images)
# y_pred_str = multiLabelBinarizer.inverse_transform(y_pred)

'''
Check on shapes
'''
print("y_true shape = " + str(y_true.shape))
print("y_pred shape = " + str(y_pred.shape))

'''
convert one hot encoding to categorical using argmax
'''
y_true_categorical = [ np.argmax(t) for t in y_true ]
y_pred_categorical = [ np.argmax(t) for t in y_pred ]
'''
Compute confusion matrix
'''
# Raw counts, then the same matrix with every true-class row normalised to sum to 1
conf_mat_dict = sklearn.metrics.confusion_matrix(y_true = y_true_categorical, y_pred = y_pred_categorical)
conf_mat_dict_norm = sklearn.metrics.confusion_matrix( y_true=y_true_categorical, y_pred = y_pred_categorical, normalize = 'true')


'''
sp_classes_names => no diningtable (item #10 in zero indexing)
'''

classes_names_wo_diningtable = tuple([name for name in classes_names if not(name == 'diningtable')])


fig, axes = plt.subplots(1,2,figsize = (25,10))

# group_names = ['TN','FP','FN','TP']
# local_labels = [f"{v1}\n{v2}" for v1, v2 in zip(group_names,group_counts)]
# Cell annotations: the count itself, or "<" for empty cells
group_counts = [str(int(value))  if value > 0.01 else "<" for value in conf_mat_dict.flatten()]
# group_percentages = ["{0:0.2f}".format(value) if value > 0.05 else "<" for value in conf_mat_dict.flatten()/np.sum(conf_mat_dict)]

# 361 = 19 * 19: the matrix only has 19 classes, which this cell attributes to a missing 'diningtable'
# NOTE(review): a 19x19 matrix caused by any other absent class would get wrong tick labels -- confirm
if len(group_counts) == 361:
    size_local_labels = 19
    sp_classes_names = classes_names_wo_diningtable
else:
    size_local_labels = 20
    sp_classes_names = classes_names

local_labels = np.asarray(group_counts).reshape(size_local_labels,size_local_labels)


# fmt='' because the annotations are pre-formatted strings
sns.heatmap(conf_mat_dict, fmt='', annot=local_labels ,xticklabels=sp_classes_names, yticklabels=sp_classes_names, ax=axes[0])
axes[0].set_title("Confusion Matrix")

# Same plot for the normalised matrix; ratios of 0.05 or less are shown as "<"
group_counts = ["{0:0.2f}".format(value)  if value > 0.05 else "<" for value in conf_mat_dict_norm.flatten()]
local_labels = np.asarray(group_counts).reshape(size_local_labels,size_local_labels)
sns.heatmap(conf_mat_dict_norm, fmt='', annot=local_labels ,xticklabels=sp_classes_names, yticklabels=sp_classes_names, ax=axes[1])
axes[1].set_title("Confusion Matrix normalized per True values")

'''
Question: is the model capable of predicting a diningtable?
'''
# y_pred is re-used here for predictions on val_images, replacing the single-label predictions above
y_pred = model_sp.predict(val_images)
print("If the model is capable, all the index should have been predicted from the global validation set (containing all the classes):")
print(np.unique(np.argmax(y_pred, axis=-1)))

###Conclusion
1.  From scratch models
    * v1 -> lots of weights
    * v2 -> attempt to solve overfitting - does not work always
    * v3 -> deeper convolution: less weights, deeper -> better results
2.  Transfer Learning - MobileNetv2
    * Much Better - ~63% (sqsize = 128) to ~70% (sqsize = 224)
    * Overfitting --> not adapted to fine tuning (although implemented)
    * VGG16 -> worse results; ResNet -> slower

3.  'Simpler' problem: Single-label multi-class classification
    * Better results
    * Lack of data overall
    * Classes better represented perform better as well


---
* Overall lack of data. Data augmentation helps but is not miraculous at this point
* Dropout tested but not (always) successful
* Batch Normalization helps
* Mean subtraction does not help (at least, in the sq_size = 128 case)
* Confusion Matrix is a nice tool to apprehend behaviour
* Importance of image input pipeline (intuitive)




##Clean Up RAM

In [0]:
# Drop the references to the classification generators so that their memory can
# be reclaimed before the segmentation part starts.
_generators_to_release = (
    "training_set_class_tf_generator",
    "val_set_class_tf_generator",
    "sp_train_class_tf_generator",
    "sp_val_class_tf_generator",
    # names under which the single-label tf.data generators are actually created
    "sp_train_set_class_tf_generator",
    "sp_val_set_class_tf_generator",
    "train_img_gen",
    "val_img_gen",
)

for _generator_name in _generators_to_release:
    # pop() with a default removes the global when it exists and is a no-op
    # otherwise, without hiding unrelated errors the way a bare `except:` does
    globals().pop(_generator_name, None)
    

# Segmentation Tasks



### Definition of the data pipeline
In a very similar fashion to Classification Tasks, we define a pipeline (based on tf.Data)

In [0]:
'''
override parameters
'''
# Square 224x224 inputs from here on; height and width are kept in sync with sq_size
sq_size = 224
height = sq_size
width = sq_size

In [0]:
'''
Segmentation
'''
print("--" * 50)
# Image ids of the training / validation splits of the segmentation task
training_seg_ids = get_ids("training", "segmentation")
validation_seg_ids = get_ids("validation", "segmentation")

print("Segmentation task:")
print("training set,   #elements = ", len(training_seg_ids))
print("validation set, #elements = ", len(validation_seg_ids))

# Peek at the first two ids of each split
print(training_seg_ids[0:2])
print(validation_seg_ids[0:2])

'''
get labels
'''

# Class labels for every id (n_samples = None: no sub-sampling requested)
training_seg_labels_str = get_class_labels_str(training_seg_ids, classification_type = classification_type, n_samples = None )
val_seg_labels_str = get_class_labels_str(validation_seg_ids, classification_type = classification_type, n_samples = None)




####Create base dictionary

At this point, we have the structures that contains all the info for the pipeline. The name of the variables is kept as explicit as possible

>`name` | type | description
>---|---|---
>`d_seg_train`| dictionary | training dictionary for segmentation
>`d_seg_val` | dictionary | validation dictionary for segmentation
>`df_seg_train`| dataframe | training dataframe for segmentation
>`df_seg_val` | dataframe | validation dataframe for segmentation


In [0]:
'''
build dictionary and dataframe
'''
# Two parallel columns: the image id ('stem_filename') and its class labels ('class')
d_seg_train = {'stem_filename': training_seg_ids , 'class': training_seg_labels_str}
d_seg_val   = {'stem_filename': validation_seg_ids , 'class': val_seg_labels_str}

df_seg_train = pd.DataFrame(data=d_seg_train)
df_seg_val   = pd.DataFrame(data=d_seg_val)

## Example of dataframe with horses only
# df_seg_train_horse = get_dataframe_from_classes(df_seg_train, ('horse',))
# df_seg_val_horse = get_dataframe_from_classes(df_seg_val, ('horse',))

'''
the dataframes that are used to generate data. 
It can be change with more specific dataframe, if needed 
(or a new generator can be created as well, see below)
'''
# Plain aliases (no copy): the full dataframes feed the generators
df_seg_train_to_generate = df_seg_train
df_seg_val_to_generate   = df_seg_val

Although there are other ways, the colormap is short enough to be hardcoded manually (pragmatic yet not production-ready solution)

In [0]:
'''
Segmentation part
'''

# RGB colour of every class in the segmentation masks, in class-id order;
# the last entry (224, 224, 192) is the 'void' colour
colormap = ((0,0,0),(128,0,0), (0,128,0),(128,128,0),(0,0,128),(128,0,128), (0,128,128),(128,128,128),
                           (64,0,0),(192,0,0), (64,128,0), (192,128,0), (64,0,128), (192,0,128), (64,128,128),
                           (192,128,128),(0,64,0), (128,64,0), (0,192,0),(128,192,0), (0,64,128),(224, 224, 192))
# colormap = np.array(colormap_uint8, np.float32) 
# colormap = colormap/255.0
# print(colormap)
# Lookup table colour <-> class id <-> class name ('void' carries the id 255)
color_class = {"color": colormap,
                 "class_id":(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,255),
                 "classes_names" :( 'background', 'aeroplane','bicycle', 'bird','boat','bottle','bus','car',
                'cat', 'chair','cow','diningtable','dog','horse','motorbike',
                'person','pottedplant','sheep','sofa','train','tvmonitor', 'void') }
df_color_class = pd.DataFrame(color_class)
print(df_color_class.to_string())



####Creation of the generators

similarly to classification task, tf.Data generators can be built up. 
In particular it:
- loads an image
- converts its mask to one-hot encoding representation
- resize of the image
- convert pix value to float, between [0,1]
- performs (limited!) data augmentation

######Helper functions

In [0]:
'''
Helper function to create generartors using tensorflow Data API
'''

# https://www.tensorflow.org/tutorials/images/segmentation

def _one_hot_encode(mask):
    """
    Turn a colour-coded mask into a one-hot map, one channel per colormap entry.

    Only the first 21 colours of the global `colormap` are used (the 'void'
    colour is left out), so a pixel whose colour matches none of them ends up
    with an all-zero vector.
    Inspired by https://stackoverflow.com/questions/57518057/how-can-i-convert-an-image-from-pixels-to-one-hot-encodings
    (tf.one_hot is not used directly).

    @param mask: colour mask whose last axis holds the RGB values; it is
                 compared against float32 reference colours
    @return float32 tensor with the RGB axis replaced by an axis of 21 classes
    """
    reference_colors = tf.cast(tf.constant(colormap[0:21]), dtype=tf.float32)
    # Insert a class axis so that every pixel is compared with every reference colour
    channel_matches = tf.equal(mask[..., None, :], reference_colors)
    # A pixel belongs to a class only when all three colour channels match
    pixel_matches = tf.reduce_all(channel_matches, axis=-1)
    return tf.cast(pixel_matches, dtype=tf.float32)


def segmentation_normalize(input_image, input_mask):
    '''
    Prepare an (image, mask) pair for the network.

    @param input_image: image with pixel values on a 0-255 scale
    @param input_mask: colour-coded mask, forwarded to _one_hot_encode
    @return (image cast to float32 and divided by 255, one-hot encoded mask)
    '''
    scaled_image = tf.cast(input_image, tf.float32) / 255.0
    return scaled_image, _one_hot_encode(input_mask)


def parse_segmentation_function(stem_filename):
    '''
    Load one (image, mask) pair from disk and prepare it for the network.

    The image is read from <voc_root_folder>/JPEGImages/<id>.jpg and the mask
    from <voc_root_folder>/SegmentationClass/<id>.png. Both are resized to
    sq_size x sq_size and then passed through segmentation_normalize.

    @param stem_filename: id of the image files to read
    @return (input_image, input_mask) as returned by segmentation_normalize
    '''
    filename_input_image = voc_root_folder + r'/JPEGImages/' + stem_filename + ".jpg"
    filename_input_mask = voc_root_folder + r'/SegmentationClass/' + stem_filename + ".png"

    input_image_string = tf.io.read_file(filename_input_image)
    input_image = tf.image.decode_jpeg(input_image_string, channels=3)

    input_mask_string = tf.io.read_file(filename_input_mask)
    input_mask = tf.image.decode_png(input_mask_string, channels=3)

    input_image = tf.image.resize(input_image, [sq_size, sq_size])
    # The mask is colour-coded: the default bilinear interpolation would blend
    # neighbouring class colours into values matching no class, so those pixels
    # would lose their label. Nearest-neighbour only copies existing colours.
    input_mask = tf.image.resize(input_mask, [sq_size, sq_size],
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Nearest-neighbour resizing keeps the decoded integer dtype, whereas the
    # one-hot encoding compares against float32 reference colours.
    input_mask = tf.cast(input_mask, tf.float32)

    input_image, input_mask = segmentation_normalize(input_image, input_mask)

    return input_image, input_mask


def train_segmentation_preprocess(input_image, input_mask):
    '''
    Data augmentation for the segmentation training set.

    With a probability of one half, the image and its mask are mirrored
    left-right together so that they stay aligned. The other augmentations
    below were tried and are left disabled.

    @return the (possibly flipped) image and mask
    '''
    should_flip = tf.random.uniform(()) > 0.5
    if should_flip:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    # if tf.random.uniform(()) > 0.5:
    #     input_image = tf.image.central_crop(input_image, central_fraction=0.5)
    #     input_mask = tf.image.central_crop(input_mask, central_fraction=0.5)
    #     input_image = tf.image.resize(input_image, [sq_size, sq_size])
    #     input_mask = tf.image.resize(input_mask, [sq_size, sq_size])

    # input_image = tf.image.random_brightness(input_image, max_delta=0.3)
    # input_image = tf.image.random_saturation(input_image, lower=0.8, upper=1.2)
    # input_image = tf.image.random_contrast(input_image, 0.85, 1.15)

    # tf.print(tf.reduce_mean(input_image, axis=None))

    return input_image, input_mask

# def train_segmentation_reduce(input_image, input_mask):
#     '''
#     convert a (sq_size, sq_size, 22) to a (sqsize, sqsize, 3)
#     '''
#     get_class_number = tf.argmax(input_mask, axis=-1)
    
#     t_ = tf.convert_to_tensor(arr)

#     # print(get_class_number.numpy)
#     tf.print(t_)
#     # condition = tf.equal(get_class_number, tf.constant(0, dtype = tf.int64)) or \
#     #             tf.equal(get_class_number, tf.constant(1, dtype = tf.int64)) or \
#     #             tf.equal(get_class_number, tf.constant(13, dtype = tf.int64))
#     # case_true = get_class_number #
#     # case_false = 0
#     # get_class_number = tf.where(condition, case_true, case_false)
#     tf.print(get_class_number)
#     # one_hot_map = tf.one_hot(indices =get_class_number, depth=3, on_value=1, off_value=0, axis=-1, dtype=None,)

#     # condition2 = tf.equal(get_class_number, tf.constant(13, dtype = tf.int64))    
    
#     # local_colormap = ((0,0,0),(128,0,0), (192,0,128),)
#     # local_color_reference = tf.cast(tf.constant(local_colormap), dtype=tf.int64)
#     # # Load the image and obtain tensor with one-hot values
#     # comp = tf.equal(get_class_number[..., None, :], local_color_reference)
#     # one_hot_map = tf.cast(tf.reduce_all(comp, axis=-1), dtype=tf.float32)


#     # print(get_class_number.numpy)
#     # print("end")

#     return input_image, input_mask


def get_segmentation_generator(dataframe, cache = True, to_augment = True, reduced = False):
    '''
    Build the tf.data pipeline that feeds (image, mask) batches for segmentation.

    Order of the stages: parse -> cache -> (optional) augmentation -> shuffle
    -> repeat -> batch -> prefetch. The augmentation sits after the cache, so
    it is re-drawn every time an element is read.

    @param dataframe: must provide a "stem_filename" column with the image ids
    @param cache: a string is handed to Dataset.cache() as its argument; any
                  other value (False included) results in an in-memory cache
    @param to_augment: when True, train_segmentation_preprocess is applied
    @param reduced: currently unused
    @return an endlessly repeating dataset of batches of size BATCH_SIZE
    '''
    stem_filenames = dataframe["stem_filename"]
    dataset = tf.data.Dataset.from_tensor_slices(stem_filenames).map(
        parse_segmentation_function, num_parallel_calls = AUTOTUNE)

    # if reduced:
    #     tf_generator = tf_generator.map(train_segmentation_reduce, num_parallel_calls = AUTOTUNE)

    dataset = dataset.cache(cache) if isinstance(cache, str) else dataset.cache()

    if to_augment:
        dataset = dataset.map(train_segmentation_preprocess, num_parallel_calls = AUTOTUNE)

    # The shuffle buffer spans the whole set; the seed is fixed
    return (dataset
            .shuffle(len(stem_filenames), seed = 426473)
            .repeat()
            .batch(BATCH_SIZE)
            .prefetch(buffer_size = AUTOTUNE))


######Creation of the generators


In [0]:
# Both generators are created without augmentation (to_augment = False), the training one included.
# NOTE(review): get_segmentation_generator reads the global BATCH_SIZE, which is (re)defined in the
# next cell -- confirm that it is already set when this cell runs on a fresh kernel.
training_set_seg_tf_generator   = get_segmentation_generator(df_seg_train_to_generate, to_augment = False)
val_set_seg_tf_generator        = get_segmentation_generator(df_seg_val_to_generate, to_augment = False)

In [0]:
# Batch and step configuration for the segmentation training
VAL_SUBSPLITS = 1
BATCH_SIZE = 10 #10
BS = BATCH_SIZE  # short alias
BUFFER_SIZE = 1000
# Number of full batches in one pass over each dataframe
STEPS_PER_EPOCH = len(df_seg_train_to_generate) // BATCH_SIZE  # TRAIN_LENGTH // BATCH_SIZE
VALIDATION_STEPS = len(df_seg_val_to_generate) // BATCH_SIZE // VAL_SUBSPLITS #VAL_LENGTH//BATCH_SIZE//VAL_SUBSPLITS

OUTPUT_CHANNELS = 20+1 # 20 object classes + background; the 'void' colour gets no channel of its own (the one-hot encoding only uses colormap[0:21])

####Visualization
Helper functions to get the colors and to visualize the created masks

In [0]:
'''
Creation of a palette, and visualisation of the mask
source of the code snippet : https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/vis.py
'''
def make_palette(num_classes):
    """
    NOT USED AS COLORS HARDCODED

    Build a PASCAL VOC style colormap, in which consecutive class ids receive
    colors that are far apart (useful for segmentation visualization).
    See http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit
    Takes:
        num_classes: the number of classes
    Gives:
        palette: the colormap as a k x 3 uint8 array of RGB colors
    """
    palette = np.zeros((num_classes, 3), dtype=np.uint8)
    for class_id in range(num_classes):
        rgb = [0, 0, 0]
        remaining_bits = class_id
        target_bit = 7
        # Consume the id three bits at a time: bits 0/1/2 go to R/G/B, filling
        # each channel from its most significant bit downwards
        while remaining_bits:
            for channel in range(3):
                rgb[channel] |= ((remaining_bits >> channel) & 1) << target_bit
            remaining_bits >>= 3
            target_bit -= 1
        palette[class_id] = rgb
    return palette

def color_seg(seg, palette):
    """
    Map every class id of a segmentation to its color.
    Takes:
        seg: H x W segmentation image of class IDs
        palette: colormap indexed by class ID (one RGB triplet per row)
    Gives:
        H x W x 3 image of class colors
    """
    # Look the colors up on the flattened ids, then restore the image layout
    flat_colors = palette[seg.flat]
    target_shape = seg.shape + (3,)
    return flat_colors.reshape(target_shape)

def vis_seg(img, seg, palette, alpha=0.5):
    """
    Overlay a segmentation on top of an image.
    Takes:
        img: H x W x 3 image in [0, 255]
        seg: H x W segmentation image of class IDs
        palette: K x 3 colormap for all classes
        alpha: opacity of the segmentation in [0, 1]
    Gives:
        H x W x 3 float32 image with overlaid segmentation (the input image
        is copied, not modified)
    """
    blended = np.array(img, dtype=np.float32)
    # Class id 0 is left untouched: only pixels with a positive id are tinted
    foreground = seg > 0
    class_colors = palette[seg[foreground].flat]
    keep_fraction = 1. - alpha
    blended[foreground] *= keep_fraction
    blended[foreground] += alpha * class_colors
    # vis = vis.astype(np.uint8)
    return blended


def display(display_list):
    '''
        Utils function from XXX
        Plots the given arrays side by side on a single row, titled in order:
        | input image | the True Mask | The predicted Mask |
        At most three entries are supported (one title per entry).
    '''
    plt.figure(figsize=(15, 15))
    title = ['Input Image', 'True Mask', 'Predicted Mask']

    n_panels = len(display_list)
    for position, panel in enumerate(display_list, start=1):
        plt.subplot(1, n_panels, position)
        plt.title(title[position - 1])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(panel))
        plt.axis('off')
    plt.show()


def create_mask(pred_mask, ):
    '''
    Turn per-class scores (or a one-hot mask) into a color image.

    The class of every pixel is the argmax over the last axis; it is then
    colored with the first 21 entries of `colormap`, scaled to [0, 1].

    - rank 4 input (batch): only the FIRST element of the batch is converted
    - rank 3 input (single mask): converted as is
    - any other rank: None is returned
    '''
    rank = len(pred_mask.shape)
    if rank not in (3, 4):
        return None

    class_ids = tf.argmax(pred_mask, axis=-1)
    if rank == 4:
        class_ids = class_ids[0]
    return color_seg(np.array(class_ids), np.array(colormap[0:21])/255.0)


Sanity checks on the behavior of the previous functions

In [0]:
'''
- load images using the generators, and show them using plot_matrix (as usual)
- create corresponding mask from the one-hot encoding, and show them 
'''
# palette=make_palette(21)
# print(palette)
# plot_matrix(np.array(color_seg(np.array(pred_mask), palette)).T, color=True)
# display([color_seg(np.array(pred_mask),palette)])

print("Segmentation Training Set")
image_batch, mask_batch = next(iter(training_set_seg_tf_generator))
plot_matrix(image_batch, scale=2)
# Size the buffer from the batch itself: with a hard-coded size, np.empty leaves
# uninitialised rows whenever the batch is smaller (and overflows when it is larger)
tab_mask = np.empty((mask_batch.shape[0],sq_size,sq_size,3), np.float32)
for i, mask_ in enumerate(mask_batch):
    tab_mask[i,:] = create_mask(mask_)
plot_matrix(tab_mask, scale=2)
# print("dtype of image = " + str(image_val_batch[0].dtype))
# print("dtype of mask  = " + str(mask_val_batch[0].dtype))
# print("max value mask = " + str(np.max(mask_val_batch)))
# print("min value mask = " + str(np.min(mask_val_batch)))

print("Segmentation Validation Set")
image_batch, mask_batch = next(iter(val_set_seg_tf_generator))
plot_matrix(image_batch, scale=2)
tab_mask = np.empty((mask_batch.shape[0],sq_size,sq_size,3), np.float32)
for i, mask_ in enumerate(mask_batch):
    tab_mask[i,:] = create_mask(mask_)
plot_matrix(tab_mask,scale=2)

In [0]:
'''
viewing the numbers; just for ppt purpose
'''

# image_val_batch, mask_val_batch = next(iter(training_set_seg_tf_generator))
# mask_ = mask_val_batch[0]
# plot_matrix(np.array([image_val_batch[0]]), scale=3)
# pred_mask = tf.argmax(mask_, axis=-1)
# print("pred_mask.shape", pred_mask.shape)
# plot_matrix(np.array([pred_mask]), color = True, scale=3)
# with np.printoptions(threshold=sys.maxsize, linewidth = 100):
#     print(pred_mask[55:60,55:64].numpy())
#     # print(mask_[55:60, 55:64, :].numpy())
# plt.figure(figsize=(10,10))
# sns.heatmap(pred_mask[55:64, 55:64], annot = True)

In [0]:
'''
segmentation part: check of the pipeline
'''
# Replays the loading steps by hand on a single image, to inspect the result
stem_filename_ = "2007_002400" #2007_000129

toy_filename_input_image = voc_root_folder + r'/JPEGImages/' + stem_filename_ + ".jpg"
toy_filename_input_mask = voc_root_folder + r'/SegmentationClass/' + stem_filename_ + ".png"

toy_input_image_string = tf.io.read_file(toy_filename_input_image)
toy_input_image = tf.image.decode_jpeg(toy_input_image_string, channels=3)

toy_input_mask_string = tf.io.read_file(toy_filename_input_mask)
toy_input_mask = tf.image.decode_png(toy_input_mask_string, channels=3)

toy_input_image = tf.image.resize(toy_input_image, [sq_size, sq_size])
# The mask is colour-coded: bilinear interpolation would blend class colours at
# the borders into values matching no class, so nearest-neighbour is used for it
toy_input_mask = tf.image.resize(toy_input_mask, [sq_size, sq_size],
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
img_source = tf.cast(toy_input_image, tf.float32) / 255.0 
mask_source = tf.cast(toy_input_mask, tf.int32)

display([img_source,mask_source])
# The one-hot encoding compares against float32 reference colours
one_hot_ = _one_hot_encode(tf.cast(mask_source, tf.float32))
print("one hot shape:" + str(one_hot_.shape))

# one_hot_tmp = tf.one_hot(mask_source, colormap)
# print("TensorFlow one hot: ", one_hot_tmp.shape)
# crop = one_hot_[80:87, 60:65]
# display([mask_source[80:87, 60:65]])
# print(crop)
# print("=="*30)

# Round trip: one-hot -> class ids -> colours, shown next to the source mask
pred_tmp = tf.argmax(one_hot_, axis=-1)
display([img_source, mask_source, color_seg(np.array(pred_tmp), np.array(colormap[0:21]))])
print("unique classes: " + str(np.unique(pred_tmp)))

# for i,j in zip(palette, colormap):
# #     print(str(i)+ " <-> " + str(j))
# print( ('bird',) in list(df_seg_val_to_generate["class"]))
# print( "2007_002400" in list(df_seg_val_to_generate["stem_filename"]))

##Definition of loss and score


The class `MyScore` gathers some metrics of interest in the context of image segmentation:
- IoU coefficient
- Dice score and loss
- generalized Dice score
- Tversky loss

Not all of them are used, but they are left in the notebook.

In [0]:
class MyScore():
    """
    Scores and losses for image segmentation, exposed as static methods.

    Every method takes (y_true, y_pred) with the classes on the last axis.
    """

    @staticmethod
    def iou_coef(y_true, y_pred):
        '''
        classical intersection over union idea

        Computed per image (sums over axes 1, 2, 3) with a smoothing term of 1,
        then averaged over the batch.
        '''
        # axis = 1,2,3 ---> per image
        smooth = 1
        intersection = tf.keras.backend.sum(tf.keras.backend.abs(y_true * y_pred), axis = [1,2,3])
        union        = tf.keras.backend.sum(y_true, axis = [1,2,3]) + tf.keras.backend.sum(y_pred, axis = [1,2,3]) - intersection
        iou = tf.keras.backend.mean((intersection + smooth)/ (union + smooth) , axis = 0)
        return iou
    
    @staticmethod
    def dice_loss(y_true, y_pred):
        '''
        As refered to in the litterature, as in 
        https://lars76.github.io/neural-networks/object-detection/losses-for-segmentation/

        Returns 1 - dice_score, i.e. one value per class.
        '''
        return 1 - MyScore.dice_score(y_true, y_pred)
    
    @staticmethod
    def dice_score(y_true, y_pred):
        '''
        Dice score 2*|A.B| / (|A| + |B|), summed over axes 0, 1 and 2.

        Inputs that are not tensors (e.g. numpy arrays) are converted to
        float32 tensors first. A small epsilon keeps the ratio defined when a
        class is absent from both inputs.
        '''
        name_of_class = y_true.__class__.__name__
        if not ("Tensor" in name_of_class):
            y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)
            y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)

        eps = 1e-7
        numerator = 2 * tf.reduce_sum(y_true * y_pred, axis = (0,1,2)) + eps
        denominator = tf.reduce_sum(y_true + y_pred, axis = (0,1,2)) + eps
        return (numerator / denominator)
    
    @staticmethod
    def gen_dice(y_true, y_pred):
        '''
        Generalized dice loss, weighted by the inverse squared size of each class.

        both tensors are [b, h, w, classes] and y_pred is in logit form
        (a softmax is applied to it here). A single [h, w, classes] image is
        accepted as well and handled as a batch of one.

        Returns the mean over the batch of (1 - generalized dice).
        '''
        eps=1e-7

        y_true = tf.convert_to_tensor(y_true)

        # [b, h, w, classes]
        pred_tensor = tf.nn.softmax(y_pred)
        y_true_shape = tf.shape(y_true)
        n_classes = y_true_shape[-1]
        # a rank-3 input has no batch axis: treat it as one image
        batch_size = y_true_shape[0] if y_true.shape.rank == 4 else 1

        # [b, h*w, classes]: flatten the spatial axes only, so that the sums
        # below run over the pixels of each image
        y_true = tf.reshape(y_true, [batch_size, -1, n_classes])
        y_pred = tf.reshape(pred_tensor, [batch_size, -1, n_classes])

        # [b, classes]
        # count how many of each class are present in 
        # each image, if there are zero, then assign
        # them a fixed weight of eps
        counts = tf.reduce_sum(y_true, axis=1)
        weights = 1. / (counts ** 2)
        weights = tf.where(tf.math.is_finite(weights), weights, eps)

        multed = tf.reduce_sum(y_true * y_pred, axis=1)
        summed = tf.reduce_sum(y_true + y_pred, axis=1)

        # [b]
        numerators = tf.reduce_sum(weights*multed, axis=-1)
        denom = tf.reduce_sum(weights*summed, axis=-1)
        dices = 1. - 2. * numerators / denom
        # a non-finite ratio (empty denominator) counts as a zero loss
        dices = tf.where(tf.math.is_finite(dices), dices, tf.zeros_like(dices))
        return tf.reduce_mean(dices)


    # Ref: salehi17, "Twersky loss function for image segmentation using 3D FCDN"
    # -> the score is computed for each class separately and then summed
    # alpha=beta=0.5 : dice coefficient
    # alpha=beta=1   : tanimoto coefficient (also known as jaccard)
    # alpha+beta=1   : produces set of F*-scores
    # implemented by E. Moebel, 06/04/18
    @staticmethod
    def tversky_loss(y_true, y_pred):
        '''
        Tversky loss with alpha = beta = 0.5.

        Inputs that are not tensors are converted to float32 tensors first.
        Returns (number of classes) - (sum of the per-class Tversky indices).
        '''
        name_of_class = y_true.__class__.__name__
        if not ("Tensor" in name_of_class):
            y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)
            y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)

        alpha = 0.5
        beta  = 0.5
        
        ones = tf.keras.backend.ones(tf.keras.backend.shape(y_true))
        p0 = y_pred      # proba that voxels are class i
        p1 = ones-y_pred # proba that voxels are not class i
        g0 = y_true
        g1 = ones-y_true
        
        num = tf.keras.backend.sum(p0*g0, (0,1,2))
        den = num + alpha*tf.keras.backend.sum(p0*g1,(0,1,2)) + beta*tf.keras.backend.sum(p1*g0,(0,1,2))
        T = tf.keras.backend.sum(num/den) # when summing over classes, T has dynamic range [0 Ncl]
        
        Ncl = tf.keras.backend.cast(tf.keras.backend.shape(y_true)[-1], 'float32')
        return Ncl-T


In [0]:
'''
https://github.com/tensorflow/tensorflow/issues/32875 
'''
class MyMeanIoU(tf.keras.metrics.MeanIoU):
    '''
    Attempt to use the built-in MeanIoU while setting the weight of the VOID
    class to 0.

    The `sample_weight` argument given by the caller is ignored: a fixed
    vector (ones, with a trailing 0 for VOID) is always forwarded instead.
    NOTE(review): MeanIoU documents sample_weight as a per-sample weighting,
    not a per-class one -- confirm that this has the intended effect.
    '''
    def __call__(self, y_true, y_pred, sample_weight=None):
        # y_pred = tf.argmax(y_pred, axis=-1)
        void_masked_weights = np.array([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0])
        return super().__call__(y_true, y_pred, sample_weight=void_masked_weights)



Additional Loss function
Attempt to have a loss weighted per class

In [0]:
class CategoricalCrossentropyWeightedPerClass(tf.keras.losses.CategoricalCrossentropy):
    '''
    Categorical cross-entropy that always forwards a fixed weight tensor as
    `sample_weight` (attempt at a loss weighted per class).

    @param weights: value handed to the parent loss as `sample_weight` on
                    every call
    '''
    def __init__(self, weights):
        super().__init__()
        self._weights = weights # tf.convert_to_tensor(np.array([BS, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0]))
    
    def __call__(self, y_true, y_pred, sample_weight=None):
        '''
        Compute the loss with the stored weights.

        `sample_weight` is accepted for compatibility with the Keras loss
        interface (hence its default of None) but it is ignored: the weights
        given at construction time are used instead.
        '''
        # tf.print(y_true)
        # tf.print("y_true shape = " + str(y_true.shape))
        # tf.print("y_pred shape = " + str(y_pred.shape))

        return super().__call__(y_true, y_pred, sample_weight = self._weights)


####Computing (inverse) weight of class

In [0]:
'''
class weights
'''
# totalPixels = 0
# counts = np.zeros((21,))
# generator = training_set_seg_tf_generator
# batches_per_epoch = len(df_seg_train_to_generate) // BS

# for i in range(batches_per_epoch):
#     x, y = next(iter(generator))
#     # plot_matrix(x)
#     shp = y.shape
#     totalPixels += shp[0] * shp[1] * shp[2] 
#     counts = counts + np.sum(y, axis=(0,1,2))

# print("total number of pixels: " + str(totalPixels))
# print("counts: " + str(counts))

# weights = totalPixels / (counts + 1e-8)
# print("weights before normalization: " + str(weights))

# weights = weights / np.sum(weights)
# print("weights after normalization: " + str(weights))


# Hard-coded per-class weights, one entry per one-hot channel (21 values).
# They correspond to the output of the commented-out computation above:
# total number of pixels / number of pixels of the class.
weights_before_normalization = np.array([  1.37158705, 125.5154789,  346.32864165,  97.87471169, 188.29581472,
 129.08625047,  63.33267949,  66.69344206,  78.27472078, 117.4650739,
 175.19801069,  76.89171447, 216.99631816, 134.38962242,  70.32948383,
  25.88651142, 154.05779723, 123.93974291, 125.51081184,  83.77931862,
  95.39310543])
# Same weights divided by their sum
weights_after_normalization = np.array([0.00054938, 0.05027435, 0.13871951, 0.03920303, 0.07542057, 0.05170459,
 0.02536746, 0.02671359, 0.03135239, 0.04704981, 0.07017434, 0.03079844,
 0.08691636, 0.05382882, 0.02816998, 0.01036866, 0.06170677, 0.0496432,
 0.05027248, 0.03355722, 0.03820904])
# Reshaped to (1, 1, 1, 21) so that they broadcast against (batch, height, width, classes) tensors
weights = weights_after_normalization.reshape((1,1,1,21))
kWeights = tf.keras.backend.constant(weights)

'''
Attempt of weighted categorical cross entropy
'''
def weighted_cce(y_true, y_pred):
    """Class-weighted categorical cross-entropy.

    Every pixel's cross-entropy is scaled by the weight of its ground-truth
    class (read from the module-level ``kWeights``). The weight map is first
    normalised so that it sums to one over the whole batch.

    The previous version selected the weights with ``y_pred``. That makes the
    weighting depend on the network output, so the loss can be lowered simply
    by predicting low-weight classes; class weights must follow the target.

    Args:
        y_true: one-hot targets. Assumed to be (batch, height, width, classes)
            with a class axis that broadcasts against ``kWeights`` -- TODO confirm
            against the data generators.
        y_pred: predicted class probabilities with the same shape as ``y_true``.

    Returns:
        Tensor holding one weighted loss value per batch element (the weighted
        per-pixel losses summed over axes 1 and 2).
    """
    # Pick, for each pixel, the weight of its true class.
    true_one_hot = tf.cast(y_true, kWeights.dtype)              # (batch, height, width, classes)
    pixel_weights = tf.keras.backend.sum(kWeights * true_one_hot, axis=-1)  # (batch, height, width)

    # Normalise over the whole batch so the weights sum to one.
    pixel_weights = pixel_weights / tf.keras.backend.sum(pixel_weights)

    pixel_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)   # (batch, height, width)
    weighted_loss = pixel_weights * pixel_loss

    return tf.keras.backend.sum(weighted_loss, axis=(1, 2))



####Toy Example

In [0]:
'''
toy example
3 1 1 3
1 2 2 1
1 2 2 1
3 1 1 3 
'''
# One-hot target plane and matching soft prediction for each of the three toy
# classes; a prediction puts 0.6 on the true class and 0.2 everywhere else.
channel1 = np.array(
    [[0, 1, 1, 0],
     [1, 0, 0, 1],
     [1, 0, 0, 1],
     [0, 1, 1, 0]],
    dtype=np.float32)
channel1pred = np.array(
    [[0.2, 0.6, 0.6, 0.2],
     [0.6, 0.2, 0.2, 0.6],
     [0.6, 0.2, 0.2, 0.6],
     [0.2, 0.6, 0.6, 0.2]],
    dtype=np.float32)

channel2 = np.array(
    [[0, 0, 0, 0],
     [0, 1, 1, 0],
     [0, 1, 1, 0],
     [0, 0, 0, 0]],
    dtype=np.float32)
channel2pred = np.array(
    [[0.2, 0.2, 0.2, 0.2],
     [0.2, 0.6, 0.6, 0.2],
     [0.2, 0.6, 0.6, 0.2],
     [0.2, 0.2, 0.2, 0.2]],
    dtype=np.float32)

channel3 = np.array(
    [[1, 0, 0, 1],
     [0, 0, 0, 0],
     [0, 0, 0, 0],
     [1, 0, 0, 1]],
    dtype=np.float32)
channel3pred = np.array(
    [[0.6, 0.2, 0.2, 0.6],
     [0.2, 0.2, 0.2, 0.2],
     [0.2, 0.2, 0.2, 0.2],
     [0.6, 0.2, 0.2, 0.6]],
    dtype=np.float32)

# Only the top-left 2x2 corner of every plane is used; the planes are stacked
# along a new last (class) axis, giving arrays of shape (2, 2, 3).
t_ = np.stack([plane[:2, :2] for plane in (channel1, channel2, channel3)], axis=-1)
pred_ = np.stack([plane[:2, :2] for plane in (channel1pred, channel2pred, channel3pred)], axis=-1)

mask_input = t_.copy()
# "Perfect" prediction: an independent copy of the target itself.
mask_pred1 = mask_input.copy()
# print("mask_pred1 shape", mask_pred1.shape)
# Imperfect prediction: an independent copy of the soft toy prediction.
mask_pred2 = pred_.copy()

# Keras cross-entropies: per-pixel values first, then their sum.
print("Keras binary cross entropy (same input): ",
      tf.keras.losses.binary_crossentropy(mask_input, mask_pred1))
print("Keras binary cross entropy (pred):\n",
      tf.keras.losses.binary_crossentropy(mask_input, mask_pred2))
print(tf.keras.backend.sum(
    tf.keras.losses.binary_crossentropy(mask_input, mask_pred2)))
print("Keras categorical cross entropy (pred):\n ",
      tf.keras.losses.categorical_crossentropy(mask_input, mask_pred2))
print(tf.keras.backend.sum(
    tf.keras.losses.categorical_crossentropy(mask_input, mask_pred2)))

# Same comparison with the notebook's own MyScore losses.
print("MyScore Dice loss(same input):\n",
      MyScore.dice_loss(mask_input, mask_pred1))
print("MyScore Dice loss(pred):\n",
      MyScore.dice_loss(mask_input, mask_pred2))
print("MyScore Gen Dice (same input):\n",
      MyScore.gen_dice(mask_input, mask_pred1))
print("MyScore Gen Dice (pred):\n",
      MyScore.gen_dice(mask_input, mask_pred2))

# print("MyScore tversky_loss:\n", MyScore.tversky_loss(mask_input, mask_pred2))


# cce = tf.keras.losses.CategoricalCrossentropy()
# loss = cce(mask_input,mask_pred2)
# print('CCE Loss: ', loss.numpy())  # Loss: 0.5108256



##Additional Callbacks
 


Function to display a prediction (it can also be used outside of callbacks).

In [0]:
def show_predictions(model, dataset=None):
    '''
    Predict the segmentation of one image with the given model and plot it.

    The image is the first element of the next batch drawn from `dataset`.
    When `dataset` is None, the batch is drawn from the validation generator
    (`val_set_seg_tf_generator`) instead.

    The notebook's `display` helper is called with
    [ original | target_mask | predicted_mask ].
    '''
    # Both cases share the same pipeline; only the batch source differs.
    batch_source = val_set_seg_tf_generator if dataset is None else dataset
    images, one_hot_masks = next(iter(batch_source))

    first_image = images[0]
    # Collapse the one-hot mask to class indices, then map the indices to colours.
    class_indices = np.array(tf.argmax(one_hot_masks[0], axis=-1))
    target_mask = color_seg(class_indices, np.array(colormap))

    # predict() expects a batch, so add a leading batch axis of size one.
    predicted = model.predict(first_image[tf.newaxis, ...])
    display([first_image, target_mask, create_mask(predicted)])



In [0]:
from IPython.display import clear_output

class DisplayCallback(tf.keras.callbacks.Callback):
    """Keras callback that shows a sample prediction after every epoch."""

    def __init__(self, model_t):
        """Remember the model whose predictions are displayed.

        Args:
            model_t: model handed to `show_predictions` at the end of each epoch.
        """
        super().__init__()
        self.model_t = model_t

    def on_epoch_end(self, epoch, logs=None):
        """Plot a prediction, then print the 1-based number of the finished epoch."""
        # clear_output(wait=True)
        show_predictions(model=self.model_t)
        finished_epoch = epoch + 1
        print('\nSample Prediction after epoch {}\n'.format(finished_epoch))



##Segmentation using TL

#####Base Model

First, get the base model and select a few of its layers:
their activations will be used for the skip connections.

In [0]:
'''
Get Backbone - MobileNetv2
'''

# Pre-built MobileNetV2 without its classification head, used as the encoder.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=[sq_size, sq_size, 3],
    include_top=False,
)

'''
Create the encoding part model thanks to MobileNetv2
'''


# Layers whose activations are exposed by the encoder.
# The sizes are the original author's notes -- presumably for a 128x128 input.
layer_names = [
    'block_1_expand_relu',   # 64x64
    'block_3_expand_relu',   # 32x32
    'block_6_expand_relu',   # 16x16
    'block_13_expand_relu',  # 8x8
    'block_16_project',      # 4x4
]
layers_list = list(map(
    lambda layer_name: base_model.get_layer(layer_name).output,
    layer_names,
))

'''
create a model based on those activations, and make in Not Trainable
'''
# Feature extractor: one input, one output per selected layer.
down_stack = tf.keras.Model(
    inputs=base_model.input,
    outputs=layers_list,
)

'''
make it not trainable!
'''
# Freeze the encoder.
down_stack.trainable = False

#####Upsampling part

In [0]:
'''
Helper function from 
https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/pix2pix/pix2pix.py 
'''

def upsample(filters, size, norm_type='batchnorm', apply_dropout=False):
  """Builds a stride-2 transposed-convolution upsampling block.

  Layer order: Conv2DTranspose => normalization => Dropout => ReLU.

  Args:
    filters: number of filters of the transposed convolution.
    size: kernel size of the transposed convolution.
    norm_type: 'batchnorm' or 'instancenorm' (case-insensitive). Any other
      value adds no normalization layer.
    apply_dropout: if True, a Dropout(0.5) layer is added before the ReLU.

  Returns:
    A tf.keras.Sequential model holding the layers above.
  """
  block = tf.keras.Sequential()

  # Transposed convolution without bias, weights drawn from N(0, 0.02).
  block.add(
      tf.keras.layers.Conv2DTranspose(
          filters,
          size,
          strides=2,
          padding='same',
          kernel_initializer=tf.random_normal_initializer(0., 0.02),
          use_bias=False))

  norm_key = norm_type.lower()
  if norm_key == 'batchnorm':
    block.add(tf.keras.layers.BatchNormalization())
  elif norm_key == 'instancenorm':
    # NOTE(review): InstanceNormalization is not defined in this cell; it must
    # exist elsewhere in the notebook for this branch to work -- confirm.
    block.add(InstanceNormalization())

  if apply_dropout:
    block.add(tf.keras.layers.Dropout(0.5))

  block.add(tf.keras.layers.ReLU())
  return block

Establish the upsampling stack (no skip connections at this point).

In [0]:
'''
Defining the layers for the decoding part (upsampling)
'''
# One stride-2 upsampling block (kernel size 3) per decoder stage.
# Author's size notes: 4x4 -> 8x8 -> 16x16 -> 32x32 -> 64x64.
up_stack = [upsample(n_filters, 3) for n_filters in (512, 256, 128, 64)]

####U-Net
(using skip connections)

In [0]:
'''
Definition of U-Net model, with skip connections
'''
def unet_model(output_channels):
    """Assemble the U-Net from the module-level `down_stack` and `up_stack`.

    Args:
        output_channels: number of channels produced by the final softmax layer.

    Returns:
        A tf.keras.Model mapping a (sq_size, sq_size, 3) input to the softmax
        output of the last transposed convolution.
    """
    inputs = tf.keras.layers.Input(shape=[sq_size, sq_size, 3])

    # Encoder: the last output is the bottleneck, the others feed the skips.
    *skip_features, bottleneck = down_stack(inputs)

    # Decoder: upsample, then concatenate with the matching encoder output,
    # walking the skip features from deepest to shallowest.
    x = bottleneck
    for up_block, skip in zip(up_stack, reversed(skip_features)):
        upsampled = up_block(x)
        x = tf.keras.layers.Concatenate()([upsampled, skip])

    # Final stride-2 transposed convolution with a softmax over the channels
    # (author's note: 64x64 -> 128x128).
    head = tf.keras.layers.Conv2DTranspose(
        output_channels,
        3,
        strides=2,
        padding='same',
        activation="softmax")

    # model.trainable = False
    return tf.keras.Model(inputs=inputs, outputs=head(x))

####Compilation

In [0]:
'''
Build the model
'''
# Build the transfer-learning U-Net; OUTPUT_CHANNELS is defined elsewhere in the notebook.
model_seg_tl = unet_model(OUTPUT_CHANNELS)

In [0]:
'''
Compilation 
'''

# meanIoU = tf.keras.metrics.MeanIoU(num_classes = 22)
# my_meanIoU = MyMeanIoU(num_classes = 22)

# Adam with a small learning rate, plain categorical cross-entropy as the loss
# (MyScore.dice_loss was tried as an alternative), IoU and Dice as metrics.
model_seg_tl.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=[MyScore.iou_coef, MyScore.dice_score],
)

'''
Plot the model
'''
tf.keras.utils.plot_model(model_seg_tl, show_shapes=True)
model_seg_tl.summary()

We now have the U-Net model (with skip connections).
We can plot a prediction on an image from the validation set.

In [0]:
'''
Using show_predictions BEFORE training
'''
# No dataset is passed, so show_predictions draws its sample from the validation generator.
show_predictions(model_seg_tl)

####Training

In [0]:
# EPOCHS = 100
# Either train the model and persist weights + history, or restore both from disk.
if train_seg_model_tl:
    history_model_seg_tl = model_seg_tl.fit(
        training_set_seg_tf_generator,
        epochs=EPOCHS,
        steps_per_epoch=len(df_seg_train_to_generate) // BS,  # STEPS_PER_EPOCH
        validation_steps=len(df_seg_val_to_generate) // BS,   # VALIDATION_STEPS
        validation_data=val_set_seg_tf_generator,
        # NOTE(review): if my_callbacks is already a list this nests it -- confirm.
        callbacks=[my_callbacks],  # DisplayCallback(model_seg_tl),
    )
    model_seg_tl.save_weights(model_seg_tl_save_filename)
    histories['model_seg_tl'] = history_model_seg_tl.history
    # Context manager guarantees the file is flushed and closed after the dump.
    with open(model_seg_tl_hist_save_filename, 'wb') as history_file:
        pickle.dump(histories["model_seg_tl"], history_file)
else:
    model_seg_tl.load_weights(model_seg_tl_save_filename)
    # pickle.load can execute arbitrary code: only load history files that
    # were written by this notebook.
    with open(model_seg_tl_hist_save_filename, 'rb') as history_file:
        histories['model_seg_tl'] = pickle.load(history_file)

In [0]:
# Plot the training curves (best effort), then evaluate the model.
try:
    seg_tl_history = histories["model_seg_tl"]
    fig, axes = plt.subplots(2,1,figsize=(8,8))

    # Top panel: IoU and Dice on the training and validation sets.
    axes[0].plot(seg_tl_history['iou_coef'], label='IoU')
    axes[0].plot(seg_tl_history['val_iou_coef'], label = 'val_IoU')
    axes[0].plot(seg_tl_history['dice_score'], label='Dice')
    axes[0].plot(seg_tl_history['val_dice_score'], label = 'val_Dice')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.0, 1])
    axes[0].legend(loc='upper right')

    # Bottom panel: training and validation loss.
    axes[1].plot(seg_tl_history['loss'], label='loss')
    axes[1].plot(seg_tl_history['val_loss'], label = 'val_loss')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Categorical Cross-entropy')
    # axes[1].set_ylim([0.0, 1])
    axes[1].legend(loc='upper right')
    fig.suptitle('Model Sementation TL')
except Exception as plot_error:
    # A failed plot must not block the evaluation below, but the cause is
    # reported instead of being swallowed. KeyboardInterrupt / SystemExit are
    # deliberately not caught, so interrupting the cell really stops it.
    print("There was an error during the plot")
    print(repr(plot_error))

# NOTE(review): the step count is derived from the *training* dataframe while
# the data comes from the validation generator -- confirm this is intended.
scores = model_seg_tl.evaluate(val_set_seg_tf_generator, steps=2*len(df_seg_train_to_generate["stem_filename"])//BS )

####Results
Example of results

In [0]:
# next(iter(val_set_seg_tf_generator))
'''
using show_predictions
'''
# 1) Let the helper draw a sample from the validation generator.
print("Using show_predictions:")
show_predictions(
    model=model_seg_tl,
    dataset=val_set_seg_tf_generator,
)


# 2) Same steps done by hand, on element 3 of the next validation batch and
#    with the colormap restricted to its first 21 entries.
print("Manually:")
image_train_batch, mask_train_batch = next(iter(val_set_seg_tf_generator))
sample_image = image_train_batch[3]
sample_mask = color_seg(
    np.array(tf.argmax(mask_train_batch[3], axis=-1)),
    np.array(colormap[0:21]),
)
pred_mask = model_seg_tl.predict(sample_image[tf.newaxis, ...])
display([sample_image, sample_mask, create_mask(pred_mask)])



# print(len(pred_mask[0,...]))
# first_layer = pred_mask[0,...]
# print(first_layer.shape)
# print(np.round(first_layer[50:60, 50:60, 0],1))
# print(np.round(first_layer[50:60, 50:60, 3],1))
# print(np.round(first_layer[64,64,0:22],1))


# print(len(np.where(np.round(pred_mask[0,:,:,0]) == 1)))
# print(len(np.where(np.round(pred_mask[0,:,:,1]) == 1)))
# print(len(np.where(np.round(pred_mask[0,:,:,2]) == 1)))
# print(len(np.where(np.round(pred_mask[0,:,:,3]) == 1)))
# print(len(np.where(np.round(pred_mask[0,:,:,4]) == 1)))
# print(len(np.where(np.round(pred_mask[0,:,:,5]) == 1)))

####Sandbox metrics and losses

In [0]:
# image_train_batch, mask_train_batch = next(iter(val_set_seg_tf_generator))
# sample_image = image_train_batch[5]
# # print(mask_train_batch[0].shape)
# sample_mask = color_seg(np.array(tf.argmax(mask_train_batch[5], axis = -1)),np.array(colormap))
# pred_mask = model_seg_tl.predict(sample_image[tf.newaxis, ...])
# display([sample_image, sample_mask, create_mask(pred_mask)])
# print(tf.shape(mask_train_batch[5]))
# metric_0 = tf.reduce_mean(tf.keras.metrics.categorical_crossentropy(mask_train_batch[5][tf.newaxis, ...], pred_mask))
# print(metric_0)
# loss_0 = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(mask_train_batch[5][tf.newaxis, ...], pred_mask))
# print(loss_0)

# metric_1 = tf.reduce_mean(MyScore.dice_score(mask_train_batch[5][tf.newaxis, ...], pred_mask))
# print(metric_1)
# metric_2 = Trial.dice_coe(mask_train_batch[5][tf.newaxis, ...], pred_mask)
# print(metric_2)
# metric_3 = Trial.dice_hard_coe(mask_train_batch[5][tf.newaxis, ...], pred_mask)
# print(metric_3)

# reference_mask = mask_train_batch[5][tf.newaxis, ...]
# predicted_mask = reference_mask
# metric_1 = tf.reduce_mean(MyScore.dice_score(reference_mask, predicted_mask))
# print(metric_1)
# metric_2 = Trial.dice_coe(reference_mask, predicted_mask)
# print(metric_2)
# metric_3 = Trial.dice_hard_coe(reference_mask, predicted_mask)
# print(metric_3)
# '''
# BUG
# '''
# # reference_mask = tf.convert_to_tensor(mask_input)
# # predicted_mask = tf.convert_to_tensor(mask_pred1)
# # metric_1 = tf.reduce_mean(MyScore.dice_score(reference_mask, predicted_mask))
# # print(metric_1)
# # metric_2 = Trial.dice_coe(reference_mask, predicted_mask)
# # print(metric_2)
# # metric_3 = Trial.dice_hard_coe(reference_mask, predicted_mask)
# # print(metric_3)


# # reference_mask = tf.convert_to_tensor(mask_input)
# # predicted_mask = tf.convert_to_tensor(mask_pred2)
# # metric_1 = tf.reduce_mean(MyScore.dice_score(reference_mask, predicted_mask))
# # print(metric_1)
# # metric_2 = Trial.dice_coe(reference_mask, predicted_mask)
# # print(metric_2)
# # metric_3 = Trial.dice_hard_coe(reference_mask, predicted_mask)
# # print(metric_3)

In [0]:
# from tensorflow.python.framework import ops
# from tensorflow.python.ops import array_ops, math_ops, nn_ops, standard_ops

# # from tensorlayer import logging

# class Trial():

#     @staticmethod
#     def cross_entropy(output, target, name=None):
#         """
#         Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions,
#         it implements softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
#         Parameters
#         ----------
#         output : Tensor
#             A batch of distribution with shape: [batch_size, num of classes].
#         target : Tensor
#             A batch of index with shape: [batch_size, ].
#         name : string
#             Name of this loss.

#         Examples
#         --------
#         >>> import tensorlayer as tl
#         >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits, 'my_loss')    
#         References
#         -----------
#         - About cross-entropy: `<https://en.wikipedia.org/wiki/Cross_entropy>`__.
#         - The code is borrowed from: `<https://en.wikipedia.org/wiki/Cross_entropy>`__.
#         """
#         return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output), name=name)



#     @staticmethod
#     def sigmoid_cross_entropy(output, target, name=None):
#         """Sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``.

#         Parameters
#         ----------
#         output : Tensor
#             A batch of distribution with shape: [batch_size, num of classes].
#         target : Tensor
#             A batch of index with shape: [batch_size, ].
#         name : string
#             Name of this loss.

#         """
#         return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output), name=name)


#     @staticmethod
#     def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'):
#         """Binary cross entropy operation.

#         Parameters
#         ----------
#         output : Tensor
#             Tensor with type of `float32` or `float64`.
#         target : Tensor
#             The target distribution, format the same with `output`.
#         epsilon : float
#             A small value to avoid output to be zero.
#         name : str
#             An optional name to attach to this function.

#         References
#         -----------
#         - `ericjang-DRAW <https://github.com/ericjang/draw/blob/master/draw.py#L73>`__

#         """
#         #     with ops.op_scope([output, target], name, "bce_loss") as name:
#         #         output = ops.convert_to_tensor(output, name="preds")
#         #         target = ops.convert_to_tensor(targets, name="target")

#         # with tf.name_scope(name):
#         return tf.reduce_mean(
#             tf.reduce_sum(
#                 -(target * tf.math.log(output + epsilon) + (1. - target) * tf.math.log(1. - output + epsilon)), axis=1
#             ), name=name
#         )


#     # For brevity, let `x = output`, `z = target`.  The binary cross entropy loss is
#     #
#     #     loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))

#     @staticmethod
#     def mean_squared_error(output, target, is_mean=False, axis=-1, name="mean_squared_error"):
#         """Return the TensorFlow expression of mean-square-error (L2) of two batch of data.

#         Parameters
#         ----------
#         output : Tensor
#             2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel].
#         target : Tensor
#             The target distribution, format the same with `output`.
#         is_mean : boolean
#             Whether compute the mean or sum for each example.
#                 - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data.
#                 - If False, use ``tf.reduce_sum`` (default).
#         axis : int or list of int
#             The dimensions to reduce.
#         name : str
#             An optional name to attach to this function.

#         References
#         ------------
#         - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`__

#         """
#         # with tf.name_scope(name):
#         # if len(output.shape) == 2:  # [batch_size, n_feature]
#         #     axis = 1
#         # elif len(output.shape) == 3:  # [batch_size, w, h]
#         #     axis = [1, 2]
#         # elif len(output.shape) == 4:  # [batch_size, w, h, c]
#         #     axis = [1, 2, 3]
#         # else:
#         #     raise Exception("Unknow dimension")

#         if is_mean:
#             mse = tf.reduce_mean(tf.reduce_mean(tf.math.squared_difference(output, target), axis), name=name)
#         else:
#             mse = tf.reduce_mean(tf.reduce_sum(tf.math.squared_difference(output, target), axis), name=name)
#         return mse


#     @staticmethod
#     def normalized_mean_square_error(output, target, axis=-1, name="normalized_mean_squared_error_loss"):
#         """Return the TensorFlow expression of normalized mean-square-error of two distributions.

#         Parameters
#         ----------
#         output : Tensor
#             2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel].
#         target : Tensor
#             The target distribution, format the same with `output`.
#         axis : int or list of int
#             The dimensions to reduce.
#         name : str
#             An optional name to attach to this function.

#         """
#         with tf.name_scope("normalized_mean_squared_error_loss"):
#             # if len(output.shape) == 2:  # [batch_size, n_feature]
#             #     axis = 1
#             # elif len(output.shape) == 3:  # [batch_size, w, h]
#             #     axis = [1, 2]
#             # elif len(output.shape) == 4:  # [batch_size, w, h, c]
#             #     axis = [1, 2, 3]
#             nmse_a = tf.sqrt(tf.reduce_sum(tf.math.squared_difference(output, target), axis=axis))
#             nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=axis))
#             nmse = tf.reduce_mean(nmse_a / nmse_b, name=name)
#         return nmse


#     @staticmethod
#     def absolute_difference_error(output, target, is_mean=False, axis=-1, name="absolute_difference_error_loss"):
#         """Return the TensorFlow expression of absolute difference error (L1) of two batch of data.

#         Parameters
#         ----------
#         output : Tensor
#             2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel].
#         target : Tensor
#             The target distribution, format the same with `output`.
#         is_mean : boolean
#             Whether compute the mean or sum for each example.
#                 - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data.
#                 - If False, use ``tf.reduce_sum`` (default).
#         axis : int or list of int
#             The dimensions to reduce.
#         name : str
#             An optional name to attach to this function.

#         """
#         # # with tf.name_scope("absolute_difference_error_loss"):
#         # if len(output.shape) == 2:  # [batch_size, n_feature]
#         #     axis = 1
#         # elif len(output.shape) == 3:  # [batch_size, w, h]
#         #     axis = [1, 2]
#         # elif len(output.shape) == 4:  # [batch_size, w, h, c]
#         #     axis = [1, 2, 3]
#         # else:
#         #     raise Exception("Unknow dimension")
#         if is_mean:
#             loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), axis), name=name)
#         else:
#             loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), axis), name=name)
#         return loss


#     @staticmethod
#     def dice_coe(output, target, loss_type='jaccard', axis=(1, 2, 3), smooth=1e-5):
#         """Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity
#         of two batch of data, usually be used for binary image segmentation
#         i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match.

#         Parameters
#         -----------
#         output : Tensor
#             A distribution with shape: [batch_size, ....], (any dimensions).
#         target : Tensor
#             The target distribution, format the same with `output`.
#         loss_type : str
#             ``jaccard`` or ``sorensen``, default is ``jaccard``.
#         axis : tuple of int
#             All dimensions are reduced, default ``[1,2,3]``.
#         smooth : float
#             This small value will be added to the numerator and denominator.
#                 - If both output and target are empty, it makes sure dice is 1.
#                 - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, then if smooth is very small, dice close to 0 (even the image values lower than the threshold), so in this case, higher smooth can have a higher dice.

#         Examples
#         ---------
#         >>> import tensorlayer as tl
#         >>> outputs = tl.act.pixel_wise_softmax(outputs)
#         >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_)

#         References
#         -----------
#         - `Wiki-Dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`__

#         """
#         name_of_class = target.__class__.__name__
#         if not ("Tensor" in name_of_class):
#             target = tf.convert_to_tensor(target, dtype=tf.float32)
#             output = tf.convert_to_tensor(output, dtype=tf.float32)


#         inse = tf.reduce_sum(output * target, axis=axis)
#         if loss_type == 'jaccard':
#             l = tf.reduce_sum(output * output, axis=axis)
#             r = tf.reduce_sum(target * target, axis=axis)
#         elif loss_type == 'sorensen':
#             l = tf.reduce_sum(output, axis=axis)
#             r = tf.reduce_sum(target, axis=axis)
#         else:
#             raise Exception("Unknow loss_type")
#         # old axis=[0,1,2,3]
#         # dice = 2 * (inse) / (l + r)
#         # epsilon = 1e-5
#         # dice = tf.clip_by_value(dice, 0, 1.0-epsilon) # if all empty, dice = 1
#         # new haodong
#         dice = (2. * inse + smooth) / (l + r + smooth)
#         ##
#         dice = tf.reduce_mean(dice, name='dice_coe')
#         return dice


#     @staticmethod
#     def dice_hard_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5):
#         """Non-differentiable Sørensen–Dice coefficient for comparing the similarity
#         of two batch of data, usually be used for binary image segmentation i.e. labels are binary.
#         The coefficient between 0 to 1, 1 if totally match.

#         Parameters
#         -----------
#         output : tensor
#             A distribution with shape: [batch_size, ....], (any dimensions).
#         target : tensor
#             The target distribution, format the same with `output`.
#         threshold : float
#             The threshold value to be true.
#         axis : tuple of integer
#             All dimensions are reduced, default ``(1,2,3)``.
#         smooth : float
#             This small value will be added to the numerator and denominator, see ``dice_coe``.

#         References
#         -----------
#         - `Wiki-Dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`__

#         """
#         name_of_class = target.__class__.__name__
#         if not ("Tensor" in name_of_class):
#             target = tf.convert_to_tensor(target, dtype=tf.float32)
#             output = tf.convert_to_tensor(output, dtype=tf.float32)
            
#         output = tf.cast(output > threshold, dtype=tf.float32)
#         target = tf.cast(target > threshold, dtype=tf.float32)
#         inse = tf.reduce_sum(tf.multiply(output, target), axis=axis)
#         l = tf.reduce_sum(output, axis=axis)
#         r = tf.reduce_sum(target, axis=axis)
#         # old axis=[0,1,2,3]
#         # hard_dice = 2 * (inse) / (l + r)
#         # epsilon = 1e-5
#         # hard_dice = tf.clip_by_value(hard_dice, 0, 1.0-epsilon)
#         # new haodong
#         hard_dice = (2. * inse + smooth) / (l + r + smooth)
#         ##
#         hard_dice = tf.reduce_mean(hard_dice, name='hard_dice')
#         return hard_dice


#     @staticmethod
#     def iou_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5):
#         """Non-differentiable Intersection over Union (IoU) for comparing the
#         similarity of two batch of data, usually be used for evaluating binary image segmentation.
#         The coefficient between 0 to 1, and 1 means totally match.

#         Parameters
#         -----------
#         output : tensor
#             A batch of distribution with shape: [batch_size, ....], (any dimensions).
#         target : tensor
#             The target distribution, format the same with `output`.
#         threshold : float
#             The threshold value to be true.
#         axis : tuple of integer
#             All dimensions are reduced, default ``(1,2,3)``.
#         smooth : float
#             This small value will be added to the numerator and denominator, see ``dice_coe``.

#         Notes
#         ------
#         - IoU cannot be used as training loss, people usually use dice coefficient for training, IoU and hard-dice for evaluating.

#         """
#         pre = tf.cast(output > threshold, dtype=tf.float32)
#         truth = tf.cast(target > threshold, dtype=tf.float32)
#         inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis)  # AND
#         union = tf.reduce_sum(tf.cast(tf.add(pre, truth) >= 1, dtype=tf.float32), axis=axis)  # OR
#         # old axis=[0,1,2,3]
#         # epsilon = 1e-5
#         # batch_iou = inse / (union + epsilon)
#         # new haodong
#         batch_iou = (inse + smooth) / (union + smooth)
#         iou = tf.reduce_mean(batch_iou, name='iou_coe')
#         return iou  # , pre, truth, inse, union



# ## test soft/hard dice and iou
# # import numpy as np
# y = np.zeros((1,10,10,1))
# # y[0,0:5,0:5]=1.0
# o = np.zeros((1,10,10,1))
# # o[:,:,:,:] = 0            # what we want: dice=0   iou=0  OK
# # o[0,0:2,0:2]=0.3          # what we want: dice larger iou=0  OK
# # o[0,0:2,0:2]=0.6          # what we want: dice larger  iou small  OK
# # o[0,0:3,0:3]=0.6          # what we want: dice larger iou larger OK
# # o[0,0:3,0:3]=1            # what we want: dice larger iou same OK
# # o[0,0:5,0:5]=1            # what we want: dice=1 iou=1  OK
# # o[0,0:5,0:5]=0.3          # what we want: dice smaller  iou=0  OK
# # o[0,0:5,0:5]=1e-2           # what we want: dice≈0 iou=0  OK
# # o[0,8:10,8:10]=1.0        # what we want: dice=0 iou=0  OK
# # o[0,8:10,8:10]=1e-10        # what we want: dice=0 iou=0  OK
# # y[:,:,:,:] = o[:,:,:,:] = 0 # what we want: dice=1 iou=1  OK
# ## why in u-net, dice=1 hard-dice=1 iou=1 exist?? print bug?

# d = Trial.dice_coe(o, y, 'jaccard', smooth=1.)
# hd = Trial.dice_hard_coe(o, y, smooth=1e-5)
# i = Trial.iou_coe(o, y, smooth=1e-5)
# # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
# # sess.run(tf.local_variables_initializer())
# # print(sess.run([d,hd,i]))
# # p, t, i, u = sess.run([pre, truth, inse, union])
# # import pprint
# # pprint.pprint(((y>0.5)*(o>0.5)).astype(int).tolist())
# # pprint.pprint(p.tolist())
# # pprint.pprint(t.tolist())
# # pprint.pprint(i)
# # pprint.pprint(u)
# # exit()
# print(d)
# print(hd)
# print(i)


##Segmentation from scratch
Here, we build a complete network from scratch.
The code is based on [this tutorial](https://github.com/tensorflow/examples/blob/master/community/en/ImageSegmentation_ModelSubclassing.ipynb) 

First, we define an encoder block and a decoder block. Then we build the full encoder-decoder model from those building blocks.

####Encoder Block

In [0]:
class EncoderBlock(tf.keras.Model):
    """Residual encoder block that downsamples its input with stride 2.

    Main path: two (ReLU -> SeparableConv2D -> BatchNormalization) stages
    followed by a stride-2 max pooling. Shortcut path: a stride-2 1x1
    convolution of the block input. The two paths are added element-wise.
    """

    def __init__(self, filter_size):
        """Create the layers of the block.

        Args:
            filter_size: number of filters used by every convolution of the block.
        """
        super().__init__()
        self.filter_size = filter_size

        def separable_conv():
            # New 3x3 separable convolution with its own L2(0) regularizers.
            return tf.keras.layers.SeparableConv2D(
                self.filter_size,
                3,
                padding='same',
                kernel_regularizer=tf.keras.regularizers.l2(0),
                bias_regularizer=tf.keras.regularizers.l2(0))

        # Stage 1
        self.layer_1 = tf.keras.layers.Activation('relu')
        self.layer_2 = separable_conv()
        self.layer_3 = tf.keras.layers.BatchNormalization()

        # Stage 2
        self.layer_4 = tf.keras.layers.Activation('relu')
        self.layer_5 = separable_conv()
        self.layer_6 = tf.keras.layers.BatchNormalization()

        # Downsampling of the main path and of the shortcut.
        self.layer_7 = tf.keras.layers.MaxPooling2D(3, strides=2, padding='same')  # 3,2
        self.residual_layer = tf.keras.layers.Conv2D(
            self.filter_size, 1, strides=2, padding='same')

    def call(self, inputs):
        """Run the main path, then add the projected shortcut of `inputs`."""
        main_path = (
            self.layer_1, self.layer_2, self.layer_3,
            self.layer_4, self.layer_5, self.layer_6,
            self.layer_7,
        )
        x = inputs
        for layer in main_path:
            x = layer(x)

        shortcut = self.residual_layer(inputs)
        return tf.keras.layers.add([x, shortcut])

####Decoder Block

In [0]:
class DecoderBlock(tf.keras.Model):
    """Decoder class - Upsampling.

    Main path: two (ReLU -> Conv2DTranspose 3x3 -> BatchNormalization) stages
    followed by a x2 up-sampling. Shortcut path: x2 up-sampling of the block
    input followed by a 1x1 convolution. Both paths are added, so the block
    doubles the spatial resolution and outputs `filter_size` channels.

    Args:
        filter_size: number of filters used by every convolution of the block.
    """

    def __init__(self, filter_size):
        super().__init__()
        self.filter_size = filter_size

        # Attribute names and creation order are kept stable on purpose:
        # saved weights are matched against them.
        self.layer_1 = tf.keras.layers.Activation('relu')
        self.layer_2 = tf.keras.layers.Conv2DTranspose(self.filter_size, 3, padding='same')
        self.layer_3 = tf.keras.layers.BatchNormalization()

        self.layer_4 = tf.keras.layers.Activation('relu')
        self.layer_5 = tf.keras.layers.Conv2DTranspose(self.filter_size, 3, padding='same')
        self.layer_6 = tf.keras.layers.BatchNormalization()

        self.layer_7 = tf.keras.layers.UpSampling2D(2)

        self.residual_layer_1 = tf.keras.layers.UpSampling2D(2)
        self.residual_layer_2 = tf.keras.layers.Conv2D(filter_size, 1, padding='same')

    def call(self, inputs):
        """Run the main path, then add the up-sampled shortcut of `inputs`."""
        main_path = inputs
        for stage in (self.layer_1, self.layer_2, self.layer_3,
                      self.layer_4, self.layer_5, self.layer_6,
                      self.layer_7):
            main_path = stage(main_path)

        shortcut = self.residual_layer_2(self.residual_layer_1(inputs))
        return tf.keras.layers.add([main_path, shortcut])


####Image Segmentation Model From Scratch
Concatenating the two parts: encoding and decoding

In [0]:
class ImageSegmentationModel(tf.keras.Model):
    """Encoder-decoder segmentation network built from EncoderBlock / DecoderBlock.

    The entry convolution (stride 2) and the three encoder blocks divide the
    resolution by 16 overall; the four decoder blocks multiply it by 16 again.
    The final 3x3 convolution applies a softmax over `output_channels`.

    Args:
        output_channels: number of channels (classes) of the softmax output.
        dynamic: accepted for compatibility; it is not used by this constructor.
    """

    def __init__(self, output_channels, dynamic = True):
        super().__init__()
        self.output_channels = output_channels

        # Entry stem
        self.entry_block_1 = tf.keras.layers.Conv2D(32, 3, strides=2, padding='same')
        self.entry_block_2 = tf.keras.layers.BatchNormalization()
        self.entry_block_3 = tf.keras.layers.Activation('relu')

        # Contracting path
        self.encoder_block_1 = EncoderBlock(64)
        self.encoder_block_2 = EncoderBlock(128)
        self.encoder_block_3 = EncoderBlock(256)

        # Expanding path
        self.decoder_block_1 = DecoderBlock(256)
        self.decoder_block_2 = DecoderBlock(128)
        self.decoder_block_3 = DecoderBlock(64)
        self.decoder_block_4 = DecoderBlock(32)

        self.output_layer = tf.keras.layers.Conv2D(
            output_channels, (3, 3), activation='softmax', padding='same')

    def call(self, inputs):
        """Apply stem, encoders, decoders and the softmax head in sequence."""
        pipeline = (
            self.entry_block_1, self.entry_block_2, self.entry_block_3,
            self.encoder_block_1, self.encoder_block_2, self.encoder_block_3,
            self.decoder_block_1, self.decoder_block_2,
            self.decoder_block_3, self.decoder_block_4,
            self.output_layer,
        )
        x = inputs
        for stage in pipeline:
            x = stage(x)
        return x

####Model Compilation

In [0]:
# Instantiate the from-scratch network with 21 output channels and compile it
# with plain categorical cross-entropy; IoU and Dice are tracked as metrics.
OUTPUT_CHANNELS_ = 21
model_seg_fs = ImageSegmentationModel(OUTPUT_CHANNELS_)
model_seg_fs.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.categorical_crossentropy,  # alternative tried: loss_weighted
    metrics=[MyScore.iou_coef, MyScore.dice_score],  # alternative tried: meanIoU
)

# A subclassed model has to be built with a concrete input shape before
# summary() can list its layers.
model_seg_fs.build(input_shape=(BS, sq_size, sq_size, 3))
model_seg_fs.summary()

####Model Training

In [0]:
# Either train the from-scratch segmentation model and persist weights and
# history, or restore both from a previous run.
if train_seg_model_fs:
    model_seg_fs_history = model_seg_fs.fit(
        training_set_seg_tf_generator,
        epochs=EPOCHS,
        steps_per_epoch=len(df_seg_train_to_generate) // BS,
        validation_steps=len(df_seg_val_to_generate) // BS,  # VALIDATION_STEPS,
        validation_data=val_set_seg_tf_generator,
        callbacks=[my_callbacks],  # DisplayCallback(model_t = model_seg_fs),
    )
    model_seg_fs.save_weights(model_seg_fs_save_filename)
    histories['model_seg_fs'] = model_seg_fs_history.history
    # Context manager so the history file is flushed and closed right away.
    with open(model_seg_fs_hist_save_filename, 'wb') as history_file:
        pickle.dump(histories["model_seg_fs"], history_file)
else:
    model_seg_fs.load_weights(model_seg_fs_save_filename)
    # NOTE: pickle.load executes arbitrary code; only load history files
    # produced by this notebook.
    with open(model_seg_fs_hist_save_filename, 'rb') as history_file:
        histories['model_seg_fs'] = pickle.load(history_file)

model_seg_fs.summary()

In [0]:
# Plot the training curves of the from-scratch model. Plotting is best-effort
# (the history may lack a key), but the reason for a failure is reported and
# only ordinary exceptions are caught, so an interrupt is not swallowed.
try:
    fig, axes = plt.subplots(2, 1, figsize=(8, 8))
    fs_history = histories["model_seg_fs"]

    metric_curves = (
        ('iou_coef', 'IoU'),
        ('val_iou_coef', 'val_IoU'),
        ('dice_score', 'Dice'),
        ('val_dice_score', 'val_Dice'),
    )
    for history_key, curve_label in metric_curves:
        axes[0].plot(fs_history[history_key], label=curve_label)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.0, 1])
    axes[0].legend(loc='upper right')

    for history_key in ('loss', 'val_loss'):
        axes[1].plot(fs_history[history_key], label=history_key)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Categorical Cross-entropy')
    axes[1].legend(loc='upper right')

    fig.suptitle('Model Sementation FS')
except Exception as plot_error:
    print("There was an error during the plot: " + repr(plot_error))

# Evaluation runs whether or not the plot succeeded.
scores = model_seg_fs.evaluate(val_set_seg_tf_generator, steps=2*len(df_seg_val_to_generate["stem_filename"])//BS )

###Results (Comparison with Transfer Learning)

Similarly, we can show the results of the from-scratch model.
These results are pretty bad: the network has not learned much that is useful.

In [0]:
# Comparison between predictions from the TL (transfer learning) model and
# from the FS (from scratch) model, on the same validation generator.
for compared_model in (model_seg_tl, model_seg_fs):
    show_predictions(model=compared_model, dataset=val_set_seg_tf_generator)

##Segmentation From Scratch - Cat & Dog

Tackling the full problem is hard, and there is not a lot of data available. In order to learn better, I decided to implement a simpler model in two steps:
1. From scratch, binary classifier between cats and dogs
    - Get more data from another dataset (available in tensorflow)
    - Build a simpler CNN classifier
        * 1 output (either cat or dog)
        * Loss = binary cross entropy (2 classes)
        * Metric = accuracy
        * Output activation Function: sigmoid
    - Verify performance on validation VOC set (cat & dogs only)
    - save the model
2. Reuse this model as the encoding part for segmentation
    - create a training and a validation dataframe for segmentation containing only cats and dogs
    - transform the masks so that they only contain background, cats and dogs
    - Build the full network, reusing the feature extraction (= encoding) part of previous network
    - train this as a segmentation problem, with 3 classes: background, cat, dog

In [0]:
'''
Variable updates
'''
# Side length (pixels) of the square images from this point on: the cells below
# use it as target_size / input_shape / resize size for the cat & dog models.
# NOTE(review): this rebinds a name that earlier cells also read; re-running
# earlier cells after this one will pick up the new value.
sq_size = 224

#####Dataframes

In [0]:

# --- Label binarizer restricted to the two classes of this sub-problem ---
classes_names_ah = ["cat", "dog"]
multiLabelBinarizer_ah = preprocessing.MultiLabelBinarizer(classes=classes_names_ah)
multiLabelBinarizer_ah.fit(classes_names_ah)

# --- Classification dataframes filtered on ('cat', 'dog') ---
df_class_train_ah = get_dataframe_from_classes(df_class_train, ('cat', 'dog'))
df_class_val_ah = get_dataframe_from_classes(df_class_val, ('cat', 'dog'))

# Move the first 125 validation rows over to the training set.
df1 = df_class_val_ah.iloc[:125, :]
df_class_val_ah = df_class_val_ah.iloc[125:, :]
frames = [df_class_train_ah, df1]
df_class_train_ah = pd.concat(frames)

print(df_class_train_ah.head(5))
print(df_class_val_ah.head(5))


def _count_single_label(df, class_name):
    """Number of rows of `df` whose "class" entry equals the 1-tuple (class_name,)."""
    return sum(1 for label in df["class"] if label == (class_name,))


# --- Summary of the class counts in both sets ---
nb_elm_train_ah = len(df_class_train_ah["class"])
print("Number total of element in training set: " + str(nb_elm_train_ah))
for class_name in classes_names_ah:
    print("Number of '" + class_name + "' in training set: "
          + str(_count_single_label(df_class_train_ah, class_name)))
print("--"*30)
for class_name in classes_names_ah:
    print("Number of '" + class_name + "' in validation set: "
          + str(_count_single_label(df_class_val_ah, class_name)))
print("Number total of element in validation set: " + str(len(df_class_val_ah["class"])))

In [0]:
# Because the classes are unbalanced, an optional extra step can
# - randomly select items from the original set
# - with a weight inversely proportional to the original class weight
# The goal is to get a distribution that is as flat as possible.
#
# ==> Downside: a lot of samples are thrown away.
equalize_distribution = False

if not equalize_distribution:
    ah_df_class_train_to_generate = df_class_train_ah
    ah_df_class_val_to_generate = df_class_val_ah
else:
    ah_training_labels = multiLabelBinarizer_ah.transform(df_class_train_ah["class"])
    print(ah_training_labels)

    # share of each class among the training rows
    ah_classes_weights = (ah_training_labels == 1).sum(axis=0) / ah_training_labels.shape[0]
    print("Weights of the different classes, in the Single Label context:" + str(ah_classes_weights))

    # inverted weights, indicating the weights for sampling
    ah_inverted_classes_weights = (1/ah_classes_weights) / (np.sum(1/ah_classes_weights) + 1e-16)
    print(ah_inverted_classes_weights)

    def _loc(df):
        """Return a copy of `df` with a 'sampling_rate' column set per single-label class."""
        weighted = df.assign(sampling_rate=0)
        for class_name, rate in zip(classes_names_ah, ah_inverted_classes_weights):
            weighted.loc[weighted['class'] == (class_name,), 'sampling_rate'] = rate
        return weighted

    ah_df_class_train_to_generate = _loc(df_class_train_ah)
    ah_df_class_val_to_generate = _loc(df_class_val_ah)

    print(ah_df_class_train_to_generate.head(10))

    # weighted sampling: 600 rows for training and 85 rows for validation
    ah_df_class_train_to_generate = ah_df_class_train_to_generate.sample(
        600, weights=ah_df_class_train_to_generate['sampling_rate'], random_state=1000)
    print("\nNew single-label dataframe for training (5 first elem):")
    print(ah_df_class_train_to_generate.head(5))

    ah_df_class_val_to_generate = ah_df_class_val_to_generate.sample(
        85, weights=ah_df_class_val_to_generate['sampling_rate'], random_state=0)
    print("\nNew single-label dataframe for validation (5 first elem):")
    print(ah_df_class_val_to_generate.head(5))


def _class_share_percent(df):
    """Percentage of single-label rows of each class of `classes_names_ah` in `df`."""
    shares = np.zeros((2,))
    for idx, class_name in enumerate(classes_names_ah):
        shares[idx] = len(df[df['class'] == (class_name,)])
    shares /= sum(shares)
    shares *= 100
    return shares


# Check the distribution: class ratio (in percent) of both sets
ah_counts_training = _class_share_percent(ah_df_class_train_to_generate)
ah_counts_validation = _class_share_percent(ah_df_class_val_to_generate)

print(ah_counts_training)
print(ah_counts_validation)

#####Generators
Create generators for classification

In [0]:
# Construct the Keras generators used for classification training / validation.
train_class_gen_ah = get_classification_keras_generator(
    ah_df_class_train_to_generate, mode='training', BS=32, seed=50)
val_class_gen_ah = get_classification_keras_generator(
    ah_df_class_val_to_generate, mode='validation', BS=32, seed=500)

# Visual sanity check: show one batch of each generator with decoded labels.
for class_generator in (train_class_gen_ah, val_class_gen_ah):
    img, lbls = next(class_generator)
    plot_matrix(img, multiLabelBinarizer_ah.inverse_transform(lbls))


### Classifier (encoder) with new Data
After many trials, it appears that it is not easy to get a good classifier considering the limited number of pictures available
-> a solution is to try and get another dataset - hopefully of similar images

the next few snippets are inspired by [tensorflow tutorial](https://www.tensorflow.org/tutorials/images/classification)

In [0]:
# Download and extract the filtered cats-vs-dogs archive through the Keras
# file helper, then point PATH at the extracted folder next to the archive.
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file(
    'cats_and_dogs.zip',
    origin=_URL,
    extract=True,
)
PATH = os.path.join(
    os.path.dirname(path_to_zip),
    'cats_and_dogs_filtered',
)

In [0]:
# Sub-folders of the extracted archive holding the training / validation images.
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

In [0]:
# One folder per class inside each split.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, 'cats'),   # directory with our training cat pictures
    os.path.join(train_dir, 'dogs'),   # directory with our training dog pictures
)
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, 'cats'),   # directory with our validation cat pictures
    os.path.join(validation_dir, 'dogs'),   # directory with our validation dog pictures
)

In [0]:
def _count_entries(folder):
    """Number of directory entries found in `folder`."""
    return len(os.listdir(folder))


# Per-class and per-split sizes of the downloaded dataset.
num_cats_tr, num_dogs_tr = _count_entries(train_cats_dir), _count_entries(train_dogs_dir)
num_cats_val, num_dogs_val = _count_entries(validation_cats_dir), _count_entries(validation_dogs_dir)

total_train = num_cats_tr + num_dogs_tr
total_val = num_cats_val + num_dogs_val

for description, amount in (
        ('total training cat images:', num_cats_tr),
        ('total training dog images:', num_dogs_tr),
        ('total validation cat images:', num_cats_val),
        ('total validation dog images:', num_dogs_val)):
    print(description, amount)
print("--")
print("Total training images:", total_train)
print("Total validation images:", total_val)

In [0]:
# Batch size of the directory-based generators below; reuses the notebook-wide BS.
batch_size = BS

In [0]:
# Training generator: rescale to [0, 1] plus light augmentation
# (10% horizontal/vertical shifts and random horizontal flips).
train_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Validation generator: rescaling only, no augmentation.
validation_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
)

In [0]:
# Options shared by both directory iterators: same batch size, seed, image
# size and binary (single 0/1) labels.
_directory_flow_options = dict(
    batch_size=batch_size,
    seed=0,
    target_size=(sq_size, sq_size),
    class_mode='binary',
)

train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    shuffle=True,
    **_directory_flow_options)

# `shuffle` is left at the library default here, exactly as before.
val_data_gen = validation_image_generator.flow_from_directory(
    directory=validation_dir,
    **_directory_flow_options)

In [0]:
# Pull one batch from the training iterator and display it with its labels
# as a visual sanity check of the augmentation pipeline.
sample_training_images, labels_sampes= next(train_data_gen)
plot_matrix(sample_training_images, labels_sampes)

#####Build Classifier model

In [0]:
def define_model():
    """Build the (uncompiled) binary cat-vs-dog CNN classifier.

    Architecture: four stages of SeparableConv2D 3x3 -> ReLU -> 2x2 max-pooling
    with 32, 64, 128 and 256 filters, then Flatten -> Dense(128, relu) ->
    Dense(1, sigmoid). Convolutions are named conv0..conv3 and the activation
    layers relu0..relu3 so that later cells can look them up by name.

    Deeper variants (a second convolution per stage, an extra stage, dropout,
    a second dense layer) were tried during development and are not part of
    this model.

    Returns:
        tf.keras.models.Sequential expecting (sq_size, sq_size, 3) inputs.
    """
    layers = tf.keras.layers
    conv_options = dict(kernel_initializer='he_uniform', padding='same')

    model = tf.keras.models.Sequential()

    # Stage 0 declares the input shape; its convolution has no fused activation.
    model.add(layers.SeparableConv2D(32, (3, 3),
                                     input_shape=(sq_size, sq_size, 3),
                                     name='conv0', **conv_options))
    model.add(layers.Activation('relu', name='relu0'))
    model.add(layers.MaxPooling2D(2, 2))

    # Stages 1-3: the convolution already applies ReLU and is followed by a
    # named ReLU layer whose output is reused as a skip connection later on.
    for stage, filters in enumerate((64, 128, 256), start=1):
        model.add(layers.SeparableConv2D(filters, (3, 3), activation='relu',
                                         name='conv%d' % stage, **conv_options))
        model.add(layers.Activation('relu', name='relu%d' % stage))
        model.add(layers.MaxPooling2D((2, 2)))

    # Classification head
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

##### Compilation

In [0]:
# Compilation of the cat/dog classifier.
# The model ends with a sigmoid, so the loss receives probabilities
# (from_logits=False). Alternatives tried: CategoricalCrossentropy,
# get_model_ah(), ImageClassificationModel(2).
loss_ah = tf.keras.losses.BinaryCrossentropy(from_logits=False)

model_ah = define_model()
model_ah.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss=loss_ah,  # alternative tried: loss_weighted
    metrics=['accuracy'],
)
model_ah.summary()



#####Training


In [0]:
# Model fitting: either train the cat/dog classifier on the downloaded dataset
# and persist weights and history, or restore both from a previous run.
# NOTE(review): the directory iterators are batched with `batch_size` (= BS)
# while the step counts below use BS_AH; confirm both are meant to be equal.
BS_AH = 32
if train_class_ah:
    history_model_ah = model_ah.fit(
        train_data_gen,  # alternative: train_class_gen_ah
        epochs=EPOCHS,
        steps_per_epoch=total_train // BS_AH,  # alternative: len(df_class_train_ah)//BS_AH
        validation_steps=total_val // BS_AH,   # alternative: len(df_class_val_ah)//BS_AH
        validation_data=val_data_gen,          # alternative: val_class_gen_ah
    )  # callbacks=[my_callbacks]
    model_ah.save_weights(model_ah_save_filename)
    histories['model_ah'] = history_model_ah.history
    # Context manager so the history file is flushed and closed right away.
    with open(model_ah_hist_save_filename, 'wb') as history_file:
        pickle.dump(histories["model_ah"], history_file)
else:
    model_ah.load_weights(model_ah_save_filename)
    # NOTE: pickle.load executes arbitrary code; only load history files
    # produced by this notebook.
    with open(model_ah_hist_save_filename, 'rb') as history_file:
        histories['model_ah'] = pickle.load(history_file)


# Freeze the classifier: it is reused below as a fixed feature extractor.
model_ah.trainable = False

In [0]:
# Plot the training curves of the cat/dog classifier. Plotting is best-effort
# (the history may lack a key), but the reason for a failure is reported and
# only ordinary exceptions are caught, so an interrupt is not swallowed.
try:
    fig, axes = plt.subplots(2, 1, figsize=(8, 8))
    ah_history = histories['model_ah']

    for history_key in ('accuracy', 'val_accuracy'):
        axes[0].plot(ah_history[history_key], label=history_key)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.5, 1])
    axes[0].legend(loc='upper right')

    for history_key in ('loss', 'val_loss'):
        axes[1].plot(ah_history[history_key], label=history_key)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Binary Cross-entropy')
    axes[1].set_ylim([0.0, 1])
    axes[1].legend(loc='upper right')

    fig.suptitle('Model AH ')
except Exception as plot_error:
    print("There was an error during the plot: " + repr(plot_error))

# The summary is printed whether or not the plot succeeded.
model_ah.summary()

pred = model_ah.predict(val_class_gen_ah)

####Check performance on VOC val set
This cat & dog classifier was trained using another dataset. We can verify its performance on the VOC validation set defined above. 

In [0]:
# ---------------------------------------------------------------------------
# Get the validation data ready for prediction
# ---------------------------------------------------------------------------
# image ids = file names of the dataframe without their extension
ah_val_ids = [file_name.split('.')[0] for file_name in ah_df_class_val_to_generate["filename"]]

# load the images in RAM and bring them to [0, 1] if they are not already
ah_val_images = get_images(ah_val_ids, path_image_folder, width=sq_size, height=sq_size)
if np.max(ah_val_images) > 1:
    ah_val_images = np.divide(ah_val_images, 255.0, dtype=np.float32)

# ground-truth labels, one-hot over ("cat", "dog")
ah_val_labels_binarized = multiLabelBinarizer_ah.transform(ah_df_class_val_to_generate["class"])
ah_val_labels_str = list(ah_df_class_val_to_generate["class"])
y_true = ah_val_labels_binarized

# ---------------------------------------------------------------------------
# Predict
# ---------------------------------------------------------------------------
y_pred_binary = model_ah.predict(ah_val_images)

# one-hot encode the sigmoid score: below 0.5 -> [1, 0], otherwise -> [0, 1]
y_pred = np.where(y_pred_binary[:, :1] < 0.5, [1, 0], [0, 1])

# ---------------------------------------------------------------------------
# Check on shapes
# ---------------------------------------------------------------------------
print("y_true shape = " + str(y_true.shape))
print("y_pred shape = " + str(y_pred.shape))

# ---------------------------------------------------------------------------
# Convert one-hot encoding to categorical using argmax
# ---------------------------------------------------------------------------
y_true_categorical = list(np.argmax(y_true, axis=1))
y_pred_categorical = list(np.argmax(y_pred, axis=1))

# ---------------------------------------------------------------------------
# Compute confusion matrix (raw counts and row-normalised)
# ---------------------------------------------------------------------------
conf_mat_dict = sklearn.metrics.confusion_matrix(
    y_true=y_true_categorical, y_pred=y_pred_categorical)
conf_mat_dict_norm = sklearn.metrics.confusion_matrix(
    y_true=y_true_categorical, y_pred=y_pred_categorical, normalize='true')

print(conf_mat_dict_norm)

###Segmentation network

In [0]:
# Extract layers of the classifier.
# The outputs of the four named ReLU layers are used as encoder features.
# With sq_size = 224, 'same' padding and one 2x2 pooling between stages they
# are presumably 224, 112, 56 and 28 pixels wide -- verify with
# model_ah.summary().
layer_names = ['relu%d' % stage for stage in range(4)]
layers_list = [model_ah.get_layer(layer_name).output for layer_name in layer_names]

# Create the feature extraction model from those activations.
down_stack_ah = tf.keras.Model(inputs=model_ah.input, outputs=layers_list)

# Copy weights.
# NOTE(review): the layers addressed here are Activation layers, and
# down_stack_ah is built on model_ah's own tensors, so this loop looks like a
# no-op -- confirm before relying on it.
for name_ in layer_names:
    source_weights = model_ah.get_layer(name=name_).get_weights()
    down_stack_ah.get_layer(name=name_).set_weights(source_weights)

# Make it not trainable!
down_stack_ah.trainable = False

# Quick inspection: layer counts and the 'conv2' layer as seen from both models.
for inspected_model in (down_stack_ah, model_ah):
    print(len(inspected_model.layers))

for inspected_model in (down_stack_ah, model_ah):
    print(inspected_model.get_layer(name='conv2'))



In [0]:
# Up-sampling stack of the decoder. Each entry comes from upsample(filters, 3)
# and is paired, in order, with one skip connection in my_unet_ah.
# NOTE(review): the list is created once at cell level, so every model built
# by my_unet_ah shares these layer objects.
up_stack_ah = [upsample(n_filters, 3) for n_filters in (128, 64, 32)]


def my_unet_ah(output_channels = 3):
    """Build a U-Net-like segmentation model on top of `down_stack_ah`.

    The last output of `down_stack_ah` is fed through `up_stack_ah`; after each
    up-sampling step the result is concatenated with the matching encoder
    output (deepest remaining first). A final stride-1 transposed convolution
    with softmax yields `output_channels` maps.

    Args:
        output_channels: number of softmax channels (classes) of the output.

    Returns:
        tf.keras.Model taking (sq_size, sq_size, 3) images.
    """
    inputs = tf.keras.layers.Input(shape=[sq_size, sq_size, 3])

    # Downsampling through the model
    encoder_outputs = down_stack_ah(inputs)
    x = encoder_outputs[-1]

    # every encoder output except the last one, deepest first
    skip_connections = encoder_outputs[:-1][::-1]

    # Upsampling and establishing the skip connections
    for up_layer, skip in zip(up_stack_ah, skip_connections):
        x = up_layer(x)
        x = tf.keras.layers.Concatenate()([x, skip])

    # Last layer of the model: stride 1, so it keeps the spatial size
    head = tf.keras.layers.Conv2DTranspose(
        output_channels, 3, strides=1, padding='same', activation="softmax")
    return tf.keras.Model(inputs=inputs, outputs=head(x))



In [0]:
# Build the 3-channel (background / cat / dog) segmentation model and inspect it.
model_seg_fs_ah = my_unet_ah(3)
model_seg_fs_ah.summary()
# Last expression of the cell: the rendered architecture diagram is displayed.
tf.keras.utils.plot_model(model_seg_fs_ah, show_shapes=True)

###Data for segmentation
Build the generators for training and validation sets.

Most of the code is a duplicate of previous cells, with tiny changes. This is bad code design.

The functions below replicate exactly the creation of the generators using the tf.data API, but change the way the mask is encoded to make sure only background, cats and dogs are present. All the other colors go to black (background).

In [0]:
# The functions of this cell are dedicated to the cat vs dog case: a problem
# with only 2 classes + background. The implementation is the same as the
# generic segmentation pipeline above, except for the one-hot encoding of the
# mask.
#
# It should be changed as it induces a lot of code redundancy (note the
# "_reduced" in the function names).

def get_reduced_segmentation_generator(dataframe, cache = True, to_augment = True):
    '''
    In the exact same fashion as get_classification_generator, it returns a
    tf.data generator of (image, mask) batches for the segmentation task.

    @param dataframe: dataframe with a "stem_filename" column (image ids)
    @param cache: a string caches the parsed samples in that file; any other
                  truthy value caches them in memory; a falsy value disables
                  caching
    @param to_augment: when True, train_segmentation_preprocess is mapped over
                       the (cached) samples
    @return an endlessly repeating, shuffled, batched and prefetched dataset
    '''
    stem_filenames = dataframe["stem_filename"]
    tf_generator = tf.data.Dataset.from_tensor_slices(stem_filenames)
    tf_generator = tf_generator.map(parse_reduced_segmentation_function, num_parallel_calls = AUTOTUNE)

    # Cache before augmentation so that decoding happens once while the
    # augmentation is re-applied on every pass.
    if isinstance(cache, str):
        tf_generator = tf_generator.cache(cache)
    elif cache:
        tf_generator = tf_generator.cache()

    if to_augment:
        tf_generator = tf_generator.map(train_segmentation_preprocess, num_parallel_calls = AUTOTUNE)

    tf_generator = tf_generator.shuffle(len(stem_filenames), seed = 426473)
    tf_generator = tf_generator.repeat()
    tf_generator = tf_generator.batch(BATCH_SIZE)
    tf_generator = tf_generator.prefetch(buffer_size = AUTOTUNE)
    return tf_generator

def parse_reduced_segmentation_function(stem_filename):
    '''
    Load one image / mask pair and return them resized and normalized.

    @param stem_filename: id of the image files to read
    @return (image, mask) as produced by segmentation_reduced_normalize
    '''
    filename_input_image = voc_root_folder + r'/JPEGImages/' + stem_filename + ".jpg"
    filename_input_mask = voc_root_folder + r'/SegmentationClass/' + stem_filename + ".png"

    input_image_string = tf.io.read_file(filename_input_image)
    input_image = tf.image.decode_jpeg(input_image_string, channels=3)

    input_mask_string = tf.io.read_file(filename_input_mask)
    input_mask = tf.image.decode_png(input_mask_string, channels=3)

    target_size = [sq_size, sq_size]
    input_image = tf.image.resize(input_image, target_size)

    # The mask stores class colours that are matched exactly later on.
    # Interpolating them would create blended colours along object borders
    # that belong to no class, so the mask is resized with nearest-neighbour.
    input_mask = tf.image.resize(input_mask, target_size,
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Nearest-neighbour keeps the decoded integer dtype; the encoder compares
    # against float32 reference colours.
    input_mask = tf.cast(input_mask, tf.float32)

    input_image, input_mask = segmentation_reduced_normalize(input_image, input_mask)

    return input_image, input_mask


def segmentation_reduced_normalize(input_image, input_mask):
    '''
    Normalize one (image, mask) sample:
    - the image is cast to float32 and divided by 255
    - the mask is one-hot encoded by __two_encode

    @return the (normalized image, encoded mask) pair
    '''
    scaled_image = tf.cast(input_image, tf.float32) / 255.0
    encoded_mask = __two_encode(input_mask)
    return scaled_image, encoded_mask



def __two_encode(mask):
    '''
    Helper that maps a colour-coded mask to a 3-channel one-hot mask.

    Channel 1 is set where the pixel equals [64, 0, 0], channel 2 where it
    equals [64, 0, 128]; channel 0 is set for every other pixel.

    @param mask: (height, width, 3) colour mask
    @return float32 tensor of shape (height, width, 3)
    '''
    local_colormap = ([64,0,0], [64,0,128],)
    local_color_reference = tf.cast(tf.constant(local_colormap), dtype=tf.float32)

    # Compare in float32 whatever the dtype of the incoming mask is.
    mask = tf.cast(mask, tf.float32)

    # (H, W, 1, 3) against (2, 3): a pixel matches a class when all three
    # colour components are equal.
    comp = tf.equal(mask[..., None, :], local_color_reference)
    one_hot_map = tf.cast(tf.reduce_all(comp, axis=-1), dtype=tf.float32)

    # Everything that matched neither reference colour goes to channel 0.
    # keepdims avoids depending on the global image size.
    map_elements = tf.reduce_sum(one_hot_map, axis=-1, keepdims=True)
    one_hot_opposite_map = 1.0 - map_elements

    output = tf.concat([one_hot_opposite_map, one_hot_map], axis = -1)

    return output

In [0]:
'''
get dataframe for segmentation
'''
# Restrict the segmentation dataframes with get_dataframe_from_classes on ('cat', 'dog').
df_seg_train_ah = get_dataframe_from_classes(df_seg_train, ('cat','dog',))
df_seg_val_ah = get_dataframe_from_classes(df_seg_val, ('cat','dog',))


# Show the unfiltered and the filtered validation frames for comparison.
print(df_seg_val.head(5))
print(df_seg_val_ah.head(5))


In [0]:
'''
get generators from dataframe
'''
# Augmentation is enabled for the training generator only.
train_seg_tf_gen_ah   = get_reduced_segmentation_generator(df_seg_train_ah, to_augment = True)
val_seg_tf_gen_ah     = get_reduced_segmentation_generator(df_seg_val_ah, to_augment = False)

# Draw two batches and print the mask batch shape; `img` and `mask` keep the
# last batch after the loop and are reused for the plots below.
for img, mask in train_seg_tf_gen_ah.take(2):
    tf.print(mask.shape)

plot_matrix(img, scale =2)
plot_matrix(mask, scale = 2)


###Segmentation Training

Now we are ready to train.

In [0]:
# Per-class weights, one per output channel, shaped (1, 1, 1, 3) so that they
# broadcast over (batch, height, width, 3) tensors.
weights_after_normalization = np.array([0.6, 0.25, 0.15])
weights = weights_after_normalization.reshape((1, 1, 1, 3))
kWeights = tf.keras.backend.constant(weights)


def weighted_cce_ah(y_true, y_pred):
    '''
    Attempt of weighted categorical cross entropy.

    Every pixel gets a weight equal to the class-weighted sum of its predicted
    probabilities, normalised by the sum of all pixel weights of the batch.
    The weighted per-pixel cross-entropy is then summed over axes 1 and 2.

    NOTE(review): the weights are derived from y_pred, not from y_true --
    confirm this is intended before using this loss for training.

    @param y_true: one-hot ground truth -- assumes (batch, H, W, 3), TODO confirm
    @param y_pred: predicted probabilities, same shape as y_true
    @return one loss value per batch element
    '''
    backend = tf.keras.backend

    pixel_weights = backend.sum(kWeights * y_pred, axis=-1)
    pixel_weights = pixel_weights / backend.sum(pixel_weights)

    pixel_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return backend.sum(pixel_weights * pixel_loss, axis=(1, 2))



mean_iou_ah = tf.keras.metrics.MeanIoU(num_classes=3)

# Compilation: plain categorical cross-entropy; IoU and Dice tracked as metrics.
model_seg_fs_ah.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss=tf.keras.losses.categorical_crossentropy,  # alternatives tried: loss_weighted, weighted_cce_ah
    metrics=[MyScore.iou_coef, MyScore.dice_score],  # alternative tried: meanIoU
)

# Either train the cat/dog segmentation model and persist weights and history,
# or restore both from a previous run.
# NOTE(review): the step counts below are the number of rows, not rows divided
# by the batch size as in the other fit calls -- confirm this is intended.
if train_seg_fs_ah:
    history_model_seg_fs_ah = model_seg_fs_ah.fit(
        train_seg_tf_gen_ah,
        epochs=EPOCHS,
        steps_per_epoch=len(df_seg_train_ah),
        validation_steps=len(df_seg_val_ah),  # VALIDATION_STEPS,
        validation_data=val_seg_tf_gen_ah,
        callbacks=[my_callbacks],
    )

    model_seg_fs_ah.save_weights(model_seg_fs_ah_save_filename)
    histories['model_seg_fs_ah'] = history_model_seg_fs_ah.history
    # Context manager so the history file is flushed and closed right away.
    with open(model_seg_fs_ah_hist_save_filename, 'wb') as history_file:
        pickle.dump(histories["model_seg_fs_ah"], history_file)
else:
    model_seg_fs_ah.load_weights(model_seg_fs_ah_save_filename)
    # NOTE: pickle.load executes arbitrary code; only load history files
    # produced by this notebook.
    with open(model_seg_fs_ah_hist_save_filename, 'rb') as history_file:
        histories['model_seg_fs_ah'] = pickle.load(history_file)

model_seg_fs_ah.summary()

In [0]:
# Plot the training curves of the cat/dog segmentation model. Plotting is
# best-effort (the history may lack a key), but the reason for a failure is
# reported and only ordinary exceptions are caught, so an interrupt is not
# swallowed.
try:
    fig, axes = plt.subplots(2, 1, figsize=(8, 8))
    seg_ah_history = histories["model_seg_fs_ah"]

    metric_curves = (
        ('iou_coef', 'IoU'),
        ('val_iou_coef', 'val_IoU'),
        ('dice_score', 'Dice'),
        ('val_dice_score', 'val_Dice'),
    )
    for history_key, curve_label in metric_curves:
        axes[0].plot(seg_ah_history[history_key], label=curve_label)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Metric')
    axes[0].set_ylim([0.0, 1])
    axes[0].legend(loc='upper right')

    for history_key in ('loss', 'val_loss'):
        axes[1].plot(seg_ah_history[history_key], label=history_key)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Categorical Cross-entropy')
    axes[1].legend(loc='upper right')

    fig.suptitle('Model Sementation FS Cats and Dogs')
except Exception as plot_error:
    print("There was an error during the plot: " + repr(plot_error))

# Evaluation runs whether or not the plot succeeded.
scores = model_seg_fs_ah.evaluate(val_seg_tf_gen_ah, steps=2*len(df_seg_val_ah["stem_filename"])//BS )

###Predictions results

In [0]:
# Display image / ground-truth mask / predicted mask triplets for up to 10
# samples of each of 10 validation batches.
# (show_predictions(model = model_seg_fs_ah, dataset=val_seg_tf_gen_ah) is the
# generic alternative.)
for img_batch, mask_batch in val_seg_tf_gen_ah.take(10):
    # Never index past the end of a batch that holds fewer than 10 samples.
    samples_to_show = min(10, int(img_batch.shape[0]))
    for i in range(samples_to_show):
        sample_image = img_batch[i]
        # one-hot mask -> class index per pixel -> colour image
        sample_mask = color_seg(np.array(tf.argmax(mask_batch[i], axis = -1)),np.array(colormap[0:21]))
        pred_mask = model_seg_fs_ah.predict(sample_image[tf.newaxis, ...])
        display([sample_image, sample_mask, create_mask(pred_mask)])

