<a href="https://colab.research.google.com/github/giacomogreggio/computer-vision-project/blob/master/VCS_Project_Image_Segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras.backend as back 
import cv2
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input 
import os
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, Input, MaxPool2D, ZeroPadding2D, Cropping2D, Softmax
from tensorflow.keras.regularizers import l2
from tensorflow.keras import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Sequential
import h5py
from IPython.display import display
from PIL import Image
from skimage.io import imshow
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from sklearn.preprocessing import OneHotEncoder
from tqdm.notebook import trange, tqdm 
from time import sleep 

In [12]:

def get_color_map(N=256, normalized=False):
    def bitget(byteval, idx):
        return ((byteval & (1 << idx)) != 0)

    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r = r | (bitget(c, 0) << 7-j)
            g = g | (bitget(c, 1) << 7-j)
            b = b | (bitget(c, 2) << 7-j)
            c = c >> 3

        cmap[i] = np.array([r, g, b])

    cmap = cmap/255 if normalized else cmap
    return cmap[0:N]


def color_map_viz():
    labels = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor', 'void']
    nclasses = 21
    row_size = 50
    col_size = 500
    cmap = get_color_map()
    array = np.empty((row_size*(nclasses+1), col_size, cmap.shape[1]), dtype=cmap.dtype)
    for i in range(nclasses):
        array[i*row_size:i*row_size+row_size, :] = cmap[i]
    array[nclasses*row_size:nclasses*row_size+row_size, :] = cmap[-1]
    imshow(array)
    plt.yticks([row_size*i+row_size/2 for i in range(nclasses+1)], labels)
    plt.xticks([])
    plt.show()


def get_color_dict(n_classes):
    cmap = get_color_map(N=n_classes).tolist()
    colored_map = cmap[0:n_classes]
    color_dict = {}
    for i in range(len(colored_map)):
        color_dict[tuple(colored_map[i])] = i
    return color_dict

In [13]:
def print_img(img):
    plt.figure(figsize=(14, 7))
    plt.subplot(1,1,1), plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)), plt.title('Lena'), plt.axis('off')

In [18]:
nclasses = 21
colored_map = get_color_map(N=nclasses)
color_dict = get_color_dict(nclasses)
dataset_path = "../TrainVal/VOCdevkit/VOC2011"

In [19]:
def color_to_one_hot(img, color_dict):
    semantic_map = []
    for colour in color_dict:
        class_map = tf.reduce_all(tf.equal(img, colour), axis=-1)
        semantic_map.append(class_map)
    semantic_map = tf.stack(semantic_map, axis=-1)
    # NOTE cast to tf.float32 because most neural networks operate in float32.
    semantic_map = tf.cast(semantic_map, tf.float32)
    return semantic_map
    
def one_hot_to_rgb(semantic_map, color_dict):
    color_dict = tf.constant(color_dict, dtype=tf.uint8)
    class_indexes = tf.argmax(semantic_map, axis=-1)
    # NOTE this operation flattens class_indexes
    class_indexes = tf.reshape(class_indexes, [-1])
    color_image = tf.gather(color_dict, class_indexes)
    semantic_map = np.array(semantic_map)
    color_image = tf.reshape(color_image, [semantic_map.shape[0], semantic_map.shape[1], 3])
    return color_image

def color_to_class(img, color_dict):
    s = img.shape
    res = np.zeros((s[0],s[1]))
    for row in range(len(img)):
        for pixel in range(len(img[row])):
            p = tuple(np.flip(img[row][pixel]))
            if p in color_dict:
                res[row][pixel] = color_dict[p]
            else:
                res[row][pixel]=0
    return res

In [20]:
#scommentare per riconvertire le immagini


'''
images = os.listdir(dataset_path + "/SegmentationClass")
for i in tqdm(range(len(images)), desc='Converting'):   
    
    name = images[i]
    img_loaded = cv2.imread('./TrainVal/VOCdevkit/VOC2011/SegmentationClass/' + name.split(".")[0] + '.png',1)
    ht, wd, cc= img_loaded.shape
    img_loaded = color_to_class(img_loaded, color_dict)
    color = (255,255,255)
    res = np.full( (500,500), 21, dtype=np.uint8)
    # compute center offset
    xx = (500 - wd) // 2
    yy = (500 - ht) // 2

    # copy img image into center of result image
    res[yy:yy+ht, xx:xx+wd] = img_loaded
    
    
    #img_converted = color_to_one_hot(res, colored_map).numpy()
    with open(dataset_path + "/SegmentationClassConvertedMin/" + name.split(".")[0] + '.npy', 'wb') as f:
        np.save(f, res)
    #semantic_map_dataset.append(rgb_to_onehot(targets[i],color_dict))
    
    sleep(0.01)
'''

'\nimages = os.listdir(dataset_path + "/SegmentationClass")\nfor i in tqdm(range(len(images)), desc=\'Converting\'):   \n    \n    name = images[i]\n    img_loaded = cv2.imread(\'./TrainVal/VOCdevkit/VOC2011/SegmentationClass/\' + name.split(".")[0] + \'.png\',1)\n    ht, wd, cc= img_loaded.shape\n    img_loaded = color_to_class(img_loaded, color_dict)\n    color = (255,255,255)\n    res = np.full( (500,500), 21, dtype=np.uint8)\n    # compute center offset\n    xx = (500 - wd) // 2\n    yy = (500 - ht) // 2\n\n    # copy img image into center of result image\n    res[yy:yy+ht, xx:xx+wd] = img_loaded\n    \n    \n    #img_converted = color_to_one_hot(res, colored_map).numpy()\n    with open(dataset_path + "/SegmentationClassConvertedMin/" + name.split(".")[0] + \'.npy\', \'wb\') as f:\n        np.save(f, res)\n    #semantic_map_dataset.append(rgb_to_onehot(targets[i],color_dict))\n    \n    sleep(0.01)\n'

In [21]:
dataset = []
targets = []
i = 0
for img in os.listdir(dataset_path + "/SegmentationClassConvertedMin"):
    img_x = cv2.imread(dataset_path + '/JPEGImagesMax/' + img.split(".")[0] + '.jpg',1)
    dataset.append(img_x)
    
    with open(dataset_path + '/SegmentationClassConvertedMin/' + img.split(".")[0] + ".npy", 'rb') as f:
        targets.append(np.load(f))
        
    if i == 50:
        break
    i += 1
    
    #targets.append(np.expand_dims(img_y, axis=0))
    

In [22]:
x_train = np.array(dataset[:40])
x_valid = np.array(dataset[40:])
y_train = np.array(targets[:40])
y_valid = np.array(targets[40:])

## VGG16

In [23]:
import sys
class Crop(tf.keras.layers.Layer):
   
    def call(self, inputs):
        x1, x2 = inputs
        x1_shape = tf.shape(x1)
        x2_shape = tf.shape(x2)
        # offsets for the top left corner of the crop
        offsets = [0, (x1_shape[1] - x2_shape[1]) // 2, (x1_shape[2] - x2_shape[2]) // 2, 0]
        size = [-1, x2_shape[1], x2_shape[2], -1]
        x1_crop = tf.slice(x1, offsets, size)
        return x1_crop
        

In [25]:
l2_value = 5 ** -4

#initial_model = VGG16(weights="imagenet", include_top=False, input_shape=(None,None, 3))

#initial_model.save_weights("./weights.h5")

input_layer = Input(shape=(None,None, 3))
model = ZeroPadding2D(100)(input_layer)
model = Conv2D(filters=64,kernel_size=(3,3),padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=64,kernel_size=(3,3),padding="valid", activation="relu")(model)
model = MaxPool2D(pool_size=(2,2),strides=(2,2))(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=128, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=128, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = MaxPool2D(pool_size=(2,2),strides=(2,2))(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=256, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=256, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=256, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = MaxPool2D(pool_size=(2,2),strides=(2,2))(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=512, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=512, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=512, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = MaxPool2D(pool_size=(2,2),strides=(2,2))(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=512, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=512, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = ZeroPadding2D(1)(model)
model = Conv2D(filters=512, kernel_size=(3,3), padding="valid", activation="relu")(model)
model = MaxPool2D(pool_size=(2,2),strides=(2,2))(model)

m = Model(input_layer, model)
m.load_weights("./weights.h5", by_name=True)
m.trainable = False

model = Conv2D(4096, kernel_size=(7,7), activation='relu', kernel_regularizer=l2(l2_value), padding="valid")(model)
model = Conv2D(4096, kernel_size=(1,1), activation='relu', kernel_regularizer=l2(l2_value), padding="valid")(model)
model = Conv2D(22, kernel_size=(1,1), activation='relu', kernel_regularizer=l2(l2_value), padding="valid")(model)
model = Conv2DTranspose(22, kernel_size=(64,64), strides=(32,32), activation='softmax', kernel_initializer=tf.keras.initializers.Zeros(), use_bias=False)(model)

c = Crop()([model, input_layer]) 

#model = tf.keras.models.model_from_json(model_json)
model = Model(input_layer, model)

model.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
zero_padding2d_13 (ZeroPaddi (None, None, None, 3)     0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, None, None, 64)    1792      
_________________________________________________________________
zero_padding2d_14 (ZeroPaddi (None, None, None, 64)    0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, None, None, 64)    36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, None, None, 64)    0         
_________________________________________________________________
zero_padding2d_15 (ZeroPaddi (None, None, None, 64)   

In [26]:
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss=SparseCategoricalCrossentropy(from_logits=True))

TRAIN_LENGTH = len(x_train)
BATCH_SIZE = 5
STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE
EPOCHS = 10
VAL_SUBSPLITS = 5
#VALIDATION_STEPS = info.splits['test'].num_examples//BATCH_SIZE//VAL_SUBSPLITS

model.fit(x_train, y_train, 
          epochs=EPOCHS, 
          batch_size=10, 
          validation_data=(x_valid, y_valid))

Epoch 1/10


ValueError: in user code:

    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\engine\training.py:748 train_step
        loss = self.compiled_loss(
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__
        losses = ag_call(y_true, y_pred)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\losses.py:253 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\losses.py:1566 sparse_categorical_crossentropy
        return K.sparse_categorical_crossentropy(
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\backend.py:4782 sparse_categorical_crossentropy
        res = nn.sparse_softmax_cross_entropy_with_logits_v2(
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\ops\nn_ops.py:4175 sparse_softmax_cross_entropy_with_logits_v2
        return sparse_softmax_cross_entropy_with_logits(
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    c:\users\user\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\ops\nn_ops.py:4088 sparse_softmax_cross_entropy_with_logits
        raise ValueError("Shape mismatch: The shape of labels (received %s) "

    ValueError: Shape mismatch: The shape of labels (received (10, 500, 500)) should equal the shape of logits except for the last dimension (received (10, 512, 512, 22)).
