# Semantic Segmentation

In this exercise we will implement a convolutional neural network for semantic segmentation.
The goal of semantic segmentation is to classify an image at the pixel level: for each pixel,
we want to determine the class of the object to which it belongs.

In [5]:
import tensorflow as tf
import numpy as np

from data import Dataset

In [6]:
# Training hyperparameters shared by the cells below.
# NOTE(review): num_epochs, learning_rate and decay_power are not referenced
# in the code visible here; presumably a later training cell consumes them.
num_epochs = 50
batch_size = 10
# Class count is read from the Dataset class attribute rather than hard-coded.
num_classes = Dataset.num_classes

# Alternative learning rates kept for reference (currently disabled):
# learning_rate = 5e-4
# learning_rate = 1e-2
learning_rate = 1e-3
# presumably the exponent of a learning-rate decay schedule -- TODO confirm
decay_power = 1.2

In [8]:
# Strength of the L2 weight penalty, and a regularizer built from it.
weight_decay = 1e-4
reg_func = tf.contrib.layers.l2_regularizer(weight_decay)

# Boolean placeholder needed for batch norm; it is passed as the `training`
# flag and therefore has to be fed whenever the graph is run.
is_training = tf.placeholder(dtype=tf.bool)

# Keyword arguments for batch normalization.
# NOTE(review): in the code visible here, conv() builds its own regularizer
# and calls batch_normalization without these settings -- confirm whether
# reg_func / bn_params were meant to be wired in.
bn_params = dict(
  momentum=0.9,          # decay for the moving averages
  epsilon=1e-5,          # epsilon to prevent 0s in variance
  fused=True,            # fused must be false if BN is frozen
  training=is_training,
)


def conv_22(x, num_maps, k=3, activation=tf.nn.relu):
  """Apply one 'same'-padded 2-D convolution followed by `activation`.

  Unlike `conv`, this layer keeps the default bias term and uses neither
  batch normalization nor weight regularization.

  Args:
    x: input tensor handed to `tf.layers.conv2d`.
    num_maps: number of output feature maps (filters).
    k: kernel size.
    activation: activation passed to the layer; ReLU by default.

  Returns:
    The output tensor of the convolution layer.
  """
  return tf.layers.conv2d(
      inputs=x,
      filters=num_maps,
      kernel_size=k,
      padding='same',
      activation=activation)
def conv(x, num_maps, k=3):
  """Convolution -> batch normalization -> ReLU.

  The convolution is 'same'-padded, has no bias term and carries an L2
  kernel regularizer scaled by the module-level `weight_decay`. Batch
  normalization switches between training and inference behaviour through
  the module-level `is_training` placeholder.

  Args:
    x: input tensor handed to `tf.layers.conv2d`.
    num_maps: number of output feature maps (filters).
    k: kernel size.

  Returns:
    The ReLU-activated, batch-normalized feature tensor.
  """
  l2_penalty = tf.contrib.layers.l2_regularizer(weight_decay)
  features = tf.layers.conv2d(
      x, num_maps, k,
      padding='same',
      use_bias=False,
      kernel_regularizer=l2_penalty)
  normalized = tf.layers.batch_normalization(features, training=is_training)
  return tf.nn.relu(normalized)

def pool(x):
  """Downsample `x` with 2x2 max pooling, stride 2 and 'same' padding."""
  # Average pooling was tried as an alternative:
  #   tf.layers.average_pooling2d(x, 2, 2, 'same')
  return tf.layers.max_pooling2d(x, pool_size=2, strides=2, padding='same')


def build_model1(x):
  """Build a plain encoder that predicts per-pixel class logits.

  Three conv+pool stages (32, 64, 128 maps) are followed by one more
  128-map conv. A 1x1 convolution then produces `num_classes` score maps
  (`num_classes` is read from module scope), which are bilinearly resized
  back to the spatial size of the input.

  Args:
    x: input tensor whose dimensions 1 and 2 are statically known; they
      are used as the target size of the final resize.

  Returns:
    Logits tensor resized to the input's spatial size.
  """
  target_hw = x.get_shape().as_list()[1:3]
  print(target_hw)

  net = x
  for width in (32, 64, 128):
    net = pool(conv(net, width, 3))
  net = conv(net, 128, 3)

  # 1x1 classifier: plain conv2d so the logits get no BN / ReLU.
  scores = tf.layers.conv2d(net, num_classes, 1, padding='same')
  return tf.image.resize_bilinear(scores, target_hw, name='upsample_logits')

def upsample(x, skip, num_maps):
  """Fuse a coarse feature map with a skip connection.

  `x` is bilinearly resized to the spatial size of `skip` (its dimensions
  1 and 2), concatenated with `skip` along axis 3, and the result is passed
  through one `conv` block.

  Args:
    x: coarse feature tensor to be enlarged.
    skip: feature tensor providing the target size; its dimensions 1 and 2
      must be statically known.
    num_maps: number of output feature maps of the fusing convolution.

  Returns:
    The fused feature tensor produced by `conv`.
  """
  target_hw = skip.get_shape().as_list()[1:3]
  resized = tf.image.resize_bilinear(x, target_hw)
  merged = tf.concat([resized, skip], axis=3)
  return conv(merged, num_maps)


def build_model(x, num_classes):
  """Build the segmentation network with one skip connection.

  Encoder: a 5x5 conv block, then three stages of two 3x3 conv blocks each,
  with a 2x2 max pool after every stage, and finally two more conv blocks
  after a fourth pool. The output of the last stage before that fourth pool
  is stored as a skip connection and fused back in through `upsample`. A
  1x1 convolution produces `num_classes` score maps, which are bilinearly
  resized to the spatial size of the input.

  Args:
    x: input tensor whose dimensions 1 and 2 are statically known; they
      are used as the target size of the final resize.
    num_classes: number of output score maps.

  Returns:
    A `(logits, is_training)` tuple, where `is_training` is the
    module-level boolean placeholder used by batch normalization.
  """
  target_hw = x.get_shape().as_list()[1:3]
  widths = [32, 64, 128, 256, 128]
  skips = []

  # Stem: a single wide-kernel conv block, then the first downsampling.
  net = pool(conv(x, widths[0], k=5))

  # Two stages of (conv, conv, pool); no skips are recorded here.
  for width in widths[1:3]:
    net = conv(net, width)
    net = conv(net, width)
    net = pool(net)

  # Last stage before the deepest pool -- the only one kept as a skip.
  net = conv(net, widths[3])
  net = conv(net, widths[3])
  skips.append(net)
  net = pool(net)

  # Deepest features.
  net = conv(net, widths[4])
  net = conv(net, widths[4])

  # Decoder: fuse skips from deepest to shallowest.
  # The original note here read "36 without" -- presumably a score obtained
  # without skip connections; unverified.
  # NOTE(review): `idx` is the position in `skips`, not the encoder stage,
  # so the single skip is fused with widths[0] == 32 maps -- confirm intended.
  for idx, skip in reversed(list(enumerate(skips))):
    print(idx, net, '\n', skip)
    net = upsample(net, skip, widths[idx])

  # 1x1 classifier: plain conv2d so the logits get no BN / ReLU.
  scores = tf.layers.conv2d(net, num_classes, 1, padding='same')
  scores = tf.image.resize_bilinear(scores, target_hw, name='upsample_logits')
  return scores, is_training

In [9]:
# Training and validation datasets; validation is created with shuffle=False.
train_data = Dataset('train', batch_size)
val_data = Dataset('val', batch_size, shuffle=False)

# Image geometry comes from the training set and fixes the placeholder shapes.
height = train_data.height
width = train_data.width
channels = train_data.channels
# Fixed-batch-size placeholders (disabled); the active ones below leave the
# batch dimension as None so any batch size can be fed.
# x = tf.placeholder(tf.float32, shape=(batch_size, height, width, channels))
# y = tf.placeholder(tf.int32, shape=(batch_size, height, width))
x = tf.placeholder(tf.float32, shape=(None, height, width, channels))
y = tf.placeholder(tf.int32, shape=(None, height, width))

# build_model returns the module-level is_training placeholder alongside the
# logits, so this rebinds is_training to the same tensor.
logits, is_training = build_model(x, num_classes)
# NOTE(review): build_loss is not defined in the visible part of this file;
# the recorded cell output is a NameError raised on this line. It must be
# defined (or its defining cell executed) before this cell runs.
loss, logits_vec, y_vec = build_loss(logits, y)

0 Tensor("Relu_8:0", shape=(?, 10, 24, 128), dtype=float32) 
 Tensor("Relu_6:0", shape=(?, 20, 48, 256), dtype=float32)


NameError: name 'build_loss' is not defined