In [1]:
import tensorflow as tf
#tf.enable_eager_execution()
from os import listdir
from os.path import isfile, join
from PIL import Image
import numpy as np


%load_ext pycodestyle_magic

### Parameters

In [2]:
# --- Locations of the training data and of the pretrained encoder ----------
TRAIN_FEAT_DIR = './data/train/images'
TRAIN_LABELS_DIR = './data/train/masks'
VGG_PATH = './vgg16/saved_model'

# --- Model / optimiser settings ---------------------------------------------
NUM_CLASSES = 1
LRATE = 0.001

# --- Array geometry ----------------------------------------------------------
# Size of the images and masks as they are stored on disk.
IMAGE_SHAPE = (101, 101, 3)
LABEL_SHAPE = IMAGE_SHAPE[:2]

# Size of the arrays fed to / produced by the network.  It is deliberately
# larger than the image size: the extra rows/columns are a padding hack that
# keeps the layers symmetric when deconvoluting.
INPUT_SHAPE = (104, 104, 3)
OUTPUT_SHAPE = INPUT_SHAPE[:2]


### Data loading

In [3]:
#%%pycodestyle

class data_handling:
    """Load the training images and masks into memory and serve mini-batches.

    Images are mean-centred and written into the top-left corner of a
    zero-filled array of shape ``INPUT_SHAPE``; masks are written into a
    zero-filled array of shape ``OUTPUT_SHAPE`` and divided by 65535.
    """

    def __init__(self, train_feat_path, train_label_path):
        """Read every file found in the two directories.

        Args:
            train_feat_path: directory containing the training images.
            train_label_path: directory containing the training masks.
        """
        self.train_feat_path = train_feat_path
        self.train_label_path = train_label_path
        self.train_feat_data = self.create_array(train_feat_path)
        # 65535 is the largest 16-bit value; presumably the masks are 16-bit
        # images so this maps them to [0, 1] -- TODO confirm against the data.
        self.train_label_data = self.create_array(
            train_label_path, mode='label') / 65535.0
        self.epoch = 0
        self.epoch_limit = 1

    def gen_batch_function(self, dataset='train', bs=64):
        """Yield ``(features, labels)`` mini-batches in storage order.

        The data is walked ``bs`` samples at a time; the last batch of a pass
        may be smaller and an empty batch is never produced.  ``self.epoch``
        is incremented after every full pass and the generator stops as soon
        as it reaches ``self.epoch_limit``.

        Args:
            dataset: which split to serve; only ``'train'`` is available.
            bs: number of samples per batch, must be positive.

        Raises:
            ValueError: for an unknown ``dataset`` or a non-positive ``bs``
                (raised when iteration starts, as this is a generator).
        """
        if dataset != 'train':
            raise ValueError("unknown dataset: {!r}".format(dataset))
        if bs <= 0:
            raise ValueError("bs must be positive, got {!r}".format(bs))

        features = self.train_feat_data
        labels = self.train_label_data
        n_samples = len(features)

        while self.epoch < self.epoch_limit:
            for start in range(0, n_samples, bs):
                stop = start + bs
                yield features[start:stop], labels[start:stop]
            self.epoch += 1

    def create_array(self, path, mode='train'):
        """Stack every file in ``path`` into one zero-padded array.

        Args:
            path: directory to read; sub-directories are ignored.
            mode: ``'train'`` for images (mean-subtracted, float32, padded to
                ``INPUT_SHAPE``) or ``'label'`` for masks (integer, padded to
                ``OUTPUT_SHAPE``).

        Returns:
            Array whose first axis indexes the files in sorted-name order.

        Raises:
            ValueError: if ``mode`` is neither ``'train'`` nor ``'label'``.
        """
        # listdir returns names in arbitrary order; sorting keeps image i and
        # mask i aligned when both directories use the same file names.
        files = sorted(f for f in listdir(path) if isfile(join(path, f)))

        if mode == 'train':
            # Float storage so that subtracting the (fractional) mean is not
            # truncated to whole numbers.
            data = np.zeros((len(files), *INPUT_SHAPE), dtype=np.float32)
        elif mode == 'label':
            data = np.zeros((len(files), *OUTPUT_SHAPE), dtype=int)
        else:
            raise ValueError("unknown mode: {!r}".format(mode))

        height, width = IMAGE_SHAPE[0], IMAGE_SHAPE[1]
        # Drop the leading batch axis so the mean broadcasts over (H, W, 3).
        mean = self.get_mean().reshape(1, 1, 3)

        for i, name in enumerate(files):
            # Context manager closes the underlying file handle promptly.
            with Image.open(join(path, name)) as img:
                pixels = np.array(img)
            if mode == 'train':
                data[i, :height, :width, :] = pixels - mean
            else:
                data[i, :height, :width] = pixels
        return data

    @staticmethod
    def get_mean():
        """Return the value subtracted from every image channel.

        Returns:
            Array of shape ``(1, 1, 1, 3)`` filled with 120.346.
        """
        return np.full((1, 1, 1, 3), 120.346)
    
  

### Model definition and training

In [4]:
# Reuse the location declared in the parameters cell instead of a
# machine-specific absolute path, so the notebook runs from any checkout.
vgg_path = VGG_PATH

def load_vgg(sess, vgg_path):
    """Load the saved VGG16 model into ``sess`` and fetch the tensors we need.

    Args:
        sess: TensorFlow session the saved model is loaded into.
        vgg_path: directory of the saved model (tagged ``'vgg16'``).

    Returns:
        Tuple ``(image_input, layer3, layer4, layer7)`` holding the graph
        tensors named ``image_input:0``, ``conv2_1:0``, ``pool2:0`` and
        ``pool3:0`` respectively.  The ``keep_prob`` tensor is not fetched.
    """
    # Restore the model and its weights; the return value is not needed.
    tf.saved_model.loader.load(sess, ['vgg16'], vgg_path)

    # Look the tensors up by name, in the order they are returned.
    wanted = ('image_input:0', 'conv2_1:0', 'pool2:0', 'pool3:0')
    default_graph = tf.get_default_graph()
    image_input, layer3, layer4, layer7 = [
        default_graph.get_tensor_by_name(tensor_name) for tensor_name in wanted
    ]

    return image_input, layer3, layer4, layer7

In [5]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes = NUM_CLASSES):
    """Build the decoder: three x2 upsampling steps with two skip connections.

    Args:
        vgg_layer3_out: shallowest encoder tensor, added after the second
            upsampling step.
        vgg_layer4_out: intermediate encoder tensor, added after the first
            upsampling step.
        vgg_layer7_out: deepest encoder tensor, input of the decoder.
        num_classes: number of channels of the returned tensor.

    Returns:
        Output tensor of the last transposed convolution, with
        ``num_classes`` channels.

    Note:
        ``tf.add`` needs each upsampled tensor to match the skip tensor it is
        added to; this assumes every skip tensor has exactly twice the
        spatial size of the tensor being upsampled -- TODO confirm against
        the saved VGG graph.
    """
    # Use shorter variable names for readability.
    layer3, layer4, layer7 = vgg_layer3_out, vgg_layer4_out, vgg_layer7_out

    # 1x1 convolution in place of a fully connected layer.
    fcn8 = tf.layers.conv2d(layer7, filters=num_classes, kernel_size=1, name="fcn8")

    # Upsample x2 and give the result as many channels as layer4 so the two
    # tensors can be added element-wise.
    fcn9 = tf.layers.conv2d_transpose(
        fcn8, filters=layer4.get_shape().as_list()[-1],
        kernel_size=4, strides=(2, 2), padding='SAME', name="fcn9")

    # Skip connection between the upsampled tensor (fcn9) and layer4.
    fcn9_skip_connected = tf.add(fcn9, layer4, name="fcn9_plus_vgg_layer4")

    # Upsample x2 again, this time matching the channel count of layer3.
    fcn10 = tf.layers.conv2d_transpose(
        fcn9_skip_connected, filters=layer3.get_shape().as_list()[-1],
        kernel_size=4, strides=(2, 2), padding='SAME', name="fcn10_conv2d")

    # Skip connection between fcn10 and layer3.
    fcn10_skip_connected = tf.add(fcn10, layer3, name="fcn10_plus_vgg_layer3")

    # Final x2 upsampling down to one channel per class.  Uses the
    # ``num_classes`` argument (not the module constant) so it always agrees
    # with the 1x1 convolution above.
    fcn11 = tf.layers.conv2d_transpose(
        fcn10_skip_connected, filters=num_classes,
        kernel_size=4, strides=(2, 2), padding='SAME', name="fcn11")

    return fcn11

In [6]:
def optimize(nn_last_layer, correct_label, learning_rate = LRATE, num_classes = NUM_CLASSES):
    """Create the loss and the Adam training operation.

    Args:
        nn_last_layer: output tensor of the network.
        correct_label: tensor holding the ground-truth labels; it must contain
            as many elements as ``nn_last_layer``.
        learning_rate: learning rate handed to ``tf.train.AdamOptimizer``.
        num_classes: number of classes, i.e. columns of the 2-D logits.

    Returns:
        Tuple ``(logits, train_op, loss_op)``.
    """
    # Flatten to 2-D: one row per pixel, one column per class.
    logits = tf.reshape(nn_last_layer, (-1, num_classes), name="fcn_logits")
    labels = tf.reshape(correct_label, (-1, num_classes))

    if num_classes == 1:
        # A softmax over a single class is always 1, so softmax cross-entropy
        # would be identically zero and provide no gradient.  A single-logit
        # (binary) output needs the sigmoid form instead.
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=labels)
    else:
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)

    # Average over all pixels to obtain the scalar loss.
    loss_op = tf.reduce_mean(cross_entropy, name="fcn_loss")

    # Operation that updates the weights to reduce the loss.
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss_op, name="fcn_train_op")

    return logits, train_op, loss_op

In [7]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
             cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate):
    """Run the training loop and print the summed loss of every epoch.

    Args:
        sess: session used to run the training operation.
        epochs: number of passes to perform.
        batch_size: value passed to ``get_batches_fn``.
        get_batches_fn: callable taking the batch size and returning an
            iterable of ``(images, labels)`` batches for one pass.
        train_op: operation performing one optimisation step.
        cross_entropy_loss: tensor evaluated to obtain the batch loss.
        input_image: feed key for the image batch.
        correct_label: feed key for the label batch.
        keep_prob: accepted for interface compatibility; currently unused.
        learning_rate: feed key for the learning rate, or ``None`` when the
            rate is already baked into ``train_op`` and must not be fed.
    """
    learning_rate_value = LRATE

    for epoch in range(epochs):
        total_loss = 0
        for X_batch, gt_batch in get_batches_fn(batch_size):
            feed = {input_image: X_batch, correct_label: gt_batch}
            # Only feed the learning rate when there is a key to feed it to.
            if learning_rate is not None:
                feed[learning_rate] = learning_rate_value

            loss, _ = sess.run([cross_entropy_loss, train_op], feed_dict=feed)
            total_loss += loss

        print("EPOCH {} ...".format(epoch + 1))
        print("Loss = {:.3f}".format(total_loss))
        print()

In [11]:
def run(epochs=10, batch_size=64):
    """Load the data, build the network on top of VGG16 and train it.

    Args:
        epochs: number of passes over the training data.
        batch_size: number of samples per mini-batch.
    """
    d_proc = data_handling(TRAIN_FEAT_DIR, TRAIN_LABELS_DIR)

    def get_batches_fn(bs):
        """Return a fresh generator covering one pass over the data."""
        # The generator stops once d_proc.epoch reaches d_proc.epoch_limit;
        # rewind the counter so that every call yields a full pass again.
        d_proc.epoch = 0
        return d_proc.gen_batch_function(bs=bs)

    with tf.Session() as session:

        correct_label = tf.placeholder(
            tf.float32, [None, *OUTPUT_SHAPE], name='correct_label')
        # Fed by train_nn on every step.
        learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')

        # Input tensor and the three encoder tensors of the VGG graph.
        image_input, layer3, layer4, layer7 = load_vgg(session, vgg_path)
        print(layer3.get_shape())

        # Decoder stacked on top of the VGG encoder.
        model_output = layers(layer3, layer4, layer7, num_classes=NUM_CLASSES)

        # - logits: each row represents a pixel, each column a class
        # - train_op: operation that updates the weights
        # - cross_entropy_loss: the scalar loss being minimised
        logits, train_op, cross_entropy_loss = optimize(
            model_output, correct_label,
            learning_rate=learning_rate, num_classes=NUM_CLASSES)

        # Initialise only the variables that have no value yet (decoder and
        # optimiser state).  Running the global initialiser here could also
        # reset the variables the saved-model loader has just restored.
        uninitialized = set(session.run(tf.report_uninitialized_variables()))
        session.run(tf.variables_initializer(
            [v for v in tf.global_variables()
             if v.op.name.encode() in uninitialized]))
        session.run(tf.local_variables_initializer())

        print("Model build successful, starting training")

        # keep_prob is not used by the network, hence None.
        train_nn(session, epochs, batch_size, get_batches_fn,
                 train_op, cross_entropy_loss, image_input,
                 correct_label, None, learning_rate)

        # TODO: run the model on the test images and save the predicted masks.

        print("All done!")

In [None]:
# Entry point of the notebook: load the data, build the model and train it.
run()