In [1]:
import tensorflow as tf
#tf.enable_eager_execution()
from os import listdir
from os.path import isfile, join
from PIL import Image
import numpy as np


%load_ext pycodestyle_magic

### Parameters

In [2]:
TRAIN_FEAT_DIR = './data/train/images'
TRAIN_LABELS_DIR = './data/train/masks'
VGG_PATH = '/home/paperspace/kaggle/Semantic_Segmentation/vgg16/saved_model'
NUM_CLASSES = 1
LRATE = 0.001
EPOCHS = 10
BATCH_SIZE = 64

IMAGE_SHAPE = (101, 101, 3)
LABEL_SHAPE = (101, 101)
INPUT_SHAPE = (104, 104, 3)
OUTPUT_SHAPE = (104, 104)
# input different to the image dimensions due to a hack to make the layers symmetric when
# deconvoluting


### data get

In [3]:
#%%pycodestyle

class data_handling:
    
    def __init__(self, train_feat_path, train_label_path):
        self.train_feat_path = train_feat_path
        self.train_label_path = train_label_path
        
        self.train_feat_data, \
        self.val_feat_data, \
        self.test_feat_data = \
        self.split_data(self.create_array(train_feat_path))
        self.train_label_data, \
        self.val_label_data, \
        self.test_label_data =\
        self.split_data(
            self.create_array(train_label_path, mode='label')/65535.0)
    
    def gen_batch_function(self, dataset='train',
                           bs=BATCH_SIZE, num_batches=None):
        
        if dataset == 'train':
            feat = self.train_feat_data
            labels = self.train_label_data
            
        elif dataset == 'test':
            feat = self.test_feat_data
            labels = self.test_label_data
            
        if num_batches is None:
            stop_iter = len(feat)//bs + 1
        else:
            stop_iter = num_batches
        
        batch = 0
        
        for i in range(stop_iter):
            if batch != len(feat)//bs:
                
                st = batch*bs; end = (batch+1)*bs;
                
                yield (feat[st:end,:].astype('float32')\
                - self.get_mean()), \
                labels[st:end,:].astype('float32')
                
                batch += 1
            else:
                yield feat[batch*bs:(len(feat)),:].astype('float32')\
                 - self.get_mean(), \
                labels[batch*bs:(len(feat)),:].astype('float32')

    def create_array(self, path, mode = 'train'):
        files = [f for f in listdir(path) if isfile(join(path, f))]
        
        if mode=='train':
            data = np.zeros((len(files), *INPUT_SHAPE)).astype('int')
        elif mode=='label':
            data = np.zeros((len(files), *OUTPUT_SHAPE)).astype('int')
            
        for i in range(len(files)):
            if mode == 'train':
                data[i,
                     :IMAGE_SHAPE[0],
                     :IMAGE_SHAPE[1],
                     :] = np.array(Image.open(path + '/'+ files[i])) 
            elif mode == 'label':
                data[i,
                     :IMAGE_SHAPE[0],
                     :IMAGE_SHAPE[1]
                    ] = np.array(Image.open(path + '/'+ files[i]))                
        return data
    
    @staticmethod
    def get_mean():
        x = np.zeros((1,1,1,3))
        x[0,0,0,:]= np.array([120.346, 120.346, 120.346])
        return x
    
    @staticmethod
    def get_std():
        return 27.60
    
    @staticmethod
    def shuffle(data):
        ind = np.random.choice(len(data),len(data), replace=False)
        return data[ind]
    
    @staticmethod
    def split_data(data, val_split = 0.05, split = 0.8):
        train_end = int(len(data)*(split - val_split))
        val_end = int(len(data)*split)
        train_feat = data[:train_end]
        val_data = data[train_end:val_end]
        test_data = data[val_end:]
        return train_feat, val_data, test_data
    

In [4]:
# dt = data_handling(TRAIN_FEAT_DIR, TRAIN_LABELS_DIR)

# def s_data(data, val_split = 0.05, split = 0.8):
#     train_end = len(data)*int(split - val_split)
#     val_end = len(data)*int(split)
#     train_feat = data[:train_end]
#     val_data = data[train_end:val_end]
#     test_data = data[val_end:]
#     return train_feat, val_data, test_data
    

# s_data(dt.create_array(TRAIN_FEAT_DIR))
    


### run code

In [5]:
def load_vgg(sess, vgg_path = VGG_PATH):
  
   # load the model and weights
    model = tf.saved_model.loader.load(sess, ['vgg16'], vgg_path)

   # Get Tensors to be returned from graph
    graph = tf.get_default_graph()
    image_input = graph.get_tensor_by_name('image_input:0')
   
    #keep_prob = graph.get_tensor_by_name('keep_prob:0')
    layer3 = graph.get_tensor_by_name('conv2_1:0')
    layer4 = graph.get_tensor_by_name('pool2:0')
    layer7 = graph.get_tensor_by_name('pool3:0')

    return image_input, layer3, layer4, layer7

In [6]:
# sess = tf.Session()
# img, l3, _, _ = load_vgg(sess)
# print(img.get_shape(), l3.get_shape())

In [7]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes = NUM_CLASSES):
   
    # Use a shorter variable name for simplicity
    layer3, layer4, layer7 = vgg_layer3_out, vgg_layer4_out, vgg_layer7_out

    # Apply 1x1 convolution in place of fully connected layer
    fcn8 = tf.layers.conv2d(layer7, filters=num_classes, kernel_size=1, name="fcn8")

    # Upsample fcn8 with size depth=(4096?) to match size of layer 4 so that we can add skip connection with 4th layer
    fcn9 = tf.layers.conv2d_transpose(fcn8, filters=layer4.get_shape().as_list()[-1],
    kernel_size=4, strides=(2, 2), padding='SAME', name="fcn9")

    # Add a skip connection between current final layer fcn8 and 4th layer
    fcn9_skip_connected = tf.add(fcn9, layer4, name="fcn9_plus_vgg_layer4")

    # Upsample again
    fcn10 = tf.layers.conv2d_transpose(fcn9_skip_connected, filters=layer3.get_shape().as_list()[-1],
    kernel_size=4, strides=(2, 2), padding='SAME', name="fcn10_conv2d")

    # Add skip connection
    fcn10_skip_connected = tf.add(fcn10, layer3, name="fcn10_plus_vgg_layer3")
    
    # Upsample again
    fcn11 = tf.layers.conv2d_transpose(fcn10_skip_connected, filters=NUM_CLASSES,
    kernel_size=4, strides=(2, 2), padding='SAME', name="fcn11")

    return fcn11

### test run

In [8]:
# num_classes=NUM_CLASSES

# d_proc = data_handling(TRAIN_FEAT_DIR,labels_DIR)

# # A function to get batches
# get_batches_fn = d_proc.gen_batch_function

# with tf.Session() as session:
    
#     correct_label = tf.placeholder(tf.float32, [None, *OUTPUT_SHAPE], name='correct_label')

#     image_input, layer3, layer4, layer7 = load_vgg(session)

#     model_output = layers(layer3, layer4, layer7, num_classes = 1)

#     logits = tf.reshape(model_output, (-1, num_classes), name="fcn_logits")
    
#     correct_label_reshaped = tf.reshape(correct_label, (-1, num_classes))

#     cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=correct_label_reshaped[:])

#     loss_op = tf.reduce_mean(cross_entropy, name="fcn_loss")

#     # The model implements this operation to find the weights/parameters that would yield correct pixel labels
#     train_op = tf.train.AdamOptimizer(learning_rate=LRATE).minimize(loss_op, name="fcn_train_op")


#     # Initialize all variables
#     session.run(tf.global_variables_initializer())
#     session.run(tf.local_variables_initializer())

#     print("Model build successful, starting training")
    
#     # Train the neural network
#     for X_batch, gt_batch in get_batches_fn(num_batches=1):
#         ce, loss, _, pre_logit, true_logit = \
#         session.run([cross_entropy, loss_op, train_op, logits, correct_label_reshaped],
#         feed_dict={image_input: X_batch,
#                  correct_label: gt_batch})
    
    
#     # Run the model with the test images and save each painted output image (roads painted green)
#     #helper.save_inference_samples(runs_dir, data_dir, session, image_shape, logits, image_input)

#     print(ce, loss, pre_logit, true_logit)

### full run

In [9]:
def optimize(nn_last_layer, correct_label, learning_rate = LRATE, num_classes = NUM_CLASSES):
  
  # Reshape 4D tensors to 2D, each row represents a pixel, each column a class
    logits = tf.reshape(nn_last_layer, (-1, num_classes), name="fcn_logits")
    correct_label_reshaped = tf.reshape(correct_label, (-1, num_classes))

    # Calculate distance from actual labels using cross entropy
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=correct_label_reshaped[:])
    # Take mean for total loss
    loss_op = tf.reduce_mean(cross_entropy, name="fcn_loss")

    # The model implements this operation to find the weights/parameters that would yield correct pixel labels
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op, name="fcn_train_op")

    return logits, train_op, loss_op

In [10]:
def train_nn(sess, epochs, batch_size, data_handler, train_op,
             cross_entropy_loss, input_image,
             correct_label):
    
    output_path = "./Train"
    train_summary_writer = tf.summary.FileWriter(output_path)
    
    train_summary=tf.Summary()
    val_summary=tf.Summary()
    
    # Initialize all variables
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    #keep_prob_value = 0.5
    #learning_rate_value = LRATE
    total_train_loss = 0
    total_val_loss = 0
    step = 0 
    for epoch in range(epochs):
      # Create function to get batches
        total_train_loss = 0
        total_val__loss = 0
        for X_batch, gt_batch in data_handler.gen_batch_function(bs = batch_size):
            step += 1
            loss, _ = sess.run([cross_entropy_loss, train_op],
            feed_dict={input_image: X_batch,
                     correct_label: gt_batch})
            val_loss = sess.run([cross_entropy_loss],
            feed_dict={input_image: data_handler.val_feat_data,
                     correct_label: data_handler.val_label_data})
            #print(loss, np.shape(loss))
            #print(val_loss, np.shape(val_loss))
            train_summary.value.add(tag='train_loss', simple_value = loss)
            val_summary.value.add(tag='val_loss', simple_value = val_loss[0])
            train_summary_writer.add_summary(train_summary, step)
            train_summary_writer.add_summary(val_summary, step)
            
            # train_summary_writer.flush()
            total_train_loss += loss;
            total_val_loss += val_loss[0]
        print("EPOCH {} ...".format(epoch + 1))
        print("Loss = {:.3f};  Val_loss = {:.3f}".format(total_train_loss, total_val_loss))
        print()

In [11]:
def run():
  
  d_proc = data_handling(TRAIN_FEAT_DIR, TRAIN_LABELS_DIR)
  
  with tf.Session() as session:
    
    correct_label = tf.placeholder(tf.float32, [None, *OUTPUT_SHAPE], name='correct_label')
    
    # Returns the three layers, keep probability and input layer from the vgg architecture
    image_input, layer3, layer4, layer7 = load_vgg(session)
    #print(layer3.get_shape())
    # The resulting network architecture from adding a decoder on top of the given vgg model
    
    model_output = layers(layer3, layer4, layer7, num_classes = 1)

    # Returns the output logits, training operation and cost operation to be used
    # - logits: each row represents a pixel, each column a class
    # - train_op: function used to get the right parameters to the model to correctly label the pixels
    # - cross_entropy_loss: function outputting the cost which we are minimizing, lower cost should yield higher accuracy
    logits, train_op, cross_entropy_loss = optimize(model_output, correct_label)
    
#     # Initialize all variables
#     session.run(tf.global_variables_initializer())
#     session.run(tf.local_variables_initializer())

    print("Model build successful, starting training")

    # Train the neural network
    train_nn(session, EPOCHS, BATCH_SIZE, d_proc, 
             train_op, cross_entropy_loss, image_input,
             correct_label)
    
    # Run the model with the test images and save each painted output image (roads painted green)
    #helper.save_inference_samples(runs_dir, data_dir, session, image_shape, logits, image_input)
    
    print("All done!")

In [12]:
run()

INFO:tensorflow:Restoring parameters from /home/paperspace/kaggle/Semantic_Segmentation/vgg16/saved_model/variables/variables
(?, 52, 52, 128)
Model build successful, starting training
19.722923 ()
[584.34894] (1,)
223.5613 ()
[32.249283] (1,)
12.17002 ()
[43.081917] (1,)
15.265841 ()
[36.001305] (1,)
17.007084 ()
[27.30029] (1,)
7.832636 ()
[34.286205] (1,)
7.7442474 ()
[32.581745] (1,)
7.5803742 ()
[24.622812] (1,)
5.9486237 ()
[15.942922] (1,)
3.6412022 ()
[14.443981] (1,)
4.1749034 ()
[13.860594] (1,)
4.7567477 ()
[10.1611185] (1,)
3.1819313 ()
[7.143816] (1,)
1.9935011 ()
[6.023562] (1,)
2.0258787 ()
[5.2743] (1,)
1.4795595 ()
[4.189487] (1,)
1.7474203 ()
[4.07277] (1,)
1.1782188 ()
[6.545718] (1,)
1.3525051 ()
[8.2095175] (1,)
1.4126292 ()
[7.6780887] (1,)
1.243097 ()
[5.7137027] (1,)
1.1100152 ()
[3.7101665] (1,)
1.1886724 ()
[2.633136] (1,)
0.94410735 ()
[2.1665473] (1,)
1.005461 ()
[1.9762927] (1,)
0.8145219 ()
[1.9907472] (1,)
0.8301694 ()
[2.245758] (1,)
0.7524047 ()
[2.6541

KeyboardInterrupt: 