In [1]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import skimage.feature
import datetime
import os

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
# blurred_img = cv2.GaussianBlur(labeled_img, (5,5), 0)

# training code

In [3]:
def get_input_img(name,
                  train_dir='D:/ML work/NOAA Sea Lion count/Data/Train/',
                  label_dir='D:/ML work/NOAA Sea Lion count/Data/Semantics Segmentation/'):
    """Load a training image and its labelled counterpart from disk.

    Args:
        name: Base file name (without extension) shared by both images.
        train_dir: Directory holding ``<name>.jpg``. Defaults to the location
            originally hard-coded in this notebook.
        label_dir: Directory holding ``<name>-processed.tif``. Defaults to the
            location originally hard-coded in this notebook.

    Returns:
        Tuple ``(original_img, labeled_img)`` as returned by ``cv.imread``.

    Raises:
        IOError: If either file cannot be read.
    """
    label_path = os.path.join(label_dir, name + '-processed.tif')
    image_path = os.path.join(train_dir, name + '.jpg')

    labeled_img = cv.imread(label_path)
    original_img = cv.imread(image_path)

    # cv.imread reports a missing/unreadable file by returning None instead of
    # raising, which would otherwise only surface later as an obscure error.
    for path, img in ((image_path, original_img), (label_path, labeled_img)):
        if img is None:
            raise IOError("Could not read image: " + path)

    return original_img, labeled_img


In [4]:
# Reference: https://github.com/sjchoi86/Tensorflow-101/blob/master/notebooks/semseg_basic.ipynb
def unpooling(inputOrg, size, mask=None):
    """Upsample a 4-D tensor by 2x along height and width by repeating values.

    Every element is duplicated along the width axis and then along the
    height axis by multiplying the flattened tensor with a ``[1, 1]`` row
    vector.

    Args:
        inputOrg: Tensor indexed as (batch, height, width, channels). The
            duplication vector is float16, so the input is expected to be
            float16 as well for ``tf.matmul`` to accept it.
        size: Sequence ``[m, h, w, c]`` describing ``inputOrg``.
        mask: Optional sparse indices. When given, a dense tensor built with
            ``tf.sparse_to_dense`` is returned alongside the upsampled output.

    Returns:
        The upsampled tensor of shape (m, 2*h, 2*w, c), or the tuple
        ``(output, dense_mask)`` when ``mask`` is provided.
    """
    # m, c, h, w order
    m = size[0]
    h = size[1]
    w = size[2]
    c = size[3]
    channels_first = tf.transpose(inputOrg, [0, 3, 1, 2])
    x = tf.reshape(channels_first, [-1, 1])
    # Row vector [1, 1]: multiplying a column by it writes every value twice.
    k = np.float16(np.array([1.0, 1.0]).reshape([1,-1]))
    output = tf.matmul(x, k)
    output = tf.reshape(output,[-1, c, h, w * 2])
    # m, c, w, h
    xx = tf.transpose(output, [0, 1, 3, 2])
    xx = tf.reshape(xx,[-1, 1])
    output = tf.matmul(xx, k)
    # m, c, w, h
    output = tf.reshape(output, [-1, c, w * 2, h * 2])
    # Back to (m, h, w, c) ordering.
    output = tf.transpose(output, [0, 3, 2, 1])
    outshape = tf.stack([m, h * 2, w * 2, c])
    # Identity test: `!= None` is an equality comparison and is ambiguous
    # (element-wise) for array-like masks.
    if mask is not None:
        # NOTE(review): tf.sparse_to_dense documents sparse_values as 0-D or
        # 1-D, while `output` is 4-D here -- confirm this branch is exercised.
        dense_mask = tf.sparse_to_dense(mask, outshape, output, 0)
        return output, dense_mask
    else:
        return output

In [5]:
def conv_layer(inputs, filters, kernel_size, strides = 1, padding = "SAME", bias_constant = 0.0, name = "conv"):
    """Create a 2-D convolution + bias + ReLU layer inside name scope ``name``.

    The kernel is a float16 variable initialised from ``tf.truncated_normal``;
    the bias is a constant-initialised variable. The convolution result is
    cast to float32 for the bias add and ReLU, then cast back to float16.
    Histogram summaries are registered for the kernel, the bias and the
    float32 activation.

    Args:
        inputs: 4-D input tensor; its last static dimension is used as the
            number of input channels.
        filters: Number of output channels.
        kernel_size: Sequence whose first two entries are kernel rows/cols.
        strides: Integer stride applied to both spatial dimensions.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.
        bias_constant: Initial value of every bias element.
        name: Name scope for the layer's ops and variables.

    Returns:
        The float16 activation tensor.
    """
    with tf.name_scope(name):
        in_channels = inputs.shape.as_list()[3]
        kernel_shape = [kernel_size[0], kernel_size[1], in_channels, filters]

        kernel = tf.Variable(initial_value = tf.truncated_normal(kernel_shape, dtype = tf.float16),
                             name = "kernel")
        bias = tf.Variable(tf.constant(bias_constant, shape=[filters]), name="bias")

        conv_out = tf.nn.conv2d(input = inputs,
                                filter = kernel,
                                strides = [1, strides, strides, 1],
                                padding = padding)

        # Bias add and ReLU run in float32; the result goes back to float16.
        relu_32 = tf.nn.relu(tf.cast(conv_out, dtype = tf.float32) + bias)
        relu_16 = tf.cast(relu_32, dtype=tf.float16)

        for tag, tensor in (("weights", kernel), ("biases", bias), ("activations", relu_32)):
            tf.summary.histogram(tag, tensor)

        return relu_16
    
    
def max_pooling(input, kernel_size, strides, padding, name = "max_pool"):
    """Apply 2-D max pooling inside name scope ``name``.

    Args:
        input: 4-D tensor to pool.
        kernel_size: Sequence whose first two entries are the pooling window
            rows/cols.
        strides: Sequence whose first two entries are the row/col strides.
        padding: Padding mode forwarded to ``tf.nn.max_pool``.
        name: Name scope for the pooling op.

    Returns:
        The pooled tensor.
    """
    with tf.name_scope(name):
        # The op is tf.nn.max_pool; tf.nn has no `max_pooling` attribute.
        # Arguments are passed positionally because the name of the first
        # parameter differs between TensorFlow releases.
        return tf.nn.max_pool(input,
                              [1, kernel_size[0], kernel_size[1], 1],
                              [1, strides[0], strides[1], 1],
                              padding)
        
#         w = tf.Variable(tf)

def deconv_layer(inputs, filters, kernel_size, output_size, strides = 1, padding = "SAME", bias_constant = 0.0, name = "deconv"):
    """Create a transposed convolution + bias + ReLU layer in scope ``name``.

    The kernel is a float16 variable initialised from ``tf.truncated_normal``
    with shape (rows, cols, filters, input channels); the bias is a
    constant-initialised variable. The transposed convolution result is cast
    to float32 for the bias add and ReLU, then cast back to float16.
    Histogram summaries are registered for the kernel, the bias and the
    float32 activation.

    Args:
        inputs: 4-D input tensor; its last static dimension is used as the
            number of input channels.
        filters: Number of output channels.
        kernel_size: Sequence whose first two entries are kernel rows/cols.
        output_size: Sequence whose first two entries are the output
            height/width passed as ``output_shape``.
        strides: Integer stride applied to both spatial dimensions.
        padding: Padding mode forwarded to ``tf.nn.conv2d_transpose``.
        bias_constant: Initial value of every bias element.
        name: Name scope for the layer's ops and variables.

    Returns:
        The float16 activation tensor.
    """
    with tf.name_scope(name):
        in_channels = inputs.shape.as_list()[3]
        # The batch size is only known at run time, so it is read from the
        # dynamic shape of the input.
        target_shape = tf.stack([tf.shape(inputs)[0], output_size[0], output_size[1],filters])

        kernel_shape = [kernel_size[0], kernel_size[1], filters, in_channels]
        kernel = tf.Variable(initial_value = tf.truncated_normal(kernel_shape, dtype = tf.float16),
                             name = "kernel")
        bias = tf.Variable(tf.constant(bias_constant, shape=[filters]), name="bias")

        deconv_out = tf.nn.conv2d_transpose(value = inputs,
                                            filter = kernel,
                                            strides = [1, strides, strides, 1],
                                            output_shape=target_shape,
                                            padding = padding)

        # Bias add and ReLU run in float32; the result goes back to float16.
        relu_32 = tf.nn.relu(tf.cast(deconv_out, dtype = tf.float32) + bias)
        relu_16 = tf.cast(relu_32, dtype=tf.float16)

        for tag, tensor in (("weights", kernel), ("biases", bias), ("activations", relu_32)):
            tf.summary.histogram(tag, tensor)

        return relu_16

In [6]:
def semantic_seg_model(features, labels, mode):
    """Build the convolutional encoder/decoder graph and return its output.

    Four ``conv_layer`` stages (the last two with stride 2) are followed by
    four ``deconv_layer`` stages (the first two with stride 2). The shape of
    each intermediate result is printed while the graph is built. The shape
    comments below are for the 100 x 100 patches used in this notebook.

    Args:
        features: Input image tensor fed to the first convolution.
        labels: Accepted for signature compatibility; not used here.
        mode: Accepted for signature compatibility; not used here.

    Returns:
        The output of the final ``deconv_layer`` (3 channels). Note that
        ``deconv_layer`` applies a ReLU, so this output is non-negative.
    """
    # ---------------- Encoder ----------------

    # conv1: stride 1, 100 x 100 -> 100 x 100, 10 channels
    conv1 = conv_layer(inputs=features, filters=10, kernel_size=[5, 5],
                       bias_constant=0.1, name="conv1")
    print(conv1.shape)

    # conv2: stride 1, 100 x 100 -> 100 x 100, 25 channels
    conv2 = conv_layer(inputs=conv1, filters=25, kernel_size=[5, 5],
                       bias_constant=0.1, name="conv2")
    print(conv2.shape)

    # conv3: stride 2, 100 x 100 -> 50 x 50, 50 channels
    conv3 = conv_layer(inputs=conv2, filters=50, kernel_size=[5, 5],
                       bias_constant=0.1, strides=2, name="conv3")
    print(conv3.shape)

    # conv4: stride 2, 50 x 50 -> 25 x 25, 80 channels
    conv4 = conv_layer(inputs=conv3, filters=80, kernel_size=[5, 5],
                       bias_constant=0.1, strides=2, name="conv4")
    print(conv4.shape)

    # ---------------- Decoder ----------------

    # deconv4: stride 2, 25 x 25 -> 50 x 50, 50 channels
    deconv4 = deconv_layer(inputs=conv4, filters=50, kernel_size=[5, 5],
                           bias_constant=0.1, strides=2,
                           output_size=[50, 50], name="deconv4")
    print(deconv4)

    # deconv3: stride 2, 50 x 50 -> 100 x 100, 25 channels
    deconv3 = deconv_layer(inputs=deconv4, filters=25, kernel_size=[5, 5],
                           bias_constant=0.1, strides=2,
                           output_size=[100, 100], name="deconv3")
    print(deconv3)

    # deconv2: stride 1, 100 x 100 -> 100 x 100, 10 channels
    deconv2 = deconv_layer(inputs=deconv3, filters=10, kernel_size=[5, 5],
                           bias_constant=0.1,
                           output_size=[100, 100], name="deconv2")
    print(deconv2)

    # deconv1: stride 1, 100 x 100 -> 100 x 100, 3 channels (model output)
    deconv1 = deconv_layer(inputs=deconv2, filters=3, kernel_size=[5, 5],
                           output_size=[100, 100],
                           bias_constant=0.1, name="deconv1")
    print(deconv1.shape)

    return deconv1


In [7]:
# Load training image '0' together with its labelled counterpart.
features, labels = get_input_img('0')

# Size every image is padded to, and the number of colour channels.
height, width, channel = 3900, 6000, 3

# The image is very large, so it is divided into patches. Dividing the padded
# image size by these counts gives the spatial size of the placeholders below.
batch_length_vertical, batch_length_horizontal = 39, 60
# Divisor applied to the total patch count when patches are grouped into batches.
mini_batch = 1343

# ksize_rows and ksize_cols define the size of the extracted patches.
ksize_rows, ksize_cols = 100, 100

# Used as the stride, in pixels, between neighbouring patches when they are
# extracted (so consecutive patches share ksize - overlapping_region pixels).
overlapping_region = 25

# TensorFlow placeholders for one batch of image patches and label patches.
patch_input_shape = [None, height // batch_length_vertical, width // batch_length_horizontal, channel]
tf_features = tf.placeholder(tf.float16, patch_input_shape, name = 'features')
tf_labels = tf.placeholder(tf.float16, patch_input_shape, name = 'labels')

In [8]:
# Bring both images to the constant (height, width) size by zero-padding the
# missing rows/columns in front of the first two axes; channels are untouched.
features, labels = [
    np.pad(img,
           ((height - img.shape[0], 0), (width - img.shape[1], 0), (0, 0)),
           mode = 'constant')
    for img in (features, labels)
]

In [9]:
# Convert the 3-D images to 4-D by adding a leading batch axis of length 1.
features = np.expand_dims(features, axis=0)
labels = np.expand_dims(labels, axis=0)

In [10]:
def _to_patches(images):
    """Cut `images` into ksize_rows x ksize_cols patches, one patch per row of the result."""
    flat_patches = tf.extract_image_patches(images,
                                            ksizes = [1, ksize_rows, ksize_cols, 1],
                                            strides = [1, overlapping_region, overlapping_region, 1],
                                            rates = [1, 1, 1, 1],
                                            padding = "VALID")
    return tf.reshape(flat_patches, [-1, ksize_rows, ksize_cols, channel])

# Extract patches from the image ...
features = _to_patches(features)

# ... and from the ground-truth image.
labels = _to_patches(labels)

In [11]:
# Group the patches into batches for training: the total patch count divided
# by mini_batch becomes the second axis, the leading axis is inferred.
feature_batch_shape = [-1, features.shape.as_list()[0] // mini_batch, ksize_rows, ksize_cols, channel]
features = tf.reshape(features, feature_batch_shape)

label_batch_shape = [-1, labels.shape.as_list()[0] // mini_batch, ksize_rows, ksize_cols, channel]
labels = tf.reshape(labels, label_batch_shape)

In [12]:
# Build the network on the placeholders; the layer shapes are printed as the graph is constructed.
output = semantic_seg_model(features = tf_features,
                            labels = tf_labels,
                            mode = tf.estimator.ModeKeys.TRAIN)

(?, 100, 100, 10)
(?, 100, 100, 25)
(?, 50, 50, 50)
(?, 25, 25, 80)
Tensor("deconv4/Cast_1:0", shape=(?, 50, 50, 50), dtype=float16)
Tensor("deconv3/Cast_1:0", shape=(?, 100, 100, 25), dtype=float16)
Tensor("deconv2/Cast_1:0", shape=(?, 100, 100, 10), dtype=float16)
(?, 100, 100, 3)


In [13]:
# conv1_w = tf.get_variable("conv1/conv2d/kernel")
# conv2_w = tf.get_variable("conv2/kernel")
# conv3_w = tf.get_variable("conv3/kernel")
# conv4_w = tf.get_variable("conv4/kernel")
# deconv4_w = tf.get_variable("deconv4/kernel")
# deconv3_w = tf.get_variable("deconv3/kernel")
# deconv2_w = tf.get_variable("deconv2/kernel")
# deconv1_w = tf.get_variable("deconv1/kernel")

# conv1_b = tf.get_variable("conv1/bias")
# conv2_b = tf.get_variable("conv2/bias")
# conv3_b = tf.get_variable("conv3/bias")
# conv4_b = tf.get_variable("conv4/bias")
# deconv4_b = tf.get_variable("deconv4/bias")
# deconv3_b = tf.get_variable("deconv3/bias")
# deconv2_b = tf.get_variable("deconv2/bias")
# deconv1_b = tf.get_variable("deconv1/bias")

# conv1_act = tf.get_variable("conv1/activation")
# conv2_act = tf.get_variable("conv2/activation")
# conv3_act = tf.get_variable("conv3/activation")
# conv4_act = tf.get_variable("conv4/activation")
# deconv4_act = tf.get_variable("deconv4/activation")
# deconv3_act = tf.get_variable("deconv3/activation")
# deconv2_act = tf.get_variable("deconv2/activation")
# deconv1_act = tf.get_variable("deconv1/activation")

In [14]:
# add_summeries(tensors, name):
#     for tensor in tensors:
#         tf.Summary.histogram(name, tensor)

In [15]:
# add_summeries([conv1_w, conv2_w, conv3_w, conv4_w, deconv4_w, deconv3_w, deconv2_w, deconv1_w], name = "weights")
# add_summeries([conv1_b, conv2_b, conv3_b, conv4_b, deconv4_b, deconv3_b, deconv2_b, deconv1_b], name = "biases")
# add_summeries([conv1_act, conv2_act, conv3_act, conv4_act, deconv4_act, deconv3_act, deconv2_act, deconv1_act], name = "activations")

In [16]:
# Show the static shapes of the two input placeholders.
tf_features.shape, tf_labels.shape

(TensorShape([Dimension(None), Dimension(100), Dimension(100), Dimension(3)]),
 TensorShape([Dimension(None), Dimension(100), Dimension(100), Dimension(3)]))

In [17]:
# Show the static shapes of the batched patch tensors.
features.shape, labels.shape

(TensorShape([Dimension(1343), Dimension(27), Dimension(100), Dimension(100), Dimension(3)]),
 TensorShape([Dimension(1343), Dimension(27), Dimension(100), Dimension(100), Dimension(3)]))

In [18]:
# Show the static shape of the model output.
output.shape

TensorShape([Dimension(None), Dimension(100), Dimension(100), Dimension(3)])

In [19]:
epochs = 50
learning_rate=0.001

# with tf.device("/gpu:0"):

# Cost
with tf.name_scope("cross_entropy"):
    # NOTE(review): softmax cross-entropy treats the last axis of `labels` as a
    # probability distribution, while tf_labels is fed label-image patches in
    # the training loop -- confirm the label encoding.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = output, labels = tf_labels)
    # Average in float32: a float16 mean over every pixel of a batch loses
    # precision and can overflow.
    cost = tf.reduce_mean(tf.cast(cross_entropy, tf.float32))
    tf.summary.scalar("xent", cost)

# Optimizer
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

# Accuracy
with tf.name_scope("accuracy"):
    # Both tensors are (batch, rows, cols, channels); the per-pixel prediction
    # is the arg-max over the channel axis (3), not over image rows (1).
    correct_prediction = tf.equal(tf.argmax(tf_labels, axis = 3), tf.argmax(output, axis = 3))
    # float32 for the same reason as the cost above.
    accr = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accr)

summ = tf.summary.merge_all()

# Log device placement and let GPU memory grow on demand.
config = tf.ConfigProto()
config.log_device_placement = True
config.gpu_options.allow_growth = True

sess = tf.Session(config=config)

sess.run(tf.global_variables_initializer())

log_file = "semantic_seg_model_1"

# Build the path with os.path.join so the separator matches the platform.
log_dir = os.path.join(os.getcwd(), log_file)

writer = tf.summary.FileWriter(log_dir)
writer.add_graph(sess.graph)

print("Start Tensorboard by executing this command in cmd: tensorboard --logdir " + log_dir)

Start Tensorboard by executing this command in cmd: tensorboard --logdir C:\Users\Fawad Khalil\AnacondaProjects\sealion-count-kaggle-challenge\semantic_seg_model_1


In [20]:
# List the trainable variables currently in the graph.
tf.trainable_variables()

[<tf.Variable 'conv1/kernel:0' shape=(5, 5, 3, 10) dtype=float16_ref>,
 <tf.Variable 'conv1/bias:0' shape=(10,) dtype=float32_ref>,
 <tf.Variable 'conv2/kernel:0' shape=(5, 5, 10, 25) dtype=float16_ref>,
 <tf.Variable 'conv2/bias:0' shape=(25,) dtype=float32_ref>,
 <tf.Variable 'conv3/kernel:0' shape=(5, 5, 25, 50) dtype=float16_ref>,
 <tf.Variable 'conv3/bias:0' shape=(50,) dtype=float32_ref>,
 <tf.Variable 'conv4/kernel:0' shape=(5, 5, 50, 80) dtype=float16_ref>,
 <tf.Variable 'conv4/bias:0' shape=(80,) dtype=float32_ref>,
 <tf.Variable 'deconv4/kernel:0' shape=(5, 5, 50, 80) dtype=float16_ref>,
 <tf.Variable 'deconv4/bias:0' shape=(50,) dtype=float32_ref>,
 <tf.Variable 'deconv3/kernel:0' shape=(5, 5, 25, 50) dtype=float16_ref>,
 <tf.Variable 'deconv3/bias:0' shape=(25,) dtype=float32_ref>,
 <tf.Variable 'deconv2/kernel:0' shape=(5, 5, 10, 25) dtype=float16_ref>,
 <tf.Variable 'deconv2/bias:0' shape=(10,) dtype=float32_ref>,
 <tf.Variable 'deconv1/kernel:0' shape=(5, 5, 3, 10) dtype

In [None]:
start_time = datetime.datetime.now()
print("Start time: " + str(start_time))

# Build the batch-selection ops once. Writing features[i] / labels[i] inside
# the loop would add new slice ops to the graph on every iteration.
batch_index = tf.placeholder(tf.int32, shape=[], name="batch_index")
batch_features_op = features[batch_index]
batch_labels_op = labels[batch_index]
num_batches = features.shape.as_list()[0]

# NOTE(review): `summ` and `writer` are created earlier but never used here,
# so no scalar/histogram summaries are written during training.
try:
    for epoch in range(epochs):
        train_loss = []; train_accuracy = []

        for i in range(num_batches):
            # Materialise the i-th batch of image and label patches together.
            batch_features, batch_labels = sess.run([batch_features_op, batch_labels_op],
                                                    feed_dict = {batch_index: i})

            feed_dict = {tf_features: batch_features, tf_labels: batch_labels}

            _, sess_cost, sess_accuracy = sess.run([optimizer, cost, accr], feed_dict = feed_dict)

            train_loss.append(sess_cost)
            train_accuracy.append(sess_accuracy)

        # Average loss and accuracy over the epoch
        train_loss = np.mean(train_loss)
        train_accuracy = np.mean(train_accuracy)

        print ("[%02d/%02d] trainLoss: %.4f trainAcc: %.2f" 
               % (epoch + 1, epochs, train_loss, train_accuracy))
finally:
    # Release the session even if training is interrupted or fails.
    sess.close()

end_time = datetime.datetime.now()

print("Start Time: " + str(start_time))
print("End Time: " + str(end_time))
# The timedelta must be converted explicitly; str + timedelta raises TypeError.
print("Time Taken: " + str(end_time - start_time))

Start time: 2017-12-29 15:46:42.942306
