In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import skimage.feature
import tensorflow as tf


# Show TensorFlow log messages at INFO level and above.
tf.logging.set_verbosity(tf.logging.INFO)

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Disabled preprocessing step (OpenCV is imported as `cv` in this notebook):
# blurred_img = cv.GaussianBlur(labeled_img, (5, 5), 0)

# training code

In [36]:
def get_input_img(name,
                  labeled_dir='D:/ML work/NOAA Sea Lion count/Data/Semantics Segmentation/',
                  original_dir='D:/ML work/NOAA Sea Lion count/Data/Train/'):
    """Load one training image together with its labelled (ground-truth) image.

    Args:
        name: Base file name without extension, e.g. ``'0'``.
        labeled_dir: Directory holding ``<name>-processed.tif``.
        original_dir: Directory holding ``<name>.jpg``.

    Returns:
        Tuple ``(original_img, labeled_img)`` exactly as returned by
        ``cv.imread``.

    Raises:
        IOError: If either file cannot be read.
    """
    labeled_path = os.path.join(labeled_dir, name + '-processed.tif')
    original_path = os.path.join(original_dir, name + '.jpg')

    labeled_img = cv.imread(labeled_path)
    original_img = cv.imread(original_path)

    # cv.imread reports a missing/unreadable file by returning None rather
    # than raising, so fail here instead of much later inside np.pad.
    for path, img in ((labeled_path, labeled_img), (original_path, original_img)):
        if img is None:
            raise IOError('Could not read image: ' + path)

    return original_img, labeled_img


In [51]:
# Reference: https://github.com/sjchoi86/Tensorflow-101/blob/master/notebooks/semseg_basic.ipynb
def unpooling(inputOrg, size, mask=None):
    """Upsample an NHWC tensor 2x along height and width by repeating pixels.

    Every element is duplicated along the width axis and then along the
    height axis, so ``output[n, i, j, k] == inputOrg[n, i // 2, j // 2, k]``.

    Args:
        inputOrg: Tensor laid out as (batch, height, width, channels).
        size: Indexable of four entries ``[m, h, w, c]`` giving the batch
            size, height, width and channel count of ``inputOrg``.
        mask: Optional. When given, it is passed as the sparse indices to
            ``tf.sparse_to_dense`` with the upsampled tensor as the values.
            NOTE(review): the expected index layout is not visible here;
            confirm against the caller.

    Returns:
        The upsampled tensor of shape (batch, 2*h, 2*w, c); when ``mask`` is
        given, the tuple ``(upsampled, dense_mask)``.
    """
    m = size[0]
    h = size[1]
    w = size[2]
    c = size[3]

    # Work in (m, c, h, w) order so the axis being doubled is innermost.
    nchw = tf.transpose(inputOrg, [0, 3, 1, 2])

    # A column vector times a [1, 2] row of ones emits every element twice.
    # The kernel is built in the input's dtype so that float32 (or other)
    # inputs work as well as float16.
    duplicate = tf.ones([1, 2], dtype=nchw.dtype)

    # Double the width: (m, c, h, w) -> (m, c, h, 2w)
    widened = tf.matmul(tf.reshape(nchw, [-1, 1]), duplicate)
    widened = tf.reshape(widened, [-1, c, h, w * 2])

    # Swap to (m, c, 2w, h) and double the height the same way.
    ncwh = tf.transpose(widened, [0, 1, 3, 2])
    output = tf.matmul(tf.reshape(ncwh, [-1, 1]), duplicate)
    output = tf.reshape(output, [-1, c, w * 2, h * 2])

    # Back to NHWC: (m, c, 2w, 2h) -> (m, 2h, 2w, c)
    output = tf.transpose(output, [0, 3, 2, 1])

    if mask is None:
        return output

    outshape = tf.stack([m, h * 2, w * 2, c])
    dense_mask = tf.sparse_to_dense(mask, outshape, output, 0)
    return output, dense_mask

In [61]:
def semantic_seg_model(features, labels, mode):
    """Build the encoder-decoder CNN and return per-pixel class logits.

    Encoder: three 5x5 convolutions (10, 25, 50 filters), a 2x2 max-pool,
    then a 5x5 convolution with 80 filters. Decoder: a transposed
    convolution (50 filters), a 2x unpooling, then transposed convolutions
    with 25, 10 and 3 filters.

    Args:
        features: Input tensor laid out as (batch, height, width, channels).
            Height and width should be even so that pooling followed by
            unpooling restores the input size.
        labels: Unused by the graph; presumably kept to mirror the
            tf.estimator ``model_fn`` signature.
        mode: Unused by the graph (see ``labels``).

    Returns:
        Tensor of shape (batch, height, width, 3) holding unscaled logits.
        The last layer is linear: a ReLU there would clamp every logit to be
        non-negative before the softmax cross-entropy is applied.
    """

    def conv(inputs, filters):
        # 5x5 "same" convolution followed by ReLU.
        return tf.layers.conv2d(
            inputs=inputs,
            filters=filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)

    def deconv(inputs, filters, activation=tf.nn.relu):
        # 5x5 "same" transposed convolution (stride 1 keeps the spatial size).
        return tf.layers.conv2d_transpose(
            inputs=inputs,
            filters=filters,
            kernel_size=[5, 5],
            padding="same",
            activation=activation)

    # ---- Encoding ----
    # Full resolution (100 x 100 for the patches used in this notebook).
    conv1 = conv(features, 10)
    conv2 = conv(conv1, 25)
    conv3 = conv(conv2, 50)

    # Halve the resolution (100 x 100 -> 50 x 50).
    pool1 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    conv4 = conv(pool1, 80)

    # ---- Decoding ----
    deconv4 = deconv(conv4, 50)

    # Take the size to unpool from deconv4 itself instead of hard-coding
    # 50 x 50 x 50, so other (even) patch sizes work too. Dimensions that are
    # not statically known fall back to the runtime shape.
    static_shape = deconv4.shape.as_list()
    dynamic_shape = tf.shape(deconv4)
    unpool_size = [dynamic_shape[0]] + [
        static_shape[axis] if static_shape[axis] is not None else dynamic_shape[axis]
        for axis in (1, 2, 3)
    ]

    # Restore the resolution (50 x 50 -> 100 x 100).
    unpool1 = unpooling(deconv4, unpool_size)

    deconv3 = deconv(unpool1, 25)
    deconv2 = deconv(deconv3, 10)

    # Output layer: one logit per class and pixel, no activation.
    deconv1 = deconv(deconv2, 3, activation=None)

    return deconv1


In [None]:
# Load the first training image and its labelled counterpart.
features, labels = get_input_img('0')

# Canvas size every image is padded to before patches are cut out.
height = 3900
width = 6000
channel = 3

# The image is far too large to feed at once, so it is divided into patches.
# height / batch_length_vertical == width / batch_length_horizontal == 100,
# i.e. the number of patch-sized tiles that fit along each axis.
batch_length_vertical = 39
batch_length_horizontal = 60
# Number of batches the patches are grouped into: 153 * 237 = 36261 patches
# are extracted below, which is 1343 batches of 27 patches each.
mini_batch = 1343

# ksize_rows and ksize_cols define the size of a patch.
ksize_rows = 100
ksize_cols = 100

# Stride between neighbouring patches. With 100-pixel patches a stride of 25
# means adjacent patches share 75 pixels.
overlapping_region = 25

# The placeholders receive patch batches, so their shape is tied to the patch
# size rather than to height / batch_length_* (which merely happens to be 100).
tf_features = tf.placeholder(tf.float16, [None, ksize_rows, ksize_cols, channel], name='features')
tf_labels = tf.placeholder(tf.float16, [None, ksize_rows, ksize_cols, channel], name='labels')

In [None]:
# Bring both images to the common height x width canvas. The zero padding is
# added before the first row and the first column; channels are left alone.
features, labels = (
    np.pad(
        img,
        ((height - img.shape[0], 0), (width - img.shape[1], 0), (0, 0)),
        mode='constant'
    )
    for img in (features, labels)
)

In [None]:
# Add a leading batch axis: (rows, cols, channels) -> (1, rows, cols, channels)
features = features.reshape((-1,) + features.shape[:3])
labels = labels.reshape((-1,) + labels.shape[:3])

In [None]:
# Settings shared by the image and the ground-truth patch extraction.
patch_options = dict(
    ksizes=[1, ksize_rows, ksize_cols, 1],
    strides=[1, overlapping_region, overlapping_region, 1],
    padding="VALID",
    rates=[1, 1, 1, 1])
patch_shape = [-1, ksize_rows, ksize_cols, channel]

# Cut the image into patches and flatten them into one list of patches.
features = tf.reshape(tf.extract_image_patches(features, **patch_options), patch_shape)

# Same for the ground-truth image.
labels = tf.reshape(tf.extract_image_patches(labels, **patch_options), patch_shape)

In [None]:
# Group the flat patch lists into batches for training:
# (num_patches, rows, cols, channel) -> (batches, patches_per_batch, rows, cols, channel)
feature_patches_per_batch = int(features.shape.as_list()[0] / mini_batch)
features = tf.reshape(
    features, [-1, feature_patches_per_batch, ksize_rows, ksize_cols, channel])

label_patches_per_batch = int(labels.shape.as_list()[0] / mini_batch)
labels = tf.reshape(
    labels, [-1, label_patches_per_batch, ksize_rows, ksize_cols, channel])

In [62]:
# Wire the network onto the patch placeholders; `output` holds its final layer.
output = semantic_seg_model(
    features=tf_features,
    labels=tf_labels,
    mode=tf.estimator.ModeKeys.TRAIN)

In [63]:
tf_features.shape, tf_labels.shape

(TensorShape([Dimension(None), Dimension(100), Dimension(100), Dimension(3)]),
 TensorShape([Dimension(None), Dimension(100), Dimension(100), Dimension(3)]))

In [64]:
features.shape, labels.shape

(TensorShape([Dimension(1343), Dimension(27), Dimension(100), Dimension(100), Dimension(3)]),
 TensorShape([Dimension(1343), Dimension(27), Dimension(100), Dimension(100), Dimension(3)]))

In [65]:
output.shape

TensorShape([Dimension(None), Dimension(100), Dimension(100), Dimension(3)])

In [None]:

epochs = 50

# with tf.device("/gpu:0"):

# Cost: softmax cross-entropy over the channel axis, averaged over all pixels.
# NOTE(review): softmax_cross_entropy_with_logits treats each label vector
# along the last axis as a probability distribution, while tf_labels is fed
# the raw pixel values of the labelled image - confirm the label encoding.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=tf_labels)
cost = tf.reduce_mean(cross_entropy)

# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Accuracy: the predicted class of a pixel is the argmax over the channel
# axis (axis 3 of [batch, rows, cols, channel]), not over the rows (axis 1).
corr = tf.equal(tf.argmax(tf_labels, 3), tf.argmax(output, 3))
# Average in float32; float16 has too little range and precision to average
# the hundreds of thousands of per-pixel values in a batch reliably.
accr = tf.reduce_mean(tf.cast(corr, tf.float32))

# Build the batch-selection ops once. Indexing `features[i]` inside the loop
# would add new slice ops to the graph on every iteration of every epoch.
batch_index = tf.placeholder(tf.int32, [], name='batch_index')
batch_features_op = features[batch_index]
batch_labels_op = labels[batch_index]
num_batches = features.shape.as_list()[0]

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(epochs):
        train_loss = []
        train_accuracy = []

        for i in range(num_batches):
            # Materialise batch i of the image / ground-truth patches.
            batch_features, batch_labels = sess.run(
                [batch_features_op, batch_labels_op],
                feed_dict={batch_index: i})

            batch_features = np.reshape(batch_features, [-1, ksize_rows, ksize_cols, channel])
            batch_labels = np.reshape(batch_labels, [-1, ksize_rows, ksize_cols, channel])

            feed_dict = {tf_features: batch_features, tf_labels: batch_labels}

            _, sess_cost, sess_accuracy = sess.run([optimizer, cost, accr], feed_dict=feed_dict)

            train_loss.append(sess_cost)
            train_accuracy.append(sess_accuracy)

        # Average loss and accuracy over the epoch
        train_loss = np.mean(train_loss)
        train_accuracy = np.mean(train_accuracy)

        print ("[%02d/%02d] trainLoss: %.4f trainAcc: %.2f" 
               % (epoch + 1, epochs, train_loss, train_accuracy))
