In [1]:
import numpy as np
import tensorflow as tf
import os
import sys
import glob
from random import shuffle
import cv2
from skimage import color
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
import matplotlib as mpl

#Input parameter
CH_INPUT = 3
CH_OUTPUT = 3
IMG_WIDTH = 384
IMG_HEIGHT = 128
DISP_RANGE = 33

#Image path
INPUT_PATH = 'kitti/left/*.png'
GT_PATH = 'kitti/right/*.png' 

TEST_PATH = 'kitti/left_test/*.png'
GT_TEST_PATH = 'kitti/right_test/*.png' 

#Training parameter
BATCH_SIZE = 2
BATCH_TEST = 20
TRAIN_SIZE = 0.3
LAMBDA_L1 = 100.0
LR =  0.0002 # 0.001  0.0005 0.00146
LR_D =  0.0002
EPOCH = 100000
DECAY_STEP = EPOCH/4

VGG_MEAN = [103.939, 116.779, 123.68]

In [2]:
#Shift pixels
def tf_image_translate(images, tx, ty, interpolation='NEAREST'):
    # got these parameters from solving the equations for pixel translations
    # on https://www.tensorflow.org/api_docs/python/tf/contrib/image/transform
    
    #+tx -> shift to left +ty ->shift up
    #transforms = [1, 0, tx, 0, 1, ty, 0, 0]
    #translate = [BATCH_SIZE, -tx, ty]
    translate = [-tx, ty, BATCH_SIZE]
    return tf.contrib.image.translate(images, translate, interpolation)

def preprocess(image):
    with tf.name_scope("preprocess"):
        # [0, 1] => [-1, 1]
        return image * 2 - 1
    
def deprocess(image):
    with tf.name_scope("deprocess"):
        # [-1, 1] => [0, 1]
        return (image + 1) / 2
    
def batchnorm(inputs):
    return tf.layers.batch_normalization(inputs, 
                                         axis=3, 
                                         epsilon=1e-5, 
                                         momentum=0.1, 
                                         training=True, 
                                         gamma_initializer=tf.random_normal_initializer(1.0, 0.01))

# INPUT PIPELINE

In [3]:
with tf.name_scope('Input_Pipeline'):
    #X
    tf_x = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, CH_INPUT], name='Input')
    view_image = tf.summary.image('input', tf.reshape(tf_x, [-1, IMG_HEIGHT, IMG_WIDTH, CH_INPUT]), 1)
    image = tf.reshape(tf_x, [-1, IMG_HEIGHT, IMG_WIDTH, CH_INPUT], name='img_x')# (batch, height, width, channel)
    #image = preprocess(image)

    #Y
    tf_y = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, CH_OUTPUT], name='Target')
    label_image = tf.summary.image('GT', tf.reshape(tf_y, [-1, IMG_HEIGHT, IMG_WIDTH, CH_OUTPUT]), 1)
    color_norm = tf.reshape(tf_y, [-1, IMG_HEIGHT, IMG_WIDTH, CH_OUTPUT], name='img_y')# (batch, height, width, channel)
    #color_norm = preprocess(color_norm)

# PREPARE DATA

In [4]:
#Wrapper function
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def load_image(addr):
    # cv2 load images as BGR, convert it to RGB
    img = cv2.imread(addr)
    if img is None:
        return None
    #img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

#Load the image using OpenCV in grayscale
def load_image_gray(addr):
    img = cv2.imread(addr, 0)
    if img is None:
        return None
    #img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)
    return img

# DATASET RECORD

In [5]:
def createDataRecord(out_filename, addrs, labels):
    # open the TFRecords file
    writer = tf.python_io.TFRecordWriter(out_filename)
    for i in range(len(addrs)):
        # print how many images are loaded every # images
        if not i % 300:
            print('Train data: {}/{} images'.format(i, len(addrs)))
            sys.stdout.flush()
        # Load the image
        if CH_INPUT == 1:
            img = load_image_gray(addrs[i])
        else:
            img = load_image(addrs[i]) 
        
        if CH_OUTPUT == 1:
            label = load_image_gray(labels[i])
        else: 
            label = load_image(labels[i])
        
        if img is None:
            continue

        if label is None:
            continue
            
        # Create a feature
        feature = {
            'image_raw': _bytes_feature(img.tostring()),
            'label': _bytes_feature(label.tostring())
        }
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        
        # Serialize to string and write on the file
        writer.write(example.SerializeToString())
        
    writer.close()
    sys.stdout.flush()

# CREATE DATA FOR TRAINING

In [6]:
with tf.name_scope('Data_Folder_Read'):
    input_path = INPUT_PATH
    label_path = GT_PATH
    addrs = sorted(glob.glob(input_path))
    labels = sorted(glob.glob(label_path))
    
with tf.name_scope('Shuffle_Data'):
    # to shuffle data
    c = list(zip(addrs, labels))
    shuffle(c,)
    addrs, labels = zip(*c)
    
with tf.name_scope('Create_Datarecord_Train'):
    # Divide the data into % train and % test
    train_addrs = addrs[0:int(TRAIN_SIZE*len(addrs))]
    train_labels = labels[0:int(TRAIN_SIZE*len(labels))]
    createDataRecord('train.tfrecords', train_addrs, train_labels)

Train data: 0/3678 images
Train data: 300/3678 images
Train data: 600/3678 images
Train data: 900/3678 images
Train data: 1200/3678 images
Train data: 1500/3678 images
Train data: 1800/3678 images
Train data: 2100/3678 images
Train data: 2400/3678 images
Train data: 2700/3678 images
Train data: 3000/3678 images
Train data: 3300/3678 images
Train data: 3600/3678 images


# NETWORK STRUCTURE [SYNTHESIS]

In [7]:
class Vgg16:
    """
    A trainable version VGG16.
    """
    def __init__(self, vgg16_npy_path=None, trainable=True, dropout=0.5, output_dim=15360, retrain="semi"):
        if vgg16_npy_path is not None:
            self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item()
        else:
            self.data_dict = None

        self.var_dict = {}
        self.trainable = trainable
        self.dropout = dropout

        self.output_dim=output_dim
        self.retrain=retrain

    def build(self, rgb, train_mode=None):
        """
        load variable from npy to build the VGG
        :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1]
        :param train_mode: a bool tensor, usually a placeholder: if True, dropout will be turned on
        """

        rgb_scaled = rgb * 255.0
        
        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
        assert red.get_shape().as_list()[1:] == [IMG_HEIGHT, IMG_WIDTH, 1]
        assert green.get_shape().as_list()[1:] == [IMG_HEIGHT, IMG_WIDTH, 1]
        assert blue.get_shape().as_list()[1:] == [IMG_HEIGHT, IMG_WIDTH, 1]
        bgr = tf.concat(axis=3, values=[
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ])
        
        assert bgr.get_shape().as_list()[1:] == [IMG_HEIGHT, IMG_WIDTH, 3]

        #1st block
        self.conv1_1 = self.conv_layer(bgr, 3, 64, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, 64, 64, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        
        #2nd block
        self.conv2_1 = self.conv_layer(self.pool1, 64, 128, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, 128, 128, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        
        #3rd block
        self.conv3_1 = self.conv_layer(self.pool2, 128, 256, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, 256, 256, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, 256, 256, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')
        
        #4th block
        self.conv4_1 = self.conv_layer(self.pool3, 256, 512, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, 512, 512, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, 512, 512, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')
        
        #5th block
        self.conv5_1 = self.conv_layer(self.pool4, 512, 512, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, 512, 512, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, 512, 512, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')
        
        #6th block (FC)
        self.fc6 = tf.layers.flatten(self.pool5)
        #self.fc6 = tf.layers.dense(self.fc6, 4096, bias_initializer=tf.random_normal_initializer(stddev=0.01), name="fc6-custom")
        self.fc6 = tf.layers.dense(self.fc6, 4096, name="fc6-custom", bias_initializer=tf.constant_initializer(0.0))
        self.relu6 = tf.nn.relu(self.fc6)
        if train_mode is not None:
            self.relu6 = tf.cond(train_mode, lambda: tf.nn.dropout(self.relu6, self.dropout), lambda: self.relu6)
        elif self.trainable:
            self.relu6 = tf.nn.dropout(self.relu6, self.dropout)
        
        #7th block (FC)
        #self.fc7 = tf.layers.dense(self.relu6, 4096, bias_initializer=tf.random_normal_initializer(stddev=0.01), name="fc7-custom")
        self.fc7 = tf.layers.dense(self.relu6, 4096, name="fc7-custom", bias_initializer=tf.constant_initializer(0.0))
        self.relu7 = tf.nn.relu(self.fc7)
        if train_mode is not None:
            self.relu7 = tf.cond(train_mode, lambda: tf.nn.dropout(self.relu7, self.dropout), lambda: self.relu7)
        elif self.trainable:
            self.relu7 = tf.nn.dropout(self.relu7, self.dropout)
        
        #8th block (FC)
        #self.fc8 = tf.layers.dense(self.relu7, 6144, bias_initializer=tf.random_normal_initializer(stddev=0.01), name="fc8-custom")
        self.fc8 = tf.layers.dense(self.relu7, 6144, name="fc8-custom", bias_initializer=tf.constant_initializer(0.0))
        self.fc8 = tf.reshape(self.fc8, [-1, 4, 12, 128]) 
        self.relu_bn5 = tf.nn.relu(self.fc8)
        self.conv_bn5 = self.conv_only(self.relu_bn5, 128, 3, "conv_bn5")
       
        ######################################################################################
        
        #1st disp probability
        #self.pool1 = batchnorm(self.pool1)
        self.conv_bn1 = self.conv_bn(self.pool1, 3, "conv_bn1")
        self.conv_bn1_ = self.conv_bn(self.conv_bn1, 3, "conv_bn1_")     
        self.deconv_bn1 = self.deconv_layer(self.conv_bn1_, 1, 1, "deconv_bn1")
        
        #2nd disp probability
        #self.pool2 = batchnorm(self.pool2)
        self.conv_bn2 = self.conv_bn(self.pool2, 3, "conv_bn2")   
        self.conv_bn2_ = self.conv_bn(self.conv_bn2, 3, "conv_bn2_") 
        self.deconv_bn2 = self.deconv_layer(self.conv_bn2_, 4, 2, "deconv_bn2")
        
        #3rd disp probability
        #self.pool3 = batchnorm(self.pool3)
        self.conv_bn3 = self.conv_bn(self.pool3, 3, "conv_bn3")     
        self.conv_bn3_ = self.conv_bn(self.conv_bn3, 3, "conv_bn3_") 
        self.deconv_bn3 = self.deconv_layer(self.conv_bn3_, 8, 4, "deconv_bn3")
        
        #4th disp probability
        #self.pool4 = batchnorm(self.pool4)
        self.conv_bn4 = self.conv_bn(self.pool4, 3, "conv_bn4")     
        self.conv_bn4_ = self.conv_bn(self.conv_bn4, 3, "conv_bn4_") 
        self.deconv_bn4 = self.deconv_layer(self.conv_bn4_, 16, 8, "deconv_bn4")
        
        #5th disp probability
        self.deconv_bn5 = self.deconv_layer(self.conv_bn5, 32, 16, "deconv_bn5")
        
        ######################################################################################
   
        #SUM (save memory?)
        self.summation = tf.add(self.deconv_bn1, self.deconv_bn2)
        self.summation = tf.add(self.summation, self.deconv_bn3)
        self.summation = tf.add(self.summation, self.deconv_bn4)
        self.summation = tf.add(self.summation, self.deconv_bn5)
        
        self.relu_sum = tf.nn.relu(self.summation)
        
        #Deconv sum
        self.deconv_sum = self.deconv_layer(self.relu_sum, 4, 2, "deconv_sum")
        self.deconv_sum = tf.nn.relu(self.deconv_sum)
        
        self.conv_sum_ = tf.layers.conv2d(self.deconv_sum, 128, 3, padding='SAME', activation=tf.nn.relu,
                                         kernel_initializer=tf.random_normal_initializer(stddev=0.01), name="conv_sum_")
        
        self.conv_sum = self.conv_only(self.conv_sum_, DISP_RANGE, 3, "conv_sum")

        #Final softmax disp probability
        self.prob = tf.nn.softmax(self.conv_sum)
        
        ######################################################################################
        
        #Slice and multiply
        #Shifted input
        self.image_stack = image
        for i in range(DISP_RANGE-1):
            trans_image = tf_image_translate(image, tx=i+1, ty=0)
            self.image_stack = tf.concat([self.image_stack, trans_image], -1)
        
        #Multiply prob and shifted input
        self.pred_right = tf.zeros_like(image)
        for i in range(DISP_RANGE):
            self.mult_prob = tf.multiply(self.prob[:, :, :, i:i+1], self.image_stack[:, :, :, i*3:(i*3)+3]) 
            self.pred_right = tf.add(self.pred_right, self.mult_prob)
 
    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, in_channels, out_channels, name):
        with tf.variable_scope(name):
            filt, conv_biases = self.get_conv_var(3, in_channels, out_channels, name)

            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)

            return relu  
        
    def conv_only(self, inputs, filters, kernel, name):
        with tf.variable_scope(name):
            conv = tf.layers.conv2d(inputs, filters, kernel, padding='SAME',
                                    kernel_initializer=tf.random_normal_initializer(stddev=0.01))

            return conv 
        
    def conv_bn(self, inputs, kernel, name):
        with tf.variable_scope(name):
            conv = tf.layers.conv2d(inputs, 128, kernel, padding='SAME', activation=tf.nn.relu,
                                    kernel_initializer=tf.random_normal_initializer(stddev=0.01))
            return conv
        
    def deconv_layer(self, inputs, kernel, stride, name):
        with tf.variable_scope(name):
            deconv = tf.layers.conv2d_transpose(inputs, 128,  kernel, (stride,stride), padding='SAME')
            return deconv

    def get_conv_var(self, filter_size, in_channels, out_channels, name):
        initial_value = tf.truncated_normal([filter_size, filter_size, in_channels, out_channels], 0.0, 0.001)

        if self.retrain == 'complete':
            rt = True
        elif self.retrain == 'semi':
            if 'conv1' in name or 'conv2' in name:
                rt = True
            else:
                rt = False
        else:
            rt = False

        filters = self.get_var(initial_value, name, 0, name + "_filters", retrain=rt)
        initial_value = tf.truncated_normal([out_channels], .0, .001)
        biases = self.get_var(initial_value, name, 1, name + "_biases", retrain=rt)

        return filters, biases

    def get_var(self, initial_value, name, idx, var_name, retrain=True):
        if self.data_dict is not None and name in self.data_dict:
            value = self.data_dict[name][idx]
        else:
            value = initial_value

        if self.trainable and retrain:
            var = tf.Variable(value, name=var_name)
        else:
            var = tf.constant(value, dtype=tf.float32, name=var_name)

        self.var_dict[(name, idx)] = var

        # print var_name, var.get_shape().as_list()
        assert var.get_shape() == initial_value.get_shape()

        return var
        

# DISCRIMINATOR

# PATCH GAN

In [8]:
def patch_GAN(input_=None, filters_=64, name_=None, reuse_=None):
    with tf.variable_scope('Patch_GAN'):
        if reuse_:
            tf.get_variable_scope().reuse_variables()
        input_ += tf.random_normal(shape=tf.shape(input_), mean=0.0, stddev=0.1, dtype=tf.float32)
        with tf.name_scope('D_1st_Block'):
            conv1 = tf.layers.conv2d(inputs=input_, 
                                     filters=filters_, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None,
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_1st_Block')

            conv1 = batchnorm(conv1)
            conv1 = tf.nn.leaky_relu(conv1)
        
        with tf.name_scope('D_2nd_Block'):
            conv2 = tf.layers.conv2d(inputs=conv1, 
                                     filters=filters_*2, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_2nd_Block')

            conv2 = batchnorm(conv2)
            conv2 = tf.nn.leaky_relu(conv2)
        
        with tf.name_scope('D_3rd_Block'):
            conv3 = tf.layers.conv2d(inputs=conv2, 
                                     filters=filters_*4, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_3rd_Block_conv_3')

            conv3 = batchnorm(conv3)
            conv3 = tf.nn.leaky_relu(conv3)
        
        with tf.name_scope('D_4th_Block'):
            conv4 = tf.layers.conv2d(inputs=conv3, 
                                     filters=filters_*8, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_4th_Block_conv_4')

            conv4 = batchnorm(conv4)
            conv4 = tf.nn.leaky_relu(conv4)
        
        with tf.name_scope('D_5th_Block'):
            conv5 = tf.layers.conv2d(inputs=conv4, 
                                     filters=1, 
                                     kernel_size=4, 
                                     strides=1, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_5th_Block_conv_5')    
            
        return tf.nn.sigmoid(conv5), conv5

# VANILA GAN

In [9]:
def vanila_GAN(input_=None, filters_=64, name_=None, reuse_=None):
    with tf.variable_scope('Vanila_GAN'):
        if reuse_:
            tf.get_variable_scope().reuse_variables()
        input_ += tf.random_normal(shape=tf.shape(input_), mean=0.0, stddev=0.1, dtype=tf.float32)
        with tf.name_scope('D_1st_Block'):
            conv1 = tf.layers.conv2d(inputs=input_, 
                                     filters=filters_, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None,
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_1st_Block')

            conv1 = batchnorm(conv1)
            conv1 = tf.nn.leaky_relu(conv1)
        
        with tf.name_scope('D_2nd_Block'):
            conv2 = tf.layers.conv2d(inputs=conv1, 
                                     filters=filters_*2, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_2nd_Block')

            conv2 = batchnorm(conv2)
            conv2 = tf.nn.leaky_relu(conv2)
        
        with tf.name_scope('D_3rd_Block'):
            conv3 = tf.layers.conv2d(inputs=conv2, 
                                     filters=filters_*4, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_3rd_Block_conv_3')

            conv3 = batchnorm(conv3)
            conv3 = tf.nn.leaky_relu(conv3)
        
        with tf.name_scope('D_4th_Block'):
            conv4 = tf.layers.conv2d(inputs=conv3, 
                                     filters=filters_*8, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_4th_Block_conv_4')

            conv4 = batchnorm(conv4)
            conv4 = tf.nn.leaky_relu(conv4)
        
        with tf.name_scope('D_5th_Block'):
            conv5 = tf.layers.conv2d(inputs=conv4, 
                                     filters=filters_*8, 
                                     kernel_size=4, 
                                     strides=2, 
                                     padding='SAME', 
                                     activation=None, 
                                     kernel_initializer=tf.random_normal_initializer(0, 0.02),
                                     bias_initializer=tf.constant_initializer(0.0),
                                     name='D_5th_Block_conv_5')   
            conv5 = batchnorm(conv5)
            conv5 = tf.nn.leaky_relu(conv5)
            
        with tf.name_scope('D_6th_Block'):
            flat1 = tf.layers.flatten(conv5)
            dense1 = tf.layers.dense(flat1, 1024)
            dense1 = tf.nn.leaky_relu(dense1)
            dense1 = tf.layers.batch_normalization(dense1,
                                         epsilon=1e-5, 
                                         momentum=0.1, 
                                         training=True, 
                                         gamma_initializer=tf.random_normal_initializer(1.0, 0.01))
            out = tf.layers.dense(dense1, 1)
            
        return tf.nn.sigmoid(out), out

# CREATE NETWORK

In [10]:
with tf.name_scope('View_Synthesis'):
    with tf.name_scope('Generator'):
        vgg = Vgg16(vgg16_npy_path= 'vgg16.npy', output_dim = 15360, retrain="complete")
        train_mode = tf.placeholder(tf.bool)
        input_vgg = tf.divide(image, 255)
        vgg.build(input_vgg, train_mode)
        
    with tf.name_scope('Discriminator_Patch'):
        #Concat A and real B
        #real_input = tf.concat([vgg.prob, color_norm], 3)
        #Concat A and fake B
        #fake_input = tf.concat([vgg.prob, vgg.pred_right], 3)
        
        with tf.name_scope('Discriminator_Real'):
            sigmoid_real, real_logits = patch_GAN(color_norm, 64, 'D_real', False)
        with tf.name_scope('Discriminator_Fake'):
            sigmoid_fake, fake_logits = patch_GAN(vgg.pred_right, 64, 'D_fake', True)
            
#     with tf.name_scope('Discriminator_Vanila'):      
#         with tf.name_scope('Discriminator_Real'):
#             sigmoid_real, real_logits = vanila_GAN(color_norm, 64, 'D_real', False)
#         with tf.name_scope('Discriminator_Fake'):
#             sigmoid_fake, fake_logits = vanila_GAN(vgg.pred_right, 64, 'D_fake', True)    

# LOSS

In [11]:
def sigmoid_cross_entropy_with_logits(x, y):
    try:
        return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y)
    except:
        return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, targets=y)

with tf.name_scope('Loss'): 
    output_image = tf.summary.image('Target', tf.cast(tf.reshape(vgg.pred_right, 
                            [-1, IMG_HEIGHT, IMG_WIDTH, CH_OUTPUT]), tf.uint8) , 1)
    
    #Label smoothing
    zero_smooth = tf.random_uniform(tf.shape(sigmoid_fake), minval=0.0, maxval=0.2)
    one_smooth = tf.random_uniform(tf.shape(sigmoid_fake), minval=0.0, maxval=-0.2)
    
    l1_loss = tf.reduce_mean(tf.losses.absolute_difference(color_norm, vgg.pred_right))
    tf.summary.scalar('L1_loss', l1_loss)
    
    G_Adv_loss = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(fake_logits, tf.ones_like(sigmoid_fake)+one_smooth), name='G_adv_loss')
        
    G_Adv_loss = tf.reduce_mean(sigmoid_real)
    
    G_Total_Loss = tf.add(G_Adv_loss, (LAMBDA_L1 * l1_loss), name='G_total_loss')
    
    tf.summary.scalar('Generator_Adv_Loss', G_Adv_loss)
    tf.summary.scalar('Generator_Total_Loss', G_Total_Loss)
    
    ###################################################################################################
    D_Adv_Real = tf.reduce_mean(
        sigmoid_cross_entropy_with_logits(real_logits, tf.ones_like(sigmoid_real)+one_smooth), name='D_real_adv_loss')

    D_Adv_Fake = tf.reduce_mean(
        sigmoid_cross_entropy_with_logits(fake_logits, tf.zeros_like(sigmoid_fake)+zero_smooth), name='D_fake_adv_loss')
    D_Total_Loss = tf.add(D_Adv_Real, D_Adv_Fake, name='D_total_loss')
        
    tf.summary.scalar('Discriminator_Real', D_Adv_Real)
    tf.summary.scalar('Discriminator_Fake', D_Adv_Fake)
    tf.summary.scalar('Discriminator_Total_Loss', D_Total_Loss)
    
#     shift_image = tf.summary.image('Shift', tf.cast(tf.reshape(image_stack[:, :, :, (DISP_RANGE*3)-3:DISP_RANGE*3],
#                                     [-1, IMG_WIDTH, IMG_HEIGHT, CH_OUTPUT]), tf.uint8) , 1)
  
    disp_image = tf.summary.image('Z', tf.cast(tf.reshape(vgg.prob[:, :, :, 3:4]*255, 
                                    [-1, IMG_HEIGHT, IMG_WIDTH, 1]), tf.uint8) , 1)
    
    disp_image2 = tf.summary.image('Z2', tf.cast(tf.reshape(vgg.prob[:, :, :, 14:15]*255, 
                                    [-1, IMG_HEIGHT, IMG_WIDTH, 1]), tf.uint8) , 1)
    
    disp_image3 = tf.summary.image('Z3', tf.cast(tf.reshape(vgg.prob[:, :, :, 27:28]*255, 
                                    [-1, IMG_HEIGHT, IMG_WIDTH, 1]), tf.uint8) , 1)


# TRAIN

In [12]:
with tf.name_scope('Train'):
    batch = tf.Variable(0, dtype=tf.float32)
    learning_rate = tf.train.exponential_decay(
                      LR,                    # Base learning rate.
                      batch,      # Current index into the dataset.
                      DECAY_STEP,       # Decay step.
                      0.97,       # Decay rate.
                      staircase=True)
    learning_rate_D = tf.train.exponential_decay(
                      LR_D,                    # Base learning rate.
                      batch,      # Current index into the dataset.
                      DECAY_STEP,       # Decay step.
                      0.97,       # Decay rate.
                      staircase=True)
    #train_op = tf.train.AdamOptimizer(learning_rate=learning_rate, name='optimizer_adam').minimize(l1_loss)
    train_G = tf.train.AdamOptimizer(learning_rate=learning_rate, name='optimizer_G').minimize(G_Total_Loss)
    train_D = tf.train.AdamOptimizer(learning_rate=learning_rate_D, name='optimizer_D').minimize(D_Total_Loss)
    

# INPUT PARSING

In [13]:
#To get one record and parse it to get the label and image out
def parser(record):
    keys_to_features = {
        "image_raw": tf.FixedLenFeature([], tf.string),
        "label":     tf.FixedLenFeature([], tf.string)
    }
    #Read one record
    parsed = tf.parse_single_example(record, keys_to_features)
    #Take the image and bytes
    image = tf.decode_raw(parsed["image_raw"], tf.uint8)
    label = tf.decode_raw(parsed["label"], tf.uint8)
    #Cast to float
    image = tf.cast(image, tf.float32)
    label = tf.cast(label, tf.float32)
    image = tf.reshape(image, shape=[IMG_HEIGHT, IMG_WIDTH, CH_INPUT])
    label = tf.reshape(label, shape=[IMG_HEIGHT, IMG_WIDTH, CH_OUTPUT])
    #Normalize the input and label into [0...1]
    #image = tf.divide(image, 255)
    #label = tf.divide(label, 255)

    return {'image': image}, {'label': label}

def input_fn(filenames):
    #Create data record
    dataset = tf.data.TFRecordDataset(filenames=filenames, num_parallel_reads=1000)
    dataset = dataset.map(parser, num_parallel_calls=1000)
    dataset = dataset.shuffle(500).repeat().batch(BATCH_SIZE)
    #dataset = dataset.prefetch(buffer_size=2)
    return dataset

def test_fn(filenames):
    #Create data record
    dataset = tf.data.TFRecordDataset(filenames=filenames, num_parallel_reads=100)
    dataset = dataset.map(parser, num_parallel_calls=100)
    dataset = dataset.batch(BATCH_TEST)
    return dataset

def train_input_fn():
    return input_fn(filenames=["train.tfrecords"])

def test_input_fn():
    return test_fn(filenames=["test.tfrecords"])

# CREATE TRAIN SET

In [14]:
with tf.name_scope('Data_Folder_Read'):
    input_path = INPUT_PATH
    label_path = GT_PATH
    addrs = sorted(glob.glob(input_path))
    labels = sorted(glob.glob(label_path))
    
with tf.name_scope('Create_Training_Set'):
    train_dataset = train_input_fn()
    iterator = train_dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

# TRAIN

In [None]:
merged = tf.summary.merge_all()
saver = tf.train.Saver()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

sess=tf.Session(config=config)
sess.run(tf.group(tf.global_variables_initializer(), 
                  iterator.initializer)
                 )
#saver.restore(sess, "saver/Synthesis/model15000.ckpt")
writer = tf.summary.FileWriter('log/Synthesis',sess.graph)
run_options = tf.RunOptions(report_tensor_allocations_upon_oom = True, trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
for step in range(EPOCH+1):
    train_x, train_y = sess.run(next_batch)
    
    #_, l1_loss_, output_, summary_ = sess.run([train_op, l1_loss, vgg.pred_right, merged], {tf_x:train_x['image'], tf_y:train_y['label'], train_mode:True}, options=run_options, run_metadata=run_metadata)
    if step%1 == 0:
        _, G_loss_, output_, summary_ = sess.run([train_G, G_Adv_loss, vgg.pred_right, merged], {tf_x:train_x['image'], tf_y:train_y['label'], train_mode:True}, options=run_options, run_metadata=run_metadata)
    if step%1 == 0:
        _, D_loss_ = sess.run([train_D, D_Total_Loss], {tf_x:train_x['image'], tf_y:train_y['label'], train_mode:True}, options=run_options, run_metadata=run_metadata)
   
    if step %2000 == 0:
        save_path = saver.save(sess, "saver/Synthesis/model%i.ckpt" %step)
        print("Model saved in path: %s" % save_path)
     
    if step == 15000:
        save_path = saver.save(sess, "saver/Synthesis/model%i.ckpt" %step)
        print("Model saved in path: %s" % save_path)
        
    if step%30 == 0:
        #writer.add_run_metadata(run_metadata, 'step%d' % step)
        writer.add_summary(summary_, step)     
        #print('Step:', step, '| L_1 loss:%.4f' %l1_loss_)
        print('Step:', step, '| G loss:%.4f' %G_loss_, '| D loss:%.4f' %D_loss_)

Step: 0 | G loss:0.5791 | D loss:2.5322
Step: 30 | G loss:0.2240 | D loss:2.3825
Step: 60 | G loss:0.2564 | D loss:2.1606
Step: 90 | G loss:0.2412 | D loss:2.1278
Step: 120 | G loss:0.2753 | D loss:1.9879
Step: 150 | G loss:0.2373 | D loss:2.0484
Step: 180 | G loss:0.1955 | D loss:2.2356
Step: 210 | G loss:0.2306 | D loss:2.0410
Step: 240 | G loss:0.2512 | D loss:1.9617
Step: 270 | G loss:0.2170 | D loss:2.0616
Step: 300 | G loss:0.2389 | D loss:1.9927
Step: 330 | G loss:0.2350 | D loss:2.0040
Step: 360 | G loss:0.2160 | D loss:2.0999
Step: 390 | G loss:0.2193 | D loss:2.0322
Step: 420 | G loss:0.2330 | D loss:1.9802
Step: 450 | G loss:0.2321 | D loss:1.9766
Step: 480 | G loss:0.2152 | D loss:2.0056
Step: 510 | G loss:0.2142 | D loss:2.0272
Step: 540 | G loss:0.2151 | D loss:2.0114
Step: 570 | G loss:0.2396 | D loss:1.9640
Step: 600 | G loss:0.1985 | D loss:2.0586
Step: 630 | G loss:0.2135 | D loss:2.0085
Step: 660 | G loss:0.2149 | D loss:2.1239
Step: 690 | G loss:0.2144 | D loss:2.00

Step: 5730 | G loss:0.2292 | D loss:1.8124
Step: 5760 | G loss:0.2334 | D loss:1.7855
Step: 5790 | G loss:0.2253 | D loss:1.8241
Step: 5820 | G loss:0.2196 | D loss:1.8496
Step: 5850 | G loss:0.2449 | D loss:1.8149
Step: 5880 | G loss:0.2375 | D loss:1.7801
Step: 5910 | G loss:0.2437 | D loss:1.7584
Step: 5940 | G loss:0.2242 | D loss:1.8242
Step: 5970 | G loss:0.2358 | D loss:1.7757
Step: 6000 | G loss:0.2385 | D loss:1.7578
Step: 6030 | G loss:0.2472 | D loss:1.7103
Step: 6060 | G loss:0.2442 | D loss:1.7439
Step: 6090 | G loss:0.2195 | D loss:1.8592
Step: 6120 | G loss:0.2431 | D loss:1.7234
Step: 6150 | G loss:0.2454 | D loss:1.8042


# FORWARD

In [None]:
with tf.name_scope('Test_Folder_Read'):
    input_path = TEST_PATH
    label_path = GT_TEST_PATH
    addrs = sorted(glob.glob(input_path))
    labels = sorted(glob.glob(label_path))
    
with tf.name_scope('Create_Datarecord_Test'):
    # Divide the data into % train and % test
    test_addrs = addrs[:BATCH_TEST]
    test_labels = labels[:BATCH_TEST]
    createDataRecord('test.tfrecords', test_addrs, test_labels)

In [None]:
with tf.name_scope('Create_Test_Set'):
    test_dataset = test_input_fn()
    iterator = test_dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

In [None]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess=tf.Session(config=config)

merged = tf.summary.merge_all()
sess.run(iterator.initializer)
saver = tf.train.Saver()
run_options = tf.RunOptions(report_tensor_allocations_upon_oom = True)

%timeit
saver.restore(sess, "saver/Synthesis/model30000.ckpt")
test_x, test_y = sess.run(next_batch)
l1_loss_, output_, depth_ = sess.run([l1_loss, vgg.pred_right, vgg.prob], {tf_x:test_x['image'], tf_y:test_y['label'], train_mode:False}, options=run_options)


In [None]:
for i in range(BATCH_TEST):
    plt.figure(i)
    temp = output_[i,:,:,:]
    #color = np.reshape(output_[0], [256,256,3])
    imgplots = plt.imshow((temp).astype('uint8'))
    plt.show()
    temp = cv2.cvtColor(temp, cv2.COLOR_BGR2RGB)
    cv2.imwrite("outputGD%i.png" %i, temp)

In [None]:
for d in range(DISP_RANGE):
    plt.figure(d)
    temp = depth_[6,:,:,d:d+1]*255
    temp  = np.squeeze(temp, 2)
    imgplot = plt.imshow((temp).astype('uint8'), cmap='gray')
    plt.show()
    cv2.imwrite("output%i.png" %d, temp)

In [None]:
import math
def psnr(img1, img2):
    mse = np.mean( (img1 - img2) ** 2 )
    if mse == 0:
        return 100
    PIXEL_MAX = 255.0
    return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))

original = cv2.imread("GT7.png")
contrast = cv2.imread("output7.png")

d=psnr(original,contrast)
print(d)