# Eigen et al 2014

Eigen, David, Christian Puhrsch, and Rob Fergus. "Depth map prediction from a single image using a multi-scale deep network." Advances in neural information processing systems. 2014. [[Eigen2014](https://papers.nips.cc/paper/5539-depth-map-prediction-from-a-single-image-using-a-multi-scale-deep-network.pdf)]

## Datasets: 
- [Make3D](http://make3d.cs.cornell.edu/data.html#make3d)
- [NYU](http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html)

In [None]:
import tensorflow as tf
import numpy as np
from scipy.misc import imresize
from matplotlib import pyplot as plt

In [None]:
# Make3D data
from Make3D import train_pairs, test_pairs

# NYU data
# from NYU import nyu_data
# train_pairs, test_pairs = nyu_data()

# Visualize samples from the dataset

In [None]:
# Preview training pairs: image in the left column, its depth map in the right.
# The grid has five rows, so only five of the ten sliced pairs are drawn.
_, axis = plt.subplots(nrows=5, ncols=2, figsize=(10, 20))
plt.tight_layout()
for (rgb, d), axes_row in zip(train_pairs[:10], axis):
    image_ax, depth_ax = axes_row
    image_ax.axis('off')
    depth_ax.axis('off')
    image_ax.imshow(rgb)
    # Resize the depth map to the image's size so both panels line up.
    depth_ax.imshow(imresize(d, rgb.shape))
plt.show()

# Take the paper's convolutional network approach

## Simplify dataset first
- Convert to grayscale
- Scale targets down so the convolutional network can use striding and so we do not need padding
- Normalize values

In [None]:
# Split the (image, depth) pairs into two parallel tuples: inputs and targets.
train_data, train_targets = zip(*train_pairs)
test_data, test_targets = zip(*test_pairs)

def rgb2gray(rgb):
    """Collapse a colour image to a single luminance channel.

    Only the first three entries of the last axis are used, so a fourth
    (e.g. alpha) channel is ignored. The three channels are combined as a
    weighted sum with weights 0.299, 0.587 and 0.114 (assumes the channel
    order is R, G, B, as the function name suggests).

    Returns an array with the last axis removed.
    """
    luma_weights = [0.299, 0.587, 0.114]
    colour_channels = rgb[..., :3]
    return np.dot(colour_channels, luma_weights)

# Bring inputs and targets to the resolutions the paper's network works with,
# so the same architecture can be reused: 304x228 inputs, 74x55 targets.
# NOTE(review): scipy.misc.imresize goes through an 8-bit image and presumably
# rescales every float array to its own 0..255 range first, so the absolute
# depth scale of each target is likely lost here -- confirm this is intended.
def _resize_to_unit(img, shape):
    """Resize *img* to *shape* with imresize and divide the result by 255."""
    return imresize(img, shape) / 255


train_data = [_resize_to_unit(rgb2gray(img), (304, 228)) for img in train_data]
train_targets = [_resize_to_unit(img, (74, 55)) for img in train_targets]
test_data = [_resize_to_unit(rgb2gray(img), (304, 228)) for img in test_data]
test_targets = [_resize_to_unit(img, (74, 55)) for img in test_targets]

# Stack the per-sample arrays into one array per split.
train_x = np.asarray(train_data)
train_t = np.asarray(train_targets)
test_x = np.asarray(test_data)
test_t = np.asarray(test_targets)

# Quick sanity report on shapes and value ranges of the training split.
print('train input/target shapes', train_x[0].shape, train_t[0].shape)
print('train input min/max/ptp', np.min(train_x), np.max(train_x), np.ptp(train_x))
print('train target min/max/ptp', np.min(train_t), np.max(train_t), np.ptp(train_t))

# Show the preprocessed pairs; the five-row grid limits this to five samples.
tuples = zip(train_x[:10], train_t[:10])
fig, axis = plt.subplots(nrows=5, ncols=2, figsize=(10, 20))
plt.tight_layout()
plt.gray()
for (gray, depth), axes_row in zip(tuples, axis):
    input_ax, target_ax = axes_row
    input_ax.axis('off')
    target_ax.axis('off')
    input_ax.imshow(gray)
    # Resize the small target back to the input's size for side-by-side viewing.
    target_ax.imshow(imresize(depth, gray.shape))
plt.show()

## Define Tensorflow Graph

In [None]:
from alexnet import alexnet_v2

# Start from an empty graph so that re-running this cell does not stack new
# ops on top of the ones from a previous run.
tf.reset_default_graph()

# Grayscale inputs and depth targets; the batch dimension is left open.
x = tf.placeholder(dtype=tf.float32, shape=(None, 304, 228))
t = tf.placeholder(dtype=tf.float32, shape=(None, 74, 55))
# Evaluates to False unless a run explicitly feeds a value for it.
training = tf.placeholder_with_default(input=False, shape=None)
# Append a channel axis to both tensors (conv2d expects one).
t_ = tf.expand_dims(t, axis=3)
x_ = tf.expand_dims(x, axis=3)

def generator(x):
    """Build the coarse and fine depth networks and return the prediction.

    Args:
        x: image batch tensor that already carries a trailing channel axis.
            The hard-coded reshape to 8*6*256 below fits the 304x228
            single-channel inputs used in this notebook; other input sizes
            would need that reshape adjusted.

    Returns:
        The fine network's single-channel output tensor. Its channel axis is
        kept; callers squeeze it themselves if they need a 2-D map.

    Reads the module-level `training` placeholder to switch dropout on/off.
    """
    # Coarse network: conv/pool stack followed by two dense layers that
    # produce one value per target pixel.
    coarse = tf.layers.conv2d(x, filters=96, kernel_size=11, strides=4, activation=tf.nn.relu)
    coarse = tf.layers.max_pooling2d(coarse, pool_size=2, strides=2)
    coarse = tf.layers.conv2d(coarse, filters=256, kernel_size=5, activation=tf.nn.relu, padding='same')
    coarse = tf.layers.max_pooling2d(coarse, pool_size=2, strides=2)
    coarse = tf.layers.conv2d(coarse, filters=384, kernel_size=3, activation=tf.nn.relu, padding='same')
    coarse = tf.layers.conv2d(coarse, filters=384, kernel_size=3, activation=tf.nn.relu, padding='same')
    coarse = tf.layers.conv2d(coarse, filters=256, kernel_size=3, activation=tf.nn.relu, strides=2)
    coarse = tf.reshape(coarse, (-1, 8*6*256))  # flatten for the dense layers
    coarse = tf.layers.dense(coarse, units=4096, activation=tf.nn.relu)
    coarse = tf.layers.dropout(coarse, rate=.5, training=training)  # only active while training
    coarse = tf.layers.dense(coarse, units=(74*55))
    coarse = tf.reshape(coarse, (-1, 74, 55, 1))  # back to a one-channel map

    # Fine network: works on the same input as the coarse network. It must
    # read the function argument `x`, not the module-level `x_`, otherwise the
    # argument is silently ignored for this half of the model.
    fine = tf.layers.conv2d(x, filters=63, kernel_size=9, strides=2, activation=tf.nn.relu)
    fine = tf.layers.max_pooling2d(fine, pool_size=2, strides=2)
    fine = tf.concat([fine, coarse], 3)  # 63 fine channels + 1 coarse channel
    fine = tf.layers.conv2d(fine, filters=64, kernel_size=5, activation=tf.nn.relu, padding='same')
    fine = tf.layers.conv2d(fine, filters=1, kernel_size=5, padding='same')

    return fine

def discriminator(x):
    """Classify depth maps as real or generated using `alexnet_v2`.

    Args:
        x: batch of depth maps with a trailing channel axis.

    Returns:
        A `(D_prob, D_logit)` pair: the sigmoid of the logits and the raw
        logits as returned by `alexnet_v2`.

    Reads the module-level `training` placeholder for `is_training`.
    """
    # Scale the maps to the 224x224 resolution passed on to alexnet_v2.
    alexnet_input = tf.image.resize_images(x, (224, 224))
    logits, _ = alexnet_v2(
        alexnet_input,
        num_classes=2,
        is_training=training,
        dropout_keep_prob=0.5,
        spatial_squeeze=True,
    )
    # Values near 1 mean "real depth map".
    # NOTE(review): with num_classes=2 this presumably yields two values per
    # sample rather than a single scalar -- confirm against alexnet_v2.
    return tf.nn.sigmoid(logits), logits
    
# Build the generator inside its own variable scope so its weights can be
# separated from the discriminator's when the two optimizers are created.
with tf.variable_scope('generator'):
    G_sample = generator(x_)

# make_template makes both calls below share one set of discriminator weights,
# all created under the 'discriminator' variable scope.
discriminator_template = tf.make_template('discriminator', discriminator)
D_real, D_logit_real = discriminator_template(t_)
D_fake, D_logit_fake = discriminator_template(G_sample)

# Trainable weights of each network, selected by variable-scope prefix.
theta_G = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
theta_D = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')

# Discriminator objective: label real depth maps 1 and generated ones 0.
D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=D_logit_real, labels=tf.ones_like(D_logit_real)))
D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
D_loss = D_loss_real + D_loss_fake
# Generator objective: make the discriminator label generated maps as 1.
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

# Each optimizer may only touch its own network. Without var_list, minimizing
# G_loss would also push the discriminator's weights towards accepting fakes,
# and minimizing D_loss would also change the generator.
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    
#loss = tf.reduce_mean(tf.square(t - y))  # TODO: Implement loss from paper

#nr_pixel = 74*55*32.
#nr_missing = tf.reduce_sum(tf.cast(t==0, tf.float32))

#nr_missing = tf.Print(nr_missing, [nr_missing])

# n = tf.count_nonzero(y)
# d = y - tf.log(t)
# dsq = tf.square(d)

# loss = tf.reduce_mean( (1/n) * tf.reduce_sum(dsq, name="sum1") - (1/n**2) * (tf.reduce_sum(d))**2 )

# optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)

## Train
Regularly evaluate the loss on the test data and compute test predictions when done with training.

In [None]:
from sys import stdout

def print_progress(iteration, total, prefix='', suffix='', length=50):
    """Redraw a one-line text progress bar on stdout.

    Args:
        iteration: current step.
        total: number of steps that counts as 100 %. A total of 0 is shown
            as a full bar instead of raising ZeroDivisionError.
        prefix: value printed before the bar (formatted with %s).
        suffix: value printed after the percentage (formatted with %s).
        length: width of the bar in characters.

    The line starts with a carriage return so successive calls overwrite each
    other; a newline is written once `iteration == total`.
    """
    if total:
        percent_done = 100 * (iteration / float(total))
        filled = int(round(length * iteration / float(total)))
    else:
        # Nothing to do counts as finished.
        percent_done = 100.0
        filled = length
    # Keep the bar exactly `length` wide even if iteration is out of range.
    filled = max(0, min(length, filled))
    percent = '{0:.2f}'.format(percent_done)
    bar = '=' * filled + '-' * (length - filled)
    stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percent, '%', suffix))
    if iteration == total:
        stdout.write('\n')
    stdout.flush()

In [None]:
# Number of training iterations; each one processes a single mini-batch.
EPOCHS = 1000

# Starting values chosen so that the first iteration takes the generator branch.
gloss, dloss = 1, 0.99

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

for i in range(1, EPOCHS + 1):
    # Random mini-batch of up to 32 distinct training samples.
    batch = np.random.permutation(len(train_t))[:32]
    batch_x, batch_t = train_x[batch], train_t[batch]

    # Train whichever network currently shows the larger loss: the generator
    # when its loss exceeds the discriminator's, the discriminator otherwise.
    if gloss > dloss:
        solver, feed = G_solver, {x: batch_x, training: True}
    else:
        solver, feed = D_solver, {x: batch_x, t: batch_t, training: True}
    sess.run(solver, feed_dict=feed)

    # Re-measure both losses on the full test set after every step; these
    # values also decide which network is trained next.
    dloss, gloss = sess.run(
        [D_loss, G_loss],
        feed_dict={x: test_x, t: test_t, training: False},
    )
    if i % 1 == 0:  # always true; raise the modulus to report less often
        text = 'Losses: {:.5f}, {:.5f}'.format(gloss, dloss)
        print_progress(i, EPOCHS, suffix=text, prefix=i)
        

In [None]:

# Run the generator on the whole test set. `training` is not fed, so it falls
# back to its default of False.
test_p = sess.run(G_sample, {x: test_x})
print(sess.run([G_loss, D_loss], {x: test_x, t: test_t}))
# Drop only the trailing channel axis. A bare np.squeeze would also remove the
# batch axis when the test set holds a single sample, which would break the
# per-sample slicing in the visualization below.
test_p = np.squeeze(test_p, axis=-1)


# To Do

- loss function
- automatic early stopping (evaluate on the test set every 10 epochs -> watch out for overfitting)
- pretraining
- local fine scale network
- KITTI dataset?

## Visualize results on test data

In [None]:
# Compare input, ground-truth depth and predicted depth side by side.
# The grid has five rows, so only the first five test samples are drawn.
triples = zip(test_x[:20], test_t[:20], test_p[:20])
_, axis = plt.subplots(nrows=5, ncols=3, figsize=(10, 20))
plt.tight_layout()
plt.gray()
for (image, target, prediction), axes_row in zip(triples, axis):
    for panel in axes_row:
        panel.axis('off')
    input_ax, target_ax, prediction_ax = axes_row
    input_ax.imshow(image)
    # Bring both depth maps to the input's size so the three panels match.
    target_ax.imshow(imresize(target, image.shape))
    prediction_ax.imshow(imresize(prediction, image.shape))
plt.show()

## Evaluation