In [1]:
import tensorflow as tf
import numpy as np
import math
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt

In [2]:
def ll(Y, output):
    """Return the sum of squared differences between ``output`` and ``Y``.

    Despite the short name, this is a squared-error measure, not a
    log-likelihood.

    Parameters
    ----------
    Y : ndarray or scalar
        Reference values.
    output : ndarray or scalar
        Values compared against ``Y``; ``output - Y`` must be defined.

    Returns
    -------
    error : scalar
        Sum over all elements of ``(output - Y) ** 2``.
    """
    residual = output - Y
    squared = np.square(residual)
    return np.sum(squared)

In [3]:
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky rectified linear unit.

    Evaluates ``0.5 * (1 + leak) * x + 0.5 * (1 - leak) * abs(x)``, which is
    mathematically ``x`` where ``x >= 0`` and ``leak * x`` where ``x < 0``.

    Parameters
    ----------
    x : Tensor
        Input the nonlinearity is applied to.
    leak : float, optional
        Slope used for negative inputs.
    name : str, optional
        Name of the variable scope the ops are created in.

    Returns
    -------
    x : Tensor
        Result of applying the nonlinearity to ``x``.
    """
    with tf.variable_scope(name):
        linear_coeff = 0.5 * (1 + leak)
        abs_coeff = 0.5 * (1 - leak)
        linear_part = linear_coeff * x
        abs_part = abs_coeff * abs(x)
        return linear_part + abs_part

In [4]:
"""Tutorial on how to create a convolutional autoencoder w/ Tensorflow.

Parag K. Mital, Jan 2016
"""

# %%
def _sample_gaussian(means, logdevs):
    """Draw one reparameterised sample ``means + eps * exp(logdevs / 2)``.

    ``eps`` is a fresh standard-normal tensor with the same shape as ``means``.
    """
    eps = tf.random_normal(shape=means.shape)
    return tf.add(tf.multiply(eps, tf.exp(logdevs / 2)), means)


def autoencoder(input_shape=[None, 784],
                n_filters=[1, 4, 8, 16],
                filter_sizes=[3, 3, 3, 3],
                corruption=False):
    """Build a convolutional autoencoder with stochastic, tied weights.

    Every kernel and bias is sampled on each evaluation as
    ``mean + eps * exp(logdev / 2)`` with ``eps ~ N(0, 1)``; the means and
    log-deviations are the trainable variables.  The decoder reuses the
    encoder's kernel means/log-deviations (tied weights) but draws its own
    noise and owns its own biases.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the ``x`` and ``target`` placeholders.  Either 2-D
        ``[batch, n]`` with ``n`` a perfect square (reshaped to a square
        image with ``n_filters[0]`` channels) or 4-D.
    n_filters : list, optional
        ``n_filters[0]`` is the number of input channels; each following
        entry is the number of output channels of one encoder layer.
    filter_sizes : list, optional
        Side length of the square kernel used by each encoder layer.
    corruption : bool, optional
        Currently ignored; the denoising corruption step is disabled.

    Returns
    -------
    dict
        ``'x'``      : input placeholder of the network,
        ``'target'`` : placeholder holding the reconstruction target,
        ``'z'``      : inner-most latent representation,
        ``'y'``      : reconstruction, i.e. the sigmoid of the decoder
                       logits (same 4-D shape as the reshaped input),
        ``'cost'``   : summed sigmoid cross-entropy between the decoder
                       logits and ``target``.

    Raises
    ------
    ValueError
        If ``input_shape`` is neither 4-D nor 2-D with a perfect-square
        second dimension.
    """
    # %%
    # input to the network and the reconstruction target
    x = tf.placeholder(tf.float32, input_shape, name='x')
    target = tf.placeholder(tf.float32, input_shape, name='target')

    # %%
    # ensure 2-d is converted to square tensor.
    rank = len(x.get_shape())
    if rank == 2:
        x_dim = np.sqrt(x.get_shape().as_list()[1])
        if x_dim != int(x_dim):
            raise ValueError('Unsupported input dimensions')
        x_dim = int(x_dim)
        x_tensor = tf.reshape(x, [-1, x_dim, x_dim, n_filters[0]])
    elif rank == 4:
        x_tensor = x
    else:
        raise ValueError('Unsupported input dimensions')
    current_input = x_tensor

    # %%
    # Build the encoder: one stride-2 convolution per entry of n_filters[1:]
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        n_input = current_input.get_shape().as_list()[3]
        # Remember each layer's input shape so the decoder can invert it.
        shapes.append(current_input.get_shape().as_list())

        kernel_shape = [filter_sizes[layer_i], filter_sizes[layer_i],
                        n_input, n_output]
        W_means = tf.Variable(tf.zeros(kernel_shape))
        W_logdevs = tf.Variable(tf.zeros(kernel_shape))
        b_means = tf.Variable(tf.zeros([n_output]))
        b_logdevs = tf.Variable(tf.zeros([n_output]))

        W = _sample_gaussian(W_means, W_logdevs)
        b = _sample_gaussian(b_means, b_logdevs)

        encoder.append((W_means, W_logdevs))
        output = lrelu(
            tf.add(tf.nn.conv2d(
                current_input, W, strides=[1, 2, 2, 1], padding='SAME'), b))
        current_input = output
        print(W.shape)
        print(b.shape)

    # %%
    # store the latent representation
    z = current_input
    print('Shape z: ', z.shape)
    encoder.reverse()
    shapes.reverse()

    # %%
    # Build the decoder using the same kernel parameters, in reverse order
    for layer_i, shape in enumerate(shapes):
        W_means, W_logdevs = encoder[layer_i]
        # The transposed convolution outputs the kernel's *input* channels.
        n_channels = W_means.get_shape().as_list()[2]
        b_means = tf.Variable(tf.zeros([n_channels]))
        b_logdevs = tf.Variable(tf.zeros([n_channels]))

        W = _sample_gaussian(W_means, W_logdevs)
        b = _sample_gaussian(b_means, b_logdevs)
        output = tf.add(
            tf.nn.conv2d_transpose(
                current_input, W,
                tf.stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                strides=[1, 2, 2, 1], padding='SAME'), b)

        # The last layer stays linear: its output is used as logits below.
        if layer_i < len(shapes) - 1:
            output = lrelu(output)

        current_input = output
        print(W.shape)
        print(b.shape)

    # (An extra stochastic fully connected 784x784 output layer was tried
    # here and is left disabled.)

    # %%
    # The decoder output is a tensor of logits.  The cost applies the sigmoid
    # internally, so the reconstruction must apply it explicitly to live on
    # the same [0, 1] scale as the target pixels.
    logits = current_input
    y = tf.nn.sigmoid(logits)

    # Flatten both sides so the comparison also works for 4-D input_shape;
    # flattening an already 2-D target leaves its shape unchanged.
    cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.contrib.layers.flatten(target),
        logits=tf.contrib.layers.flatten(logits)))

    # %%
    return {'x': x, 'target': target, 'z': z, 'y': y, 'cost': cost}

def debug():
    """Smoke-test the graph by evaluating the cost on one MNIST example.

    Builds the autoencoder, initialises its variables and runs a single
    forward pass with the example used as both input and target.

    Returns
    -------
    cost : float
        Value of ``ae['cost']`` for the sampled example.
    """
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    ae = autoencoder()
    batch_xs, _ = mnist.train.next_batch(1)
    # The context manager releases the session's resources on exit.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # The cost depends on both placeholders, so `target` must be fed too.
        return sess.run(
            ae['cost'],
            feed_dict={ae['x']: batch_xs, ae['target']: batch_xs})

# %%
def test_mnist():
    """Train the convolutional autoencoder on MNIST and plot reconstructions.

    Trains with Adam on the training set (each image is both input and
    target), printing the cost of the last batch after every epoch.  Then
    reconstructs a few test images, prints the squared error of each one and
    shows originals (top row) next to reconstructions (bottom row).
    """
    # %%
    # load MNIST and build the graph
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    ae = autoencoder()

    # %%
    learning_rate = 0.01
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(ae['cost'])

    batch_size = 100
    n_epochs = 100
    n_examples = 10
    n_batches = mnist.train.num_examples // batch_size

    # %%
    # The context manager releases the session's resources once all tensors
    # needed for plotting have been fetched.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # %%
        # Fit all training data
        for epoch_i in range(n_epochs):
            for _ in range(n_batches):
                batch_xs, _labels = mnist.train.next_batch(batch_size)
                feed = {ae['x']: batch_xs, ae['target']: batch_xs}
                sess.run(optimizer, feed_dict=feed)
            # Cost of the last batch of the epoch, re-evaluated after the
            # final update.
            print(epoch_i, sess.run(ae['cost'], feed_dict=feed))

        # %%
        # Reconstruct a handful of test images
        test_xs, _labels = mnist.test.next_batch(n_examples)
        recon = sess.run(
            ae['y'], feed_dict={ae['x']: test_xs, ae['target']: test_xs})

    # %%
    # Plot example reconstructions
    fig, axs = plt.subplots(2, n_examples, figsize=(10, 2))
    for example_i in range(n_examples):
        original = test_xs[example_i, :]
        reconstruction = np.reshape(recon[example_i, ...], (784,))
        print(ll(original, reconstruction))

        axs[0][example_i].imshow(np.reshape(original, (28, 28)))
        axs[1][example_i].imshow(np.reshape(reconstruction, (28, 28)))
    plt.show()


# %%
if __name__ == '__main__':
    # Entry point: run the MNIST training and reconstruction demo.
    test_mnist()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
(3, 3, 1, 4)
(4,)
(3, 3, 4, 8)
(8,)
(3, 3, 8, 16)
(16,)
Shape z:  (?, 4, 4, 16)
(3, 3, 8, 16)
(8,)
(3, 3, 4, 8)
(4,)
(3, 3, 1, 4)
(1,)
0 960254.0


KeyboardInterrupt: 

With an additional fully connected (FC) layer and sigmoid: total loss 5115, image reconstruction error 70.

Without the FC layer: loss 4900, but a large image reconstruction error.

### Conclusion (tentative): no sigmoid after the deconvolution layer?

In [None]:
171000