In [1]:
%config IPCompleter.greedy=True

In [1]:
#adhere to https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/mnist/input_data.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import weapon_data as weapons


  from ._conv import register_converters as _register_converters


# Variational Autoencoder

In [2]:
weapon_data = weapons.DataSet(seed=19071991) 

num_samples = weapon_data.num_examples
input_dimension = weapon_data.num_features

148
24


In [9]:
# Functions to get variables
def weights(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

### Building the networks
#### The encoder network $q_\phi(z|x)$

The decoder network takes the input image and calculates the mean $\mu =$ `z_mu` and the log variance $\log\sigma^2 =$ `z_ls2` of the Gaussian, thus producing the latent variable z.

In [10]:
n_z = 2 #Dimension of the latent space
# Input
x = tf.placeholder(tf.float32, shape=[None, input_dimension]) #Batchsize x Number of Pixels
n_hidden_1 = 5
n_hidden_2 = 6 #one more to spot errors

# First hidden layer
W_fc1 = weights([input_dimension, n_hidden_1])
b_fc1 = bias([n_hidden_1])
h_1   = tf.nn.softplus(tf.matmul(x, W_fc1) + b_fc1)

# Second hidden layer
W_fc2 = weights([n_hidden_1, n_hidden_2]) 
b_fc2 = bias([n_hidden_2])
h_2   = tf.nn.softplus(tf.matmul(h_1, W_fc2) + b_fc2)


# Parameters for the Gaussian
z_mu = tf.add(tf.matmul(h_2, weights([n_hidden_2, n_z])), bias([n_z]))
# A little trick:
#  sigma is always > 0.
#  We don't want to enforce that the network produces only positive numbers, therefore we let 
#  the network model the parameter log(\sigma^2) $\in [\infty, \infty]$
z_ls2 = tf.add(tf.matmul(h_2, weights([n_hidden_2, n_z])), bias([n_z])) 

#### The decoder network $p_\theta(x|z)$ a.k.a. generator network

Samples from a Gaussian using the given mean and the std. The sampling is done by addding a random number ensuring that backpropagation works fine.

In [11]:
batch_size = 2 #We have to define the batch size with the current version of TensorFlow
eps = tf.random_normal((batch_size, n_z), 0, 1, dtype=tf.float32) # Adding a random number
z = tf.add(z_mu, tf.multiply(tf.sqrt(tf.exp(z_ls2)), eps))  # The sampled z

In [12]:
n_hidden_1 = 5
n_hidden_2 = 6

W_fc1_g = weights([n_z, n_hidden_1])
b_fc1_g = bias([n_hidden_1])
h_1_g   = tf.nn.softplus(tf.matmul(z, W_fc1_g) + b_fc1_g)

W_fc2_g = weights([n_hidden_1, n_hidden_2])
b_fc2_g = bias([n_hidden_2])
h_2_g   = tf.nn.softplus(tf.matmul(h_1_g, W_fc2_g) + b_fc2_g)

#x_mu = tf.add(tf.matmul(h_2_g,  weights([n_hidden_2, input_dimension])), bias([input_dimension]))
#x_ls2 = tf.add(tf.matmul(h_2_g,  weights([n_hidden_2, input_dimension])), bias([input_dimension]))
x_reconstr_mean = (tf.add(tf.matmul(h_2_g,  weights([n_hidden_2, input_dimension])), bias([input_dimension])))

#### Defining the loss function

##### The reconstruction error
We assume that the data x, is Gaussian distributed with diagnoal covariance matrix $\Sigma_{ij} = \delta_{i,j} \sigma_i^2$. The parameters of that Gaussian are determined by the encoder network. The reconstruction error for the $i-th$ example in the min-batch is given by 
$$
    \mathbb{E}_{q(z|x^{(i)})}\left( \log\left(p(x^{(i)}|z)\right)\right) 
$$
we approximate the expectation with samplinging from the distribution (eaven with $L=1$)
$$
    \mathbb{E}_{q(z|x^{(i)})}\left( \log\left(p(x^{(i)}|z)\right)\right) \approx 
    \frac{1}{L} \sum_{i=1}^L \log\left(p(x^{(i)}|z^{(i,l)})\right) \approx \log\left(p(x^{(i)}|z^{(i,l)})\right)
$$

For the simple $J-dimensional$ Gaussian, we obtain the following reconstruction error (neglegting a constant term)
$$
    -\log\left(p(x^{(i)}|z^{(i)})\right) = \sum_{j=1}^D \frac{1}{2} \log(\sigma_{x_j}^2) + \frac{(x^{(i)}_j - \mu_{x_j})^2}{2 \sigma_{x_j}^2}
$$

##### The regularisation term

$$
    -D_{\tt{KL}} \left( q(z|x^{(i)}) || p(z) \right) = \frac{1}{2} \sum_{j=1}^{J} \left(1 + \log(\sigma_{z_j}^{(i)^2}) - \mu_{z_j}^{(i)^2} - \sigma_{z_j}^{(i)^2} \right)
$$

In [13]:
def kullbackLeibler(mu, log_sigma):
    """(Gaussian) Kullback-Leibler divergence KL(q||p), per training example"""
    # (tf.Tensor, tf.Tensor) -> tf.Tensor
    with tf.name_scope("KL_divergence"):
        # = -0.5 * (1 + log(sigma**2) - mu**2 - sigma**2)
        return -0.5 * tf.reduce_sum(1 + 2 * log_sigma - mu**2 -
                                    tf.exp(2 * log_sigma), 1)

def crossEntropy(obs, actual, offset=1e-7):
    """Binary cross-entropy, per training example"""
    # (tf.Tensor, tf.Tensor, float) -> tf.Tensor
    with tf.name_scope("cross_entropy"):
        # bound by clipping to avoid nan
        obs_ = tf.clip_by_value(obs, offset, 1 - offset)
    return -tf.reduce_sum(actual * tf.log(obs_) + (1 - actual) * tf.log(1 - obs_), 1)

def l1_loss(obs, actual):
    """L1 loss (a.k.a. LAD), per training example"""
    # (tf.Tensor, tf.Tensor, float) -> tf.Tensor
    with tf.name_scope("l1_loss"):
        return tf.reduce_sum(tf.abs(obs - actual) , 1)

def l2_loss(obs, actual):
    """L2 loss (a.k.a. Euclidean / LSE), per training example"""
    # (tf.Tensor, tf.Tensor, float) -> tf.Tensor
    with tf.name_scope("l2_loss"):
        return tf.reduce_sum(tf.square(obs - actual), 1)

In [14]:
#reconstr_loss = tf.reduce_sum(0.5 * x_ls2 + (tf.square(x-x_mu)/(2.0 * tf.exp(x_ls2))), 1) #varies between implementations
#reconstr_loss = -tf.reduce_sum(x * tf.log(1e-10 + x_reconstr_mean) + (1-x) * tf.log(1e-10 + 1 - x_reconstr_mean), 1)
#l2 reconstr_loss = tf.reduce_sum(tf.square(x - x_reconstr_mean), 1)
reconstr_loss = l2_loss(x_reconstr_mean, x)
#latent_loss = kullbackLeibler(z_mu, tf.exp(z_ls2))
latent_loss = -0.5 * tf.reduce_sum(1 + z_ls2 - tf.square(z_mu) - tf.exp(z_ls2), 1)
cost = tf.reduce_mean(reconstr_loss + latent_loss)   # average over batch

# Use ADAM optimizer
optimizer =  tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

In [23]:
# This takes quite some time to converge. I am courious what would happen 
# if a proper optimizer is finally implemented in TensorFlow

runs = int(148/batch_size)*10#2000 #Set to 0, for no training
init = tf.global_variables_initializer()
saver = tf.train.Saver()

while False:
    sess = tf.Session()  
    sess.run(init)
    batch_xs = next_batch(batch_size)
    dd = sess.run([cost], feed_dict={x: batch_xs})
    print('Test run of cost operation in while loop results in {}'.format(dd))
    if not np.isnan(dd) and not np.isinf(dd):
        break
    else:
        sess.close()
        
sess = tf.Session()
sess.run(init)
batch_xs = next_batch(batch_size)
dd = sess.run([cost], feed_dict={x: batch_xs})
print('Test run after starting {}'.format(dd))

for epoch in range(runs):
    avg_cost = 0.
    batch_xs = next_batch(batch_size)
    _,d, z_mean_val, z_log_sigma_sq_val = sess.run((optimizer, cost, z_mu, z_ls2), feed_dict={x: batch_xs})
    avg_cost += d / batch_size

    # Display logs per epoch step
    if epoch % 10 == 0:
        save_path = saver.save(sess, "weapon_data_model/vae.ckpt") #Saves the weights (not the graph)
        #print("Model saved in file: {}".format(save_path))
        print("Epoch:"+ '%04d' % (epoch+1) + ", cost=" + "{:.9f}".format(avg_cost))
        #print ("{} {} mean sigma2 {}".format(z_mean_val.min(), z_mean_val.max(), np.mean(np.exp(z_log_sigma_sq_val))))

sess.close()

Test run after starting [530420.9]
Epoch:0001, cost=331802.500000000
Epoch:0011, cost=108173.007812500
Epoch:0021, cost=331982.937500000
Epoch:0031, cost=202402.531250000
Epoch:0041, cost=23465.378906250
Epoch:0051, cost=37647.515625000
Epoch:0061, cost=10498.804687500
Epoch:0071, cost=63332.656250000
Epoch:0081, cost=21577.066406250
Epoch:0091, cost=15281.964843750
Epoch:0101, cost=44165.578125000
Epoch:0111, cost=8360.294921875
Epoch:0121, cost=10160.234375000
Epoch:0131, cost=15307.389648438
Epoch:0141, cost=4382.636718750
Epoch:0151, cost=14944.000000000
Epoch:0161, cost=97967.117187500
Epoch:0171, cost=32260.865234375
Epoch:0181, cost=31626.935546875
Epoch:0191, cost=3921.431884766
Epoch:0201, cost=28113.908203125
Epoch:0211, cost=59486.656250000
Epoch:0221, cost=40041.589843750
Epoch:0231, cost=101753.984375000
Epoch:0241, cost=4276.640625000
Epoch:0251, cost=55199.187500000
Epoch:0261, cost=7904.743164062
Epoch:0271, cost=18384.005859375
Epoch:0281, cost=149949.187500000
Epoch:0

In [20]:
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "weapon_data_model/vae.ckpt")
    x_sample = next_batch(batch_size)
    x_reconstr_mean
    
    var = (x_reconstr_mean, z, z_mu, z_ls2, cost, reconstr_loss, latent_loss)
    out = sess.run(var, feed_dict={x: x_sample})
    x_reconstr_mean, z_vals, z_mu_val,z_ls2_val, cost_val, reconstr_loss_val,latent_loss_val = out
    
    print(x_reconstr_mean)
    
    #var = (x_mu, x_ls2, z, z_mu, z_ls2, cost, reconstr_loss, latent_loss)
    #out = sess.run(var, feed_dict={x: x_sample})
    #x_mu_val, x_ls2_val, z_vals, z_mu_val,z_ls2_val, cost_val, reconstr_loss_val,latent_loss_val = out
    
    #print(x_mu_val, x_ls2_val)

INFO:tensorflow:Restoring parameters from weapon_data_model/vae.ckpt


TypeError: Fetch argument array([[ 6.7409782e+01,  2.8279922e+01,  5.9980602e+00,  1.0671598e+02,
        -1.9471575e+00,  1.0835794e+00,  1.5019717e+00,  3.2939026e+00,
         1.4040254e+01,  3.4450936e-01,  1.6532490e+00, -8.0291986e-01,
         6.1896240e+02,  2.7491537e+01,  1.0997212e+01,  4.1996454e+02,
         7.6064050e-01, -1.4602741e+00,  2.3844433e-01,  3.1823854e+00,
         2.9108608e+00,  2.2712715e+00,  1.6112109e+00, -4.3679368e-01],
       [ 4.7143547e+01,  1.9816837e+01,  4.2732105e+00,  7.4646439e+01,
        -1.3100367e+00,  8.5092115e-01,  1.0970207e+00,  2.3159807e+00,
         9.8729258e+00,  2.6554835e-01,  1.1988659e+00, -5.3215116e-01,
         4.3251254e+02,  1.9304661e+01,  7.6553459e+00,  2.9352896e+02,
         5.7675612e-01, -1.0075586e+00,  2.2959353e-01,  2.2375538e+00,
         2.0603664e+00,  1.5944074e+00,  1.1246222e+00, -2.4407931e-01]],
      dtype=float32) has invalid type <class 'numpy.ndarray'>, must be a string or Tensor. (Can not convert a ndarray into a Tensor or Operation.)