In [16]:
import tensorflow as tf
from edward.models import Normal, Empirical, Categorical, Multinomial
import edward as ed
import numpy as np

# Defining the layers.

In Edward you can define weight variables with different kinds of priors. Here is my simple implementation of a conv2d layer whose weights are Edward random variables (zero-mean, unit-variance Gaussian prior). It is built on **tf.nn.conv2d**.

In [2]:
def edward_conv2d(_input, _filter, _kernel, _stride, name, reuse=None, apply_relu=True):
    """Build a 2-D convolution layer whose kernel is an Edward random variable.

    Every kernel entry gets an independent Normal(0, 1) prior. The bias is a
    plain TensorFlow variable named "b" created inside the scope `name`.

    Args:
        _input: Input tensor. Its last dimension is taken as the number of
            input channels (the bias is added in 'NHWC' layout).
        _filter: Number of output channels.
        _kernel: Sequence whose first two entries are the kernel height and
            width.
        _stride: Stride applied to both spatial dimensions.
        name: Name of the variable scope that wraps the layer.
        reuse: Forwarded to `tf.variable_scope`.
        apply_relu: When True (the default, which reproduces the previous
            behaviour) the output goes through ReLU; when False the
            pre-activation is returned.

    Returns:
        A tuple `(output, w, b)`: the layer output, the Normal prior over the
        kernel, and the bias variable.
    """
    with tf.variable_scope(name, reuse=reuse):
        # Public shape API instead of the private `Dimension._value`.
        in_channels = _input.shape.as_list()[-1]
        kernel_shape = [_kernel[0], _kernel[1], in_channels, _filter]

        # Prior over the kernel. `loc` and `scale` share one explicit shape
        # rather than relying on broadcasting a size-1 channel axis.
        w = Normal(loc=tf.zeros(kernel_shape), scale=tf.ones(kernel_shape))

        # No initializer is passed, so the scope's default initializer applies.
        b = tf.get_variable("b", shape=(_filter,))

        op = tf.nn.conv2d(_input, w, strides=[1, _stride, _stride, 1], padding='SAME')
        _output = tf.nn.bias_add(op, b, data_format='NHWC')
        if apply_relu:
            _output = tf.nn.relu(_output)
        return _output, w, b

In [3]:
def edward_dense(_input, _output, name, reuse=None, apply_relu=True):
    """Build a fully connected layer whose weight matrix is an Edward random variable.

    Every weight gets an independent Normal(0, 1) prior. The bias is a plain
    TensorFlow variable named "b" created inside the scope `name`.

    Args:
        _input: Input tensor; it is the left operand of `tf.matmul`, and its
            last dimension is taken as the number of input features.
        _output: Number of output units.
        name: Name of the variable scope that wraps the layer.
        reuse: Forwarded to `tf.variable_scope`.
        apply_relu: When True (the default, which reproduces the previous
            behaviour) the output goes through ReLU. Pass False for a layer
            that must produce unconstrained values, e.g. logits.

    Returns:
        A tuple `(output, w, b)`: the layer output, the Normal prior over the
        weight matrix, and the bias variable.
    """
    with tf.variable_scope(name, reuse=reuse):
        # Public shape API instead of the private `Dimension._value`.
        n_inputs = _input.shape.as_list()[-1]
        weight_shape = [n_inputs, _output]

        # Prior over the weight matrix.
        w = Normal(loc=tf.zeros(weight_shape), scale=tf.ones(weight_shape))

        # No initializer is passed, so the scope's default initializer applies.
        b = tf.get_variable("b", shape=(_output,))

        pre_activation = tf.matmul(_input, w) + b
        if apply_relu:
            return tf.nn.relu(pre_activation), w, b
        return pre_activation, w, b

# Defining the model.

The model is a 2-layer convolutional network with 16 and 32 kernels of size 3×3. On top of the convolutional layers sits a 2-layer dense network (64 and 10 units, respectively).

In [4]:
# Simple convolutional network whose layer weights are Edward random variables.
# Each layer helper returns (output, weight prior, bias variable); every bias
# gets its own name so that none of them is silently overwritten.

data = tf.placeholder(tf.float32, [None, 28, 28, 1])
conv1, w_conv1, b_conv1 = edward_conv2d(data, 16, [3, 3], 2, "conv1")
conv2, w_conv2, b_conv2 = edward_conv2d(conv1, 32, [3, 3], 2, "conv2")
flat = tf.contrib.layers.flatten(conv2)
dense1, w_dense1, b_dense1 = edward_dense(flat, 64, "dense1")
# NOTE(review): edward_dense applies ReLU to its output, so the logits fed to
# the likelihood below are constrained to be non-negative.
dense2, w_dense2, b_dense2 = edward_dense(dense1, 10, "dense2")

# The labels bound to `target` are one-hot vectors (one count per image), so
# the likelihood is a Multinomial with a single trial. The earlier
# total_count=4. did not match counts that sum to 1.
target = Multinomial(total_count=1., logits=dense2)

# Loading the data

We use the first 4000 data points from MNIST.  
This is the same setup as Remy's implementation using NUTS.

In [6]:
# MNIST reader from the TensorFlow tutorials package.
from tensorflow.examples.tutorials.mnist import input_data

# Read the dataset from the local "MNIST_data/" directory with the labels
# returned as one-hot vectors.
mnist_dir = "MNIST_data/"
mnist = input_data.read_data_sets(mnist_dir, one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [7]:
# Number of training examples kept for inference.
n_train = 4000

# Use the public `images` / `labels` accessors rather than the private
# `_images` / `_labels` attributes, and let reshape infer the leading
# dimension instead of hard-coding the 55000-example training-set size.
# X has shape (n_train, 28, 28, 1); Y holds the matching one-hot labels.
X = mnist.train.images[:n_train].reshape(-1, 28, 28, 1)
Y = mnist.train.labels[:n_train]

# Posterior Inference with SGHMC.

I played around with different step sizes, but every sample still ends up as **NaN**.  
I definitely need to explore this more.

In [8]:
# Sampler settings.

# Step size passed to the sgHMC inference run.
stepsize = 1e-4

# Total number of samples to draw.
T = 10000

# Number of initial samples treated as burn-in.
nburn = 1000

# Plot every `stride`-th sample.
stride = 10

In [9]:
def _empirical_like(weight, n_samples, init_stddev=1.0):
    """Create an Empirical variable that stores `n_samples` draws shaped like `weight`.

    Args:
        weight: Random variable whose static shape defines the shape of one sample.
        n_samples: Number of samples the Empirical variable holds.
        init_stddev: Standard deviation of the zero-mean Gaussian used to fill
            the sample buffer initially. The default of 1.0 matches a plain
            `tf.random_normal` call.

    Returns:
        An `Empirical` whose `params` variable has shape
        `[n_samples] + weight.shape`.
    """
    # Public shape API instead of iterating over the private `Dimension._value`.
    buffer_shape = [n_samples] + weight.shape.as_list()
    initial_samples = tf.random_normal(buffer_shape, stddev=init_stddev)
    return Empirical(params=tf.Variable(initial_samples))


# One sample buffer per weight prior.
# NOTE(review): the buffers start from unit-variance draws; whether a smaller
# `init_stddev` avoids the NaN samples seen later is untested.
qw_conv1 = _empirical_like(w_conv1, T)
qw_conv2 = _empirical_like(w_conv2, T)
qw_dense1 = _empirical_like(w_dense1, T)
qw_dense2 = _empirical_like(w_dense2, T)

In [10]:
# Pair every weight prior with the Empirical variable that stores its samples.
posterior_map = {
    w_conv1: qw_conv1,
    w_conv2: qw_conv2,
    w_dense1: qw_dense1,
    w_dense2: qw_dense2,
}

# Bind the input placeholder and the likelihood to the observed arrays.
observed = {data: X, target: Y}

# Run SGHMC with the configured step size.
inference = ed.SGHMC(posterior_map, data=observed)
inference.run(step_size=stepsize)

10000/10000 [100%] ██████████████████████████████ Elapsed: 239s | Acceptance Rate: 1.000


In [15]:
# Evaluate the Empirical approximation for the dense1 weights to inspect the
# values produced by the sampler.
qw_dense1.eval()

array([[ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan]], dtype=float32)

I cannot seem to resolve this NaN issue.