In [None]:
import itertools
from random import shuffle
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import pylab as pl
from IPython import display
%matplotlib inline

## Variational Inference with the Spherical Fisher Divergence

This notebook explores using the [Spherical Fisher Divergence (SFD)](https://arxiv.org/abs/1510.00861) for posterior variational inference.  

### 0.  Data

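Throughout this notebook, we'll use data drawn from a Gaussian Mixture Model (GMM).  The samples themselves are drawn by `draw_samples`, which is called further below; the helper defined next shuffles the samples and their labels with one shared random permutation so that each sample stays paired with its label...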

In [None]:
def shuffle_in_unison_inplace(a, b):
    """Reorder two arrays along their first axis with one shared random permutation.

    Row ``i`` of the first result and row ``i`` of the second result come from
    the same original index, so sample/label pairs stay aligned.

    Despite the name, the inputs are not assigned to; the permuted results are
    returned as a tuple ``(a_shuffled, b_shuffled)``.

    Raises:
        AssertionError: if ``a`` and ``b`` differ in length along axis 0.
    """
    n_rows = a.shape[0]
    assert n_rows == b.shape[0]
    # A single permutation of the row indices is used to index both arrays.
    order = np.random.permutation(n_rows)
    return a[order], b[order]

Let's generate some training data...

In [None]:
# Seed NumPy's global RNG so that "Restart Kernel -> Run All" regenerates the
# same shuffle (and the same samples, if draw_samples uses np.random) each time.
SEED = 0
np.random.seed(SEED)

N = 300  # number of datapoints
input_d = 2  # dimensionality of each datapoint

# Define mixture model: mixing weights, component means, component covariances
pi = np.array([.35, .65])
mu_s = [np.array([-5., -5.]), np.array([5., 5.])]
cov_s = [np.array([[1., 0.], [0., 1.]]), np.array([[1., 0.], [0., 1.]])]

# Draw N samples together with their labels.
# NOTE(review): draw_samples is not defined in any cell visible above this one;
# confirm it is defined before this cell runs, otherwise this is a NameError on
# a fresh kernel.
X_train, y_train = draw_samples(pi, mu_s, cov_s, N)

# Shuffle samples and labels with the same permutation so pairs stay aligned.
X_train, y_train = shuffle_in_unison_inplace(X_train, y_train)

### 1.  Logistic Regression
Our first task will be to classify the component from which each datapoint was drawn.  Let's now write our first TensorFlow code.  The first thing to do is make *symbolic variables* for the input features and the labels.  The data are fed in through what are called *placeholders*...

In [None]:
### Make symbolic variables
# Both placeholders leave the first (batch) dimension unspecified, so any
# number of rows can be fed at run time.
X = tf.placeholder(tf.float32, shape=[None, input_d])  # samples to discriminate
Y = tf.placeholder(tf.float32, shape=[None, 1])  # labels

Next we need to create functions to initialize and run the regression model...

In [None]:
def init_regression_model(in_size, std=.1):
    """Create the trainable parameters of a single-output linear model.

    Args:
        in_size: number of input features (rows of the weight matrix).
        std: standard deviation of the normal distribution used to
            initialise the weights.

    Returns:
        dict with two ``tf.Variable`` entries:
        ``'w'`` -- weights of shape ``[in_size, 1]``, drawn from a normal
        distribution with standard deviation ``std``;
        ``'b'`` -- bias of shape ``[1]``, initialised to zero.
    """
    weights = tf.Variable(tf.random_normal([in_size, 1], stddev=std))
    bias = tf.Variable(tf.zeros([1,]))
    return {'w': weights, 'b': bias}

def linear_regressor(X, params):
    """Compute the affine map ``X . w + b``.

    Args:
        X: input passed as the left operand of ``tf.matmul``.
        params: mapping with key ``'w'`` (right operand of the matrix
            product) and key ``'b'`` (term added to the product).

    Returns:
        The result of ``tf.matmul(X, params['w'])`` plus ``params['b']``.
    """
    projection = tf.matmul(X, params['w'])
    return projection + params['b']

Let's use those functions and create a symbolic cost...

In [None]:
# initialize the model parameters (weights 'w' and bias 'b')
model_params = init_regression_model(input_d)

# define the model's output: the linear model applied to whatever is fed into X
linear_model_out = linear_regressor(X, model_params)

# define the cost function: sigmoid cross-entropy between the model output and
# the labels Y, averaged over all elements.
# NOTE(review): the positional call below matches the pre-1.0 TensorFlow
# signature; TensorFlow >= 1.0 expects keyword arguments (labels=..., logits=...)
# -- confirm against the installed version.
per_example_ce = tf.nn.sigmoid_cross_entropy_with_logits(linear_model_out, Y)
ce_cost = tf.reduce_mean(per_example_ce)

Lastly, let's train the model...

In [None]:
# Set training params
n_epochs = 20
learning_rate = .1

# Get the training operator: plain gradient descent on the cross-entropy cost,
# updating only the regression weights and bias.
train_model = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    ce_cost, var_list=[model_params['w'], model_params['b']])

final_params = None
with tf.Session() as session:
    # NOTE(review): initialize_all_variables is the older spelling of the
    # variable initializer -- confirm it is still available in the installed
    # TensorFlow version.
    tf.initialize_all_variables().run()

    # range / print(...) with a single argument behave the same on Python 2
    # and Python 3, so this cell runs on either kernel.
    for epoch_idx in range(n_epochs):

        # Perform one update on the full training set and fetch the cost
        # evaluated in that same run.
        _, loss = session.run([train_model, ce_cost], feed_dict={X: X_train, Y: y_train})
        print("Epoch %d.  Cross-entropy error: %.3f" % (epoch_idx, loss))

    # Save the final params.
    # NOTICE: this needs to be done within the session!
    # Both variables are fetched in a single run call.
    w_final, b_final = session.run([model_params['w'], model_params['b']])
    final_params = {'w': w_final, 'b': b_final}

print(final_params)