# Setup

I recommend installing TensorFlow inside a conda environment. If you don't already have conda or Anaconda installed, you can install it quickly using `miniconda`: http://conda.pydata.org/miniconda.html

After you have conda installed, run the following commands:

```bash
# create and activate the conda environment
conda create -n tensorflow python=3 numpy scipy matplotlib ipython
source activate tensorflow

# install the IPython kernel for the Jupyter notebook
ipython kernel install --name=tensorflow --display-name=tensorflow

# install tensorflow
conda install -c conda-forge tensorflow
```

You should now be able to open this notebook in Jupyter. Make sure you are running the right kernel by selecting "Kernel > Change kernel > tensorflow" from the menubar.

Note that this will not include GPU support, but you only need that if you're running on a machine that has a good GPU. For our lab, that will mean the `theano` machine, though it doesn't have a GPU yet.

# TensorFlow basics

There are three important things to know about in TensorFlow:

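* **Tensors** -- these are like multidimensional arrays (similar to NumPy arrays); they hold the values that flow through the graph
* **Ops** -- operations that take tensors as input and produce new tensors as output
* **Variables** -- tensors whose values persist across runs and can be updated, e.g. trainable parameters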

# Constructing the graph

In [1]:
import tensorflow as tf
import numpy as np
import tfutils
import scipy.stats

%load_ext autoreload
%autoreload 2

In [2]:
def gaussian_logpdf(X, mu, sigma, name=None):
    """Build a TensorFlow op computing the Gaussian log-density of X.

    Evaluates, elementwise,

        -0.5 * log(2 * pi * sigma^2) - (X - mu)^2 / (2 * sigma^2)

    i.e. the log of the normal density with mean `mu` and standard
    deviation `sigma`. The ops are created inside a scope named `name`,
    falling back to "gaussian_logpdf" when `name` is None.
    """
    with tf.op_scope([X, mu, sigma], name, "gaussian_logpdf"):
        # log of the normalizing constant 1 / sqrt(2 * pi * sigma^2)
        log_norm = -0.5 * tf.log(2 * np.pi * sigma ** 2)
        # log of the exponential term: -(X - mu)^2 / (2 * sigma^2)
        sq_dev = (X - mu) ** 2
        log_kernel = -sq_dev / (2 * sigma ** 2)
        return log_norm + log_kernel

In [3]:
# fix the seed so that the parameters drawn below are reproducible
np.random.seed(123)

# ground-truth parameters that we will try to recover, each uniform on [0, 10)
true_mu, true_sigma = np.random.uniform(0, 10), np.random.uniform(0, 10)

# starting guesses for the optimization, each uniform on [0, 1)
initial_mu, initial_sigma = np.random.rand(), np.random.rand()

print("True:    mu={:.4f}, sigma={:.4f}".format(true_mu, true_sigma))
print("Initial: mu={:.4f}, sigma={:.4f}".format(initial_mu, initial_sigma))

True:    mu=6.9647, sigma=2.8614
Initial: mu=0.2269, sigma=0.5513


In [4]:
# create a placeholder for the input to our Gaussian distribution. a
# placeholder is a special type of Tensor that is only given a value
# at runtime by the user (i.e., it is not the product of an op).
# the shape (None, 1) leaves the number of rows unspecified, so a batch
# of any size can be fed in as long as it has a single column.
X = tf.placeholder(tf.float32, shape=(None, 1), name="X")

# create trainable variables for the parameters of the Gaussian distribution,
# starting from the randomly drawn initial_mu and initial_sigma
mu = tf.Variable(initial_value=initial_mu, name="mu")
sigma = tf.Variable(initial_value=initial_sigma, name="sigma")

# compute the log probability of X given mu and sigma. gaussian_logpdf
# works elementwise, so reduce_sum adds up the log densities of all
# entries of X into a single scalar
logp = tf.reduce_sum(gaussian_logpdf(X, mu, sigma), name="logp")

# when we run this op, it will actually assign initial values to all the variables.
# variables created after this line (e.g. by an optimizer) are not covered,
# so the initializer has to be recreated once such variables exist
init = tf.initialize_all_variables()

In [5]:
# visualize the graph built so far. show_graph comes from the tfutils
# helper module imported above; presumably it renders the graph inline
# in the notebook -- verify against tfutils
tfutils.show_graph(tf.get_default_graph())

# Doing a forward pass

In [6]:
def sample_X():
    """Draw a fresh batch of observations from the true Gaussian.

    Returns a NumPy array of shape (1000, 1) sampled from a normal
    distribution with mean `true_mu` and standard deviation `true_sigma`.
    """
    return np.random.normal(loc=true_mu, scale=true_sigma, size=(1000, 1))

X_vals = sample_X()
X_vals[:10]

array([[  7.88636402],
       [  6.81727939],
       [  6.38039257],
       [ 12.6283863 ],
       [  2.33123742],
       [  3.77720148],
       [  5.68438796],
       [ 11.73864513],
       [  6.55444681],
       [  5.19294313]])

In [7]:
# open a session, run the initializer so that mu and sigma hold their
# initial values, then evaluate logp with X_vals fed in for the placeholder X
with tf.Session() as sess:
    sess.run(init)
    logp_val = sess.run(logp, feed_dict={X: X_vals})

# summed log probability of all samples in X_vals under the initial parameters
logp_val

-84496.938

In [8]:
# verify that we are getting the correct values from TensorFlow
# by comparing them to scipy.
# note: scipy returns one log density per sample (only the first 10 are
# shown here), whereas logp is the sum over all samples -- so it is the
# sum of these values that should match logp_val
scipy.stats.norm.logpdf(X_vals, initial_mu, initial_sigma)[:10]

array([[ -96.83364489],
       [ -71.77278977],
       [ -62.61386693],
       [-253.32413293],
       [  -7.60835477],
       [ -21.05892151],
       [ -49.31987532],
       [-218.32361547],
       [ -66.18749611],
       [ -40.89304153]])

# Computing gradients

In [9]:
# compute gradients of the negative log probability with respect to the parameters
grads = tf.gradients(-logp, [mu, sigma])
grads

[<tf.Tensor 'gradients/gaussian_logpdf/sub_grad/Reshape_1:0' shape=() dtype=float32>,
 <tf.Tensor 'gradients/AddN:0' shape=() dtype=float32>]

In [10]:
# visualize the graph again, now that the gradient ops have been added
# (show_graph comes from the tfutils helper module; presumably it renders
# the graph inline -- verify against tfutils)
tfutils.show_graph(tf.get_default_graph())

In [11]:
# evaluate the gradient tensors at the initial parameter values, using
# X_vals as the data
with tf.Session() as sess:
    sess.run(init)
    grad_vals = sess.run(grads, feed_dict={X: X_vals})

# grad_vals is ordered like grads, i.e. [d/d mu, d/d sigma] of -logp
print("True:      mu={:.4f}, sigma={:.4f}".format(true_mu, true_sigma))
print("Initial:   mu={:.4f}, sigma={:.4f}".format(initial_mu, initial_sigma))
print("Gradients: mu={:.4f}, sigma={:.4f}".format(*grad_vals))

True:      mu=6.9647, sigma=2.8614
Initial:   mu=0.2269, sigma=0.5513
Gradients: mu=-21631.4395, sigma=-303541.5312


# Optimization

In [12]:
# Adam optimizer; 0.1 is the learning rate
optimizer = tf.train.AdamOptimizer(0.1)

# apply_gradients expects (gradient, variable) pairs, so pair each entry
# of grads with the variable it was computed for. grads holds the gradients
# of -logp, so running this op takes a step that lowers the negative
# log probability
train = optimizer.apply_gradients(list(zip(grads, [mu, sigma])))

# we need to recreate the initializer, because the Adam optimizer creates
# additional variables
init = tf.initialize_all_variables()

In [13]:
# visualize the graph once more, now including the optimizer's ops
# (show_graph comes from the tfutils helper module; presumably it renders
# the graph inline -- verify against tfutils)
tfutils.show_graph(tf.get_default_graph())

In [14]:
# everything we want to run/fetch on each training step. the set of
# fetches never changes, so build the dict once instead of on every iteration
ops_to_run = {
    "logp": logp,
    "mu": mu,
    "sigma": sigma,
    "train": train
}

with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        # each step trains on a freshly sampled batch of data.
        # NOTE: mu and sigma are fetched in the same run call as the train
        # op, so the reported values may already include this step's update
        # (the first logged line shows mu moved away from its initial value)
        results = sess.run(ops_to_run, feed_dict={X: sample_X()})
        # report progress every 100 steps
        if (i % 100) == 0:
            print("log p(X | mu={mu:.4f}, sigma={sigma:.4f}) = {logp:.4f}".format(**results))

log p(X | mu=0.3269, sigma=0.6513) = -89877.9688
log p(X | mu=3.6832, sigma=2.3319) = -3487.7825
log p(X | mu=5.2226, sigma=2.5283) = -2737.7327
log p(X | mu=6.1461, sigma=2.6106) = -2512.8628
log p(X | mu=6.6175, sigma=2.6566) = -2490.7810
log p(X | mu=6.8386, sigma=2.6906) = -2478.1865
log p(X | mu=6.9270, sigma=2.7215) = -2492.9719
log p(X | mu=6.9440, sigma=2.7484) = -2461.8560
log p(X | mu=6.9570, sigma=2.7709) = -2472.1116
log p(X | mu=6.9772, sigma=2.7900) = -2485.7253


In [15]:
# for reference, evaluate logp on a fresh batch with the true parameters.
# mu and sigma are overridden through feed_dict with true_mu and true_sigma,
# so the values stored in the variables are not used for this evaluation
with tf.Session() as sess:
    sess.run(init)
    logp_val = sess.run(logp, feed_dict={X: sample_X(), mu: true_mu, sigma: true_sigma})
    print("log p(X | mu={mu:.4f}, sigma={sigma:.4f}) = {logp:.4f}".format(
            mu=true_mu, sigma=true_sigma, logp=logp_val))

log p(X | mu=6.9647, sigma=2.8614) = -2449.9668
