Standardize examples under more TensorFlow idioms (#835)
* update docs

* rm '#!/usr/bin/env python'

* update tests/

* update notebooks/ and docs/tex/

* remove prettytensor-only solution

* update examples/
dustinvtran committed Jan 29, 2018
1 parent 562540b commit 4d6ed4c
Showing 72 changed files with 2,170 additions and 2,397 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -15,8 +15,9 @@ It supports __modeling__ with

+ Directed graphical models
+ Neural networks (via libraries such as
[Keras](http://keras.io) and [TensorFlow
Slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim))
[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers)
and
[Keras](http://keras.io))
+ Implicit generative models
+ Bayesian nonparametrics and probabilistic programs

18 changes: 9 additions & 9 deletions docs/tex/getting-started.tex
@@ -64,17 +64,17 @@ \subsubsection{Your first Edward program}
inference. Specify a normal approximation over the weights and biases.

\begin{lstlisting}[language=Python]
qW_0 = Normal(loc=tf.Variable(tf.zeros([1, 2])),
scale=tf.nn.softplus(tf.Variable(tf.zeros([1, 2]))))
qW_1 = Normal(loc=tf.Variable(tf.zeros([2, 1])),
scale=tf.nn.softplus(tf.Variable(tf.zeros([2, 1]))))
qb_0 = Normal(loc=tf.Variable(tf.zeros(2)),
scale=tf.nn.softplus(tf.Variable(tf.zeros(2))))
qb_1 = Normal(loc=tf.Variable(tf.zeros(1)),
scale=tf.nn.softplus(tf.Variable(tf.zeros(1))))
qW_0 = Normal(loc=tf.get_variable("qW_0/loc", [D, 2]),
scale=tf.nn.softplus(tf.get_variable("qW_0/scale", [D, 2])))
qW_1 = Normal(loc=tf.get_variable("qW_1/loc", [2, 1]),
scale=tf.nn.softplus(tf.get_variable("qW_1/scale", [2, 1])))
qb_0 = Normal(loc=tf.get_variable("qb_0/loc", [2]),
scale=tf.nn.softplus(tf.get_variable("qb_0/scale", [2])))
qb_1 = Normal(loc=tf.get_variable("qb_1/loc", [1]),
scale=tf.nn.softplus(tf.get_variable("qb_1/scale", [1])))
\end{lstlisting}

Defining \texttt{tf.Variable} allows the variational factors'
Defining \texttt{tf.get_variable} allows the variational factors'
parameters to vary. They are initialized with TensorFlow's default
variable initializer. The standard
deviation parameters are constrained to be greater than zero according
to a
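The four factors above all follow the same pattern, so it can help to wrap it in a small helper. A minimal sketch under the new tf.get_variable idiom; the helper name normal_variational is illustrative rather than part of Edward, and D is the input dimension defined earlier in the tutorial.

\begin{lstlisting}[language=Python]
import tensorflow as tf
from edward.models import Normal

def normal_variational(name, shape):
  """Normal factor with trainable location and softplus-positive scale."""
  return Normal(
      loc=tf.get_variable(name + "/loc", shape),
      scale=tf.nn.softplus(tf.get_variable(name + "/scale", shape)))

# Equivalent to the four factors above; D is the input dimension.
qW_0 = normal_variational("qW_0", [D, 2])
qW_1 = normal_variational("qW_1", [2, 1])
qb_0 = normal_variational("qb_0", [2])
qb_1 = normal_variational("qb_1", [1])
\end{lstlisting}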
6 changes: 3 additions & 3 deletions docs/tex/index.tex
@@ -14,9 +14,9 @@ \subsection{A library for probabilistic modeling, inference, and criticism.}
\begin{itemize}
\item Directed graphical models
\item Neural networks (via libraries such as
\href{http://keras.io}{Keras} and
\href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow
Slim})
\href{https://www.tensorflow.org/api_docs/python/tf/layers}{\texttt{tf.layers}}
and
\href{http://keras.io}{Keras})
\item Implicit generative models
\item Bayesian nonparametrics and probabilistic programs
\end{itemize}
11 changes: 5 additions & 6 deletions docs/tex/troubleshooting.tex
@@ -47,18 +47,17 @@ \subsubsection{Full Installation}
minimal effort under a one-line interface. Observations was originally
developed for Edward and it has since become a standalone library for
general machine learning.
\item Neural networks are supported through four libraries:
\item Neural networks are supported through any library operating
on TensorFlow. For example:
\texttt{tf.layers},
\href{http://keras.io}{Keras} (>=1.0)
\begin{lstlisting}[language=JSON]
pip install keras==2.0.4
\end{lstlisting}
and
\href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow Slim}
(native in TensorFlow), and
\href{https://github.com/google/prettytensor}{PrettyTensor} (>=0.7.4)
\begin{lstlisting}[language=JSON]
pip install prettytensor
\end{lstlisting}
(native in TensorFlow).

Note that for Keras 2.0.5 and beyond, neural net layer transformations can no longer be applied directly to random variables. For example, if \texttt{x} is an \texttt{ed.RandomVariable} object, one must call \texttt{tf.convert_to_tensor} before applying a layer transformation, as in \texttt{Dense(256)(tf.convert_to_tensor(x))} (a short sketch follows below).
See \href{https://github.com/fchollet/keras/issues/6979}{here} for more details.
\item Notebooks require
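To make the Keras note concrete, a minimal sketch, assuming Keras 2.x with the TensorFlow backend; the shapes and variable names are illustrative.

\begin{lstlisting}[language=Python]
import tensorflow as tf
from edward.models import Normal
from keras.layers import Dense

z = Normal(loc=tf.zeros([100, 10]), scale=tf.ones([100, 10]))

# Keras >= 2.0.5 no longer accepts an ed.RandomVariable directly,
# so convert it to a tf.Tensor before applying the layer.
h = Dense(256)(tf.convert_to_tensor(z))
\end{lstlisting}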
6 changes: 3 additions & 3 deletions docs/tex/tutorials/automated-transformations.tex
@@ -78,8 +78,8 @@ \subsubsection{Automated Transformations in Inference}
\begin{lstlisting}[language=Python]
from edward.models import Normal

qx = Normal(loc=tf.Variable(tf.random_normal([])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))
qx = Normal(loc=tf.get_variable("qx/loc", []),
scale=tf.nn.softplus(tf.get_variable("qx/scale", [])))

inference = ed.KLqp({x: qx})
inference.run()
@@ -140,7 +140,7 @@ \subsubsection{Automated Transformations in Inference}
\begin{lstlisting}[language=Python]
from edward.models import Empirical

qx = Empirical(params=tf.Variable(tf.random_normal([1000])))
qx = Empirical(params=tf.get_variable("qx/params", [1000]))

inference = ed.HMC({x: qx})
inference.run(step_size=0.8)
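Both approximating families above are real-valued, which is the point of automated transformations when the latent variable has constrained support. A sketch with an illustrative positively constrained model (not necessarily the one defined earlier in this tutorial):

\begin{lstlisting}[language=Python]
import edward as ed
import tensorflow as tf
from edward.models import Gamma, Normal

# An illustrative latent variable with constrained (positive) support.
x = Gamma(1.0, 2.0)

# The real-valued approximation remains valid: KLqp transforms x to
# unconstrained space before matching it with qx.
qx = Normal(loc=tf.get_variable("qx/loc", []),
            scale=tf.nn.softplus(tf.get_variable("qx/scale", [])))

inference = ed.KLqp({x: qx})
inference.run()
\end{lstlisting}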
8 changes: 4 additions & 4 deletions docs/tex/tutorials/batch-training.tex
@@ -118,10 +118,10 @@ \subsubsection{Inference}
Define the variational model to be a fully factorized normal across
the weights.
\begin{lstlisting}[language=Python]
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))
qw = Normal(loc=tf.get_variable("qw/loc", [D]),
scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
qb = Normal(loc=tf.get_variable("qb/loc", [1]),
scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))
\end{lstlisting}

Run variational inference with the Kullback-Leibler divergence.
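Because only a minibatch of M points is fed per step, the minibatch log-likelihood must be rescaled by N / M. A rough sketch of the training loop, assuming the placeholders X and y_ph and a batch generator from earlier in the tutorial; n_epoch is illustrative.

\begin{lstlisting}[language=Python]
n_batch = int(N / M)
n_epoch = 5  # illustrative

inference = ed.KLqp({w: qw, b: qb}, data={y: y_ph})
inference.initialize(n_iter=n_batch * n_epoch,
                     scale={y: float(N) / M})  # rescale minibatch likelihood

sess = ed.get_session()
sess.run(tf.global_variables_initializer())

for _ in range(inference.n_iter):
  X_batch, y_batch = next(batch_generator)  # assumed generator of batches
  info_dict = inference.update({X: X_batch, y_ph: y_batch})
  inference.print_progress(info_dict)
\end{lstlisting}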
16 changes: 8 additions & 8 deletions docs/tex/tutorials/gan.tex
@@ -81,19 +81,18 @@ \subsubsection{Model}
produce stochasticity in a physical system; it is typically a fixed
uniform or normal distribution with some latent dimensionality.

In Edward, we build the model as follows, using TensorFlow Slim to
In Edward, we build the model as follows, using \texttt{tf.layers} to
specify the neural network. It defines a 2-layer fully connected neural
network and outputs a vector of length $28\times28$ with values in
$[0,1]$.

\begin{lstlisting}[language=Python]
from edward.models import Uniform
from tensorflow.contrib import slim

def generative_network(eps):
h1 = slim.fully_connected(eps, 128, activation_fn=tf.nn.relu)
x = slim.fully_connected(h1, 784, activation_fn=tf.sigmoid)
return x
net = tf.layers.dense(eps, 128, activation=tf.nn.relu)
net = tf.layers.dense(net, 784, activation=tf.sigmoid)
return net

with tf.variable_scope("Gen"):
eps = Uniform(tf.zeros([M, d]) - 1.0, tf.ones([M, d]))
@@ -132,9 +131,10 @@ \subsubsection{Inference}

\begin{lstlisting}[language=Python]
def discriminative_network(x):
h1 = slim.fully_connected(x, 128, activation_fn=tf.nn.relu)
logit = slim.fully_connected(h1, 1, activation_fn=None)
return logit
"""Outputs probability in logits."""
net = tf.layers.dense(x, 128, activation=tf.nn.relu)
net = tf.layers.dense(net, 1, activation=None)
return net
\end{lstlisting}

Let $p^*(\mathbf{x})$ represent the true data distribution.
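With both networks expressed via tf.layers, GAN inference can be wired up roughly as follows, assuming a placeholder x_ph that holds a minibatch of M flattened MNIST images and a batch generator; both names are illustrative.

\begin{lstlisting}[language=Python]
x_ph = tf.placeholder(tf.float32, [M, 784])

inference = ed.GANInference(data={x: x_ph},
                            discriminator=discriminative_network)
inference.initialize(optimizer=tf.train.AdamOptimizer(),
                     optimizer_d=tf.train.AdamOptimizer())

sess = ed.get_session()
sess.run(tf.global_variables_initializer())

for _ in range(inference.n_iter):
  x_batch = next(mnist_batches)  # assumed generator of [M, 784] arrays
  info_dict = inference.update(feed_dict={x_ph: x_batch})
  inference.print_progress(info_dict)
\end{lstlisting}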
4 changes: 2 additions & 2 deletions docs/tex/tutorials/latent-space-models.tex
@@ -105,8 +105,8 @@ \subsubsection{Inference}
One could instead run variational inference. This requires specifying
a variational model and instantiating \texttt{KLqp}.
\begin{lstlisting}[language=Python]
qz = Normal(loc=tf.Variable(tf.random_normal([N * K])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([N * K]))))
qz = Normal(loc=tf.get_variable("qz/loc", [N * K]),
scale=tf.nn.softplus(tf.get_variable("qz/scale", [N * K])))
inference = ed.KLqp({z: qz}, data={x: x_train})
\end{lstlisting}
See this extended tutorial about
22 changes: 11 additions & 11 deletions docs/tex/tutorials/linear-mixed-effects-models.tex
@@ -164,12 +164,12 @@ \subsubsection{Model}
service_ph = tf.placeholder(tf.float32, [None])

# Set up fixed effects.
mu = tf.Variable(tf.random_normal([]))
service = tf.Variable(tf.random_normal([]))
mu = tf.get_variable("mu", [])
service = tf.get_variable("service", [])

sigma_s = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))
sigma_d = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))
sigma_dept = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))
sigma_s = tf.sqrt(tf.exp(tf.get_variable("sigma_s", [])))
sigma_d = tf.sqrt(tf.exp(tf.get_variable("sigma_d", [])))
sigma_dept = tf.sqrt(tf.exp(tf.get_variable("sigma_dept", [])))

# Set up random effects.
eta_s = Normal(loc=tf.zeros(n_s), scale=sigma_s * tf.ones(n_s))
@@ -194,14 +194,14 @@ \subsubsection{Inference}

\begin{lstlisting}[language=Python]
q_eta_s = Normal(
loc=tf.Variable(tf.random_normal([n_s])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_s]))))
loc=tf.get_variable("q_eta_s/loc", [n_s]),
scale=tf.nn.softplus(tf.get_variable("q_eta_s/scale", [n_s])))
q_eta_d = Normal(
loc=tf.Variable(tf.random_normal([n_d])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_d]))))
loc=tf.get_variable("q_eta_d/loc", [n_d]),
scale=tf.nn.softplus(tf.get_variable("q_eta_d/scale", [n_d])))
q_eta_dept = Normal(
loc=tf.Variable(tf.random_normal([n_dept])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_dept]))))
loc=tf.get_variable("q_eta_dept/loc", [n_dept]),
scale=tf.nn.softplus(tf.get_variable("q_eta_dept/scale", [n_dept])))

latent_vars = {
eta_s: q_eta_s,
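The latent_vars dictionary is truncated by the hunk above; completing it for the three groups of random effects and running variational inference would look roughly like this. The data bindings (y, y_train, and the *_train feature columns) are assumptions based on the placeholders defined earlier, and n_iter is illustrative.

\begin{lstlisting}[language=Python]
latent_vars = {
    eta_s: q_eta_s,
    eta_d: q_eta_d,
    eta_dept: q_eta_dept,
}
# Assumed: `y` is the observed-ratings random variable and the *_train
# arrays hold the corresponding columns of the data set.
data = {y: y_train, s_ph: s_train, d_ph: d_train,
        dept_ph: dept_train, service_ph: service_train}
inference = ed.KLqp(latent_vars, data=data)
inference.run(n_iter=1000)
\end{lstlisting}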
13 changes: 6 additions & 7 deletions docs/tex/tutorials/mixture-density-network.tex
@@ -62,21 +62,20 @@ \subsubsection{Model}
per-component mean and standard deviation are given by the output of a
feedforward network.

We leverage TensorFlow Slim to construct neural networks. We specify
We use \texttt{tf.layers} to construct neural networks. We specify
a three-layer network with 15 hidden units for each hidden layer.

\begin{lstlisting}[language=Python]
from edward.models import Categorical, Mixture, Normal
from tensorflow.contrib import slim

def neural_network(X):
"""loc, scale, logits = NN(x; theta)"""
# 2 hidden layers with 15 hidden units
hidden1 = slim.fully_connected(X, 15)
hidden2 = slim.fully_connected(hidden1, 15)
locs = slim.fully_connected(hidden2, K, activation_fn=None)
scales = slim.fully_connected(hidden2, K, activation_fn=tf.exp)
logits = slim.fully_connected(hidden2, K, activation_fn=None)
net = tf.layers.dense(X, 15, activation=tf.nn.relu)
net = tf.layers.dense(net, 15, activation=tf.nn.relu)
locs = tf.layers.dense(net, K, activation=None)
scales = tf.layers.dense(net, K, activation=tf.exp)
logits = tf.layers.dense(net, K, activation=None)
return locs, scales, logits

K = 20 # number of mixture components
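The three heads returned by neural_network parameterize a K-component mixture of Normals over the scalar target. A sketch of how they might be assembled, assuming a feature placeholder X_ph with D input features and batch size N as elsewhere in the tutorial:

\begin{lstlisting}[language=Python]
X_ph = tf.placeholder(tf.float32, [None, D])
y_ph = tf.placeholder(tf.float32, [None])

locs, scales, logits = neural_network(X_ph)
cat = Categorical(logits=logits)
components = [Normal(loc=loc, scale=scale) for loc, scale
              in zip(tf.unstack(tf.transpose(locs)),
                     tf.unstack(tf.transpose(scales)))]
y = Mixture(cat=cat, components=components, sample_shape=N)
\end{lstlisting}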
8 changes: 4 additions & 4 deletions docs/tex/tutorials/probabilistic-pca.tex
@@ -112,10 +112,10 @@ \subsubsection{Inference}
measure.

\begin{lstlisting}[language=Python]
qw = Normal(loc=tf.Variable(tf.random_normal([D, K])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
qz = Normal(loc=tf.Variable(tf.random_normal([N, K])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([N, K]))))
qw = Normal(loc=tf.get_variable("qw/loc", [D, K]),
scale=tf.nn.softplus(tf.get_variable("qw/scale", [D, K])))
qz = Normal(loc=tf.get_variable("qz/loc", [N, K]),
scale=tf.nn.softplus(tf.get_variable("qz/scale", [N, K])))

inference = ed.KLqp({w: qw, z: qz}, data={x: x_train})
inference.run(n_iter=500, n_print=100, n_samples=10)
4 changes: 2 additions & 2 deletions docs/tex/tutorials/supervised-classification.tex
@@ -113,8 +113,8 @@ \subsubsection{Inference}
Perform variational inference.
Define the variational model to be a fully factorized normal.
\begin{lstlisting}[language=Python]
qf = Normal(loc=tf.Variable(tf.random_normal([N])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([N]))))
qf = Normal(loc=tf.get_variable("qf/loc", [N]),
scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])))
\end{lstlisting}

Run variational inference for \texttt{500} iterations.
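A sketch of the corresponding inference call, assuming the latent function values are named f and that X, X_train, y, and y_train are as defined earlier in the tutorial:

\begin{lstlisting}[language=Python]
inference = ed.KLqp({f: qf}, data={X: X_train, y: y_train})
inference.run(n_iter=500)
\end{lstlisting}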
8 changes: 4 additions & 4 deletions docs/tex/tutorials/supervised-regression.tex
@@ -78,10 +78,10 @@ \subsubsection{Inference}
Define the variational model to be a fully factorized normal across
the weights.
\begin{lstlisting}[language=Python]
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))
qw = Normal(loc=tf.get_variable("qw/loc", [D]),
scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
qb = Normal(loc=tf.get_variable("qb/loc", [1]),
scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))
\end{lstlisting}

Run variational inference with the Kullback-Leibler divergence, using
14 changes: 6 additions & 8 deletions docs/tex/tutorials/tensorboard.tex
@@ -130,14 +130,12 @@ \subsubsection{Inference}

\begin{lstlisting}[language=Python]
with tf.name_scope("posterior"):
qw = Normal(loc=tf.Variable(tf.random_normal([D]), name="qw/loc"),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]),
name="qw/unconstrained_scale")),
name="qw")
qb = Normal(loc=tf.Variable(tf.random_normal([1]), name="qb/loc"),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]),
name="qb/unconstrained_scale")),
name="qb")
qw_loc = tf.get_variable("qw/loc", [D])
qw_scale = tf.nn.softplus(tf.get_variable("qw/unconstrained_scale", [D]))
qw = Normal(loc=qw_loc, scale=qw_scale, name="qw")
qb_loc = tf.get_variable("qb/loc", [1])
qb_scale = tf.nn.softplus(tf.get_variable("qb/unconstrained_scale", [1]))
qb = Normal(loc=qb_loc, scale=qb_scale, name="qb")
\end{lstlisting}

Run variational inference with the Kullback-Leibler divergence.
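Passing a log directory to inference lets TensorBoard pick up the name-scoped variables above. A sketch, assuming the model variables w, b, X, y and the training arrays from earlier in the tutorial; the directory name is illustrative.

\begin{lstlisting}[language=Python]
inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(n_samples=5, n_iter=250, logdir='log/my_run')
\end{lstlisting}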
17 changes: 13 additions & 4 deletions docs/tex/tutorials/unsupervised.tex
@@ -147,10 +147,19 @@ \subsubsection{Inference}

\begin{lstlisting}[language=Python]
T = 500 # number of MCMC samples
qpi = Empirical(tf.Variable(tf.ones([T, K]) / K))
qmu = Empirical(tf.Variable(tf.zeros([T, K, D])))
qsigmasq = Empirical(tf.Variable(tf.ones([T, K, D])))
qz = Empirical(tf.Variable(tf.zeros([T, N], dtype=tf.int32)))
qpi = Empirical(tf.get_variable(
"qpi/params", [T, K],
initializer=tf.constant_initializer(1.0 / K)))
qmu = Empirical(tf.get_variable(
"qmu/params", [T, K, D],
initializer=tf.zeros_initializer()))
qsigmasq = Empirical(tf.get_variable(
"qsigmasq/params", [T, K, D],
initializer=tf.ones_initializer()))
qz = Empirical(tf.get_variable(
"qz/params", [T, N],
initializer=tf.zeros_initializer(),
dtype=tf.int32))
\end{lstlisting}

Run Gibbs sampling. We write the training loop explicitly, so that we can track
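The explicit loop referred to above would look roughly like the following, assuming the model's latent variables are named pi, mu, sigmasq, and z (matching the q-names) and that x_train holds the N observations; the tracked quantity is one possible choice.

\begin{lstlisting}[language=Python]
inference = ed.Gibbs({pi: qpi, mu: qmu, sigmasq: qsigmasq, z: qz},
                     data={x: x_train})
inference.initialize()

sess = ed.get_session()
sess.run(tf.global_variables_initializer())

# Running mean of the sampled cluster means, up to the current draw.
t_ph = tf.placeholder(tf.int32, [])
running_cluster_means = tf.reduce_mean(qmu.params[:t_ph], 0)

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
  t = info_dict['t']
  if t % inference.n_print == 0:
    print("\nInferred cluster means:")
    print(sess.run(running_cluster_means, {t_ph: t - 1}))
\end{lstlisting}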