From 054c520974cbf6d642f8cd78ab65c7b0823d15e4 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:10:06 -0800 Subject: [PATCH 1/3] update docs --- README.md | 5 +++-- docs/tex/index.tex | 6 +++--- docs/tex/troubleshooting.tex | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6291d733a..33dd43f77 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,9 @@ It supports __modeling__ with + Directed graphical models + Neural networks (via libraries such as - [Keras](http://keras.io) and [TensorFlow - Slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim)) + [`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers) + and + [Keras](http://keras.io)) + Implicit generative models + Bayesian nonparametrics and probabilistic programs diff --git a/docs/tex/index.tex b/docs/tex/index.tex index be29c113a..006ab9925 100644 --- a/docs/tex/index.tex +++ b/docs/tex/index.tex @@ -14,9 +14,9 @@ \subsection{A library for probabilistic modeling, inference, and criticism.} \begin{itemize} \item Directed graphical models \item Neural networks (via libraries such as - \href{http://keras.io}{Keras} and - \href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow - Slim}) + \href{https://www.tensorflow.org/api_docs/python/tf/layers}{\texttt{tf.layers}} + and + \href{http://keras.io}{Keras}) \item Implicit generative models \item Bayesian nonparametrics and probabilistic programs \end{itemize} diff --git a/docs/tex/troubleshooting.tex b/docs/tex/troubleshooting.tex index 678b2acb5..3de76f97f 100644 --- a/docs/tex/troubleshooting.tex +++ b/docs/tex/troubleshooting.tex @@ -47,18 +47,17 @@ \subsubsection{Full Installation} minimal effort under a one-line interface. Observations was originally developed for Edward and it has since become a standalone library for general machine learning. - \item Neural networks are supported through four libraries: + \item Neural networks are supported through any library operating + on TensorFlow. For example: \texttt{tf.layers}, \href{http://keras.io}{Keras} (>=1.0) \begin{lstlisting}[language=JSON] pip install keras==2.0.4 \end{lstlisting} + and \href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow Slim} - (native in TensorFlow), and - \href{https://github.com/google/prettytensor}{PrettyTensor} (>=0.7.4) -\begin{lstlisting}[language=JSON] pip install prettytensor -\end{lstlisting} + (native in TensorFlow). + Note that as of Keras 2.0.5, neural net layer transformations can no longer be applied directly to random variables. For example, if \texttt{x} is an \texttt{ed.RandomVariable} object, one must call \texttt{tf.convert_to_tensor} on it before applying a layer transformation, as in \texttt{Dense(256)(tf.convert_to_tensor(x))}. See \href{https://github.com/fchollet/keras/issues/6979}{here} for more details.
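For concreteness, a minimal sketch of this workaround (illustrative only, not part of the patch; it assumes Edward with Keras >= 2.0.5 installed, and the shapes and layer size are arbitrary):

\begin{lstlisting}[language=Python]
import tensorflow as tf
from keras.layers import Dense
from edward.models import Normal

# x is an Edward random variable; Keras >= 2.0.5 layers expect a Tensor.
x = Normal(loc=tf.zeros([128, 64]), scale=tf.ones([128, 64]))

# Convert the random variable to a tensor before applying the layer.
h = Dense(256)(tf.convert_to_tensor(x))
\end{lstlisting}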
\item Notebooks require From bbb450054d7c339245ceba703ea6cdc90fb2a5f9 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:47:52 -0800 Subject: [PATCH 2/3] rm '#!/usr/bin/env python' --- examples/bayesian_linear_regression.py | 1 - examples/bayesian_linear_regression_implicitklqp.py | 1 - examples/bayesian_logistic_regression.py | 1 - examples/bayesian_nn.py | 1 - examples/beta_bernoulli.py | 1 - examples/bigan.py | 1 - examples/cox_process.py | 1 - examples/deep_exponential_family.py | 1 - examples/dirichlet_categorical.py | 1 - examples/factor_analysis.py | 1 - examples/gan_synthetic_data.py | 1 - examples/gan_wasserstein.py | 1 - examples/gan_wasserstein_synthetic.py | 1 - examples/invgamma_normal_mh.py | 1 - examples/irt.py | 1 - examples/iwvi.py | 1 - examples/lstm.py | 1 - examples/mixture_gaussian_gibbs.py | 1 - examples/mixture_gaussian_mh.py | 1 - examples/normal.py | 1 - examples/normal_normal.py | 1 - examples/normal_sgld.py | 1 - examples/pp_dirichlet_process.py | 1 - examples/pp_dynamic_shape.py | 1 - examples/pp_persistent_randomness.py | 1 - examples/pp_stochastic_control_flow.py | 1 - examples/pp_stochastic_recursion.py | 1 - examples/probabilistic_matrix_factorization.py | 1 - examples/probabilistic_pca_subsampling.py | 1 - examples/rasch_model.py | 1 - examples/sigmoid_belief_network.py | 1 - examples/stochastic_block_model.py | 1 - examples/vae.py | 1 - examples/vae_convolutional.py | 1 - examples/vae_convolutional_prettytensor.py | 1 - tests/data/generate_test_saver.py | 1 - tests/data/generate_toy_data_tfrecords.py | 1 - 37 files changed, 37 deletions(-) diff --git a/examples/bayesian_linear_regression.py b/examples/bayesian_linear_regression.py index d814945bb..038939fd5 100644 --- a/examples/bayesian_linear_regression.py +++ b/examples/bayesian_linear_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian linear regression using stochastic gradient Hamiltonian Monte Carlo. diff --git a/examples/bayesian_linear_regression_implicitklqp.py b/examples/bayesian_linear_regression_implicitklqp.py index 958f52361..b694f990c 100644 --- a/examples/bayesian_linear_regression_implicitklqp.py +++ b/examples/bayesian_linear_regression_implicitklqp.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian linear regression. Inference uses data subsampling and scales the log-likelihood. diff --git a/examples/bayesian_logistic_regression.py b/examples/bayesian_logistic_regression.py index 65a16be0d..a9c123e7d 100644 --- a/examples/bayesian_logistic_regression.py +++ b/examples/bayesian_logistic_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian logistic regression using Hamiltonian Monte Carlo. We visualize the fit. diff --git a/examples/bayesian_nn.py b/examples/bayesian_nn.py index e978d5bc2..6860c0555 100644 --- a/examples/bayesian_nn.py +++ b/examples/bayesian_nn.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian neural network using variational inference (see, e.g., Blundell et al. (2015); Kucukelbir et al. (2016)). diff --git a/examples/beta_bernoulli.py b/examples/beta_bernoulli.py index c602f323c..a21f62740 100644 --- a/examples/beta_bernoulli.py +++ b/examples/beta_bernoulli.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A simple coin flipping example. Inspired by Stan's toy example. 
""" from __future__ import absolute_import diff --git a/examples/bigan.py b/examples/bigan.py index 1b16d06e3..c857bfa9e 100644 --- a/examples/bigan.py +++ b/examples/bigan.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Adversarially Learned Inference (Dumoulin et al., 2017), aka Bidirectional Generative Adversarial Networks (Donahue et al., 2017), for joint learning of generator and inference networks for MNIST. diff --git a/examples/cox_process.py b/examples/cox_process.py index 0f59e91e8..f1ddb263a 100644 --- a/examples/cox_process.py +++ b/examples/cox_process.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A Cox process model for spatial analysis (Cox, 1955; Miller et al., 2014). diff --git a/examples/deep_exponential_family.py b/examples/deep_exponential_family.py index 2fd01c9a2..330c9225f 100644 --- a/examples/deep_exponential_family.py +++ b/examples/deep_exponential_family.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Sparse Gamma deep exponential family (Ranganath et al., 2015). We apply it as a topic model on the collection of NIPS 2011 conference papers. diff --git a/examples/dirichlet_categorical.py b/examples/dirichlet_categorical.py index 81bf04347..ea944368f 100644 --- a/examples/dirichlet_categorical.py +++ b/examples/dirichlet_categorical.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dirichlet-Categorical model. Posterior inference with Edward's BBVI. diff --git a/examples/factor_analysis.py b/examples/factor_analysis.py index f20f06b4b..ec3ec3d6c 100644 --- a/examples/factor_analysis.py +++ b/examples/factor_analysis.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Logistic factor analysis on MNIST. Using Monte Carlo EM, with HMC for the E-step and MAP for the M-step. We fit to just one data point in MNIST. diff --git a/examples/gan_synthetic_data.py b/examples/gan_synthetic_data.py index e2ab7e9f3..aa56c9b84 100644 --- a/examples/gan_synthetic_data.py +++ b/examples/gan_synthetic_data.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generative adversarial network for toy Gaussian data (Goodfellow et al., 2014). diff --git a/examples/gan_wasserstein.py b/examples/gan_wasserstein.py index 3dc688bf6..9d7feb40f 100644 --- a/examples/gan_wasserstein.py +++ b/examples/gan_wasserstein.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wasserstein generative adversarial network for MNIST (Arjovsky et al., 2017). It modifies GANs (Goodfellow et al., 2014) to optimize under the Wasserstein distance. diff --git a/examples/gan_wasserstein_synthetic.py b/examples/gan_wasserstein_synthetic.py index 28aa7f019..a68941a1c 100644 --- a/examples/gan_wasserstein_synthetic.py +++ b/examples/gan_wasserstein_synthetic.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wasserstein generative adversarial network for toy Gaussian data (Arjovsky et al., 2017). A gradient penalty is used to approximate the 1-Lipschitz functional family in the Wasserstein distance (Gulrajani diff --git a/examples/invgamma_normal_mh.py b/examples/invgamma_normal_mh.py index f959830db..d542ce486 100644 --- a/examples/invgamma_normal_mh.py +++ b/examples/invgamma_normal_mh.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ InverseGamma-Normal model Posterior inference with Metropolis Hastings diff --git a/examples/irt.py b/examples/irt.py index 5955b2492..306a63bc8 100644 --- a/examples/irt.py +++ b/examples/irt.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian Item Response Theory (IRT) Mixed Effects Model using variational inference. 
diff --git a/examples/iwvi.py b/examples/iwvi.py index 53d6cc300..c11f926d9 100644 --- a/examples/iwvi.py +++ b/examples/iwvi.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A demo of how to develop new inference algorithms in Edward. Here we implement importance-weighted variational inference. We test it on logistic regression. diff --git a/examples/lstm.py b/examples/lstm.py index 045871b97..762c90752 100644 --- a/examples/lstm.py +++ b/examples/lstm.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """LSTM language model on text8. Default hyperparameters achieve ~78.4 NLL at epoch 50, ~76.1423 NLL at diff --git a/examples/mixture_gaussian_gibbs.py b/examples/mixture_gaussian_gibbs.py index f31e6cb0a..af66b309b 100644 --- a/examples/mixture_gaussian_gibbs.py +++ b/examples/mixture_gaussian_gibbs.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Mixture of Gaussians, with block Gibbs for inference. """ from __future__ import absolute_import diff --git a/examples/mixture_gaussian_mh.py b/examples/mixture_gaussian_mh.py index bdd125117..5bccca711 100644 --- a/examples/mixture_gaussian_mh.py +++ b/examples/mixture_gaussian_mh.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Mixture of Gaussians. Perform inference with Metropolis-Hastings. It utterly fails. This is diff --git a/examples/normal.py b/examples/normal.py index fbe32e8cf..03fdc7b62 100644 --- a/examples/normal.py +++ b/examples/normal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Correlated normal posterior. Inference with Hamiltonian Monte Carlo. """ from __future__ import absolute_import diff --git a/examples/normal_normal.py b/examples/normal_normal.py index 3765068c5..215090b8c 100644 --- a/examples/normal_normal.py +++ b/examples/normal_normal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Normal-normal model using Hamiltonian Monte Carlo.""" from __future__ import absolute_import from __future__ import division diff --git a/examples/normal_sgld.py b/examples/normal_sgld.py index ac5cb5011..aad47d3d2 100644 --- a/examples/normal_sgld.py +++ b/examples/normal_sgld.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Correlated normal posterior. Inference with stochastic gradient Langevin dynamics. """ diff --git a/examples/pp_dirichlet_process.py b/examples/pp_dirichlet_process.py index a61fe0e83..08b071d80 100644 --- a/examples/pp_dirichlet_process.py +++ b/examples/pp_dirichlet_process.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dirichlet process. We implement sample generation from a Dirichlet process (with no base diff --git a/examples/pp_dynamic_shape.py b/examples/pp_dynamic_shape.py index af2dd11b7..86237b1fc 100644 --- a/examples/pp_dynamic_shape.py +++ b/examples/pp_dynamic_shape.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dynamic shapes. We build a random variable whose size depends on a sample from another diff --git a/examples/pp_persistent_randomness.py b/examples/pp_persistent_randomness.py index 6ccdb0c51..d2e7e81f7 100644 --- a/examples/pp_persistent_randomness.py +++ b/examples/pp_persistent_randomness.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Persistent randomness. Our language defines random variables. They enable memoization in the diff --git a/examples/pp_stochastic_control_flow.py b/examples/pp_stochastic_control_flow.py index 15bf18edd..60e148e93 100644 --- a/examples/pp_stochastic_control_flow.py +++ b/examples/pp_stochastic_control_flow.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic control flow. 
We sample from a geometric random variable by using samples from diff --git a/examples/pp_stochastic_recursion.py b/examples/pp_stochastic_recursion.py index 93e837bf4..40a703ca7 100644 --- a/examples/pp_stochastic_recursion.py +++ b/examples/pp_stochastic_recursion.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic recursion. We sample from a geometric random variable by using samples from diff --git a/examples/probabilistic_matrix_factorization.py b/examples/probabilistic_matrix_factorization.py index 92e9cccc8..acd51d63d 100644 --- a/examples/probabilistic_matrix_factorization.py +++ b/examples/probabilistic_matrix_factorization.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Probabilistic matrix factorization using variational inference. Visualizes the actual and the estimated rating matrices as heatmaps. diff --git a/examples/probabilistic_pca_subsampling.py b/examples/probabilistic_pca_subsampling.py index aa4157cba..5dd0a6641 100644 --- a/examples/probabilistic_pca_subsampling.py +++ b/examples/probabilistic_pca_subsampling.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Probabilistic principal components analysis (Tipping and Bishop, 1999). Inference uses data subsampling. diff --git a/examples/rasch_model.py b/examples/rasch_model.py index a1c3ea570..65ebf557c 100644 --- a/examples/rasch_model.py +++ b/examples/rasch_model.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Rasch model (Rasch, 1960).""" from __future__ import absolute_import from __future__ import division diff --git a/examples/sigmoid_belief_network.py b/examples/sigmoid_belief_network.py index 0de8b7457..47382d258 100644 --- a/examples/sigmoid_belief_network.py +++ b/examples/sigmoid_belief_network.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Sigmoid belief network (Neal, 1990) trained on the Caltech 101 Silhouettes data set. diff --git a/examples/stochastic_block_model.py b/examples/stochastic_block_model.py index 28e23ecb3..cba80a867 100644 --- a/examples/stochastic_block_model.py +++ b/examples/stochastic_block_model.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic block model.""" from __future__ import absolute_import from __future__ import division diff --git a/examples/vae.py b/examples/vae.py index 610947c3f..1029cfc8a 100644 --- a/examples/vae.py +++ b/examples/vae.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Variational auto-encoder for MNIST data. References diff --git a/examples/vae_convolutional.py b/examples/vae_convolutional.py index e53733904..537de5343 100644 --- a/examples/vae_convolutional.py +++ b/examples/vae_convolutional.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Convolutional variational auto-encoder for binarized MNIST. The neural networks are written with TensorFlow Slim. diff --git a/examples/vae_convolutional_prettytensor.py b/examples/vae_convolutional_prettytensor.py index d373a84bf..bcab70260 100644 --- a/examples/vae_convolutional_prettytensor.py +++ b/examples/vae_convolutional_prettytensor.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Convolutional variational auto-encoder for binarized MNIST. The neural networks are written with Pretty Tensor. 
diff --git a/tests/data/generate_test_saver.py b/tests/data/generate_test_saver.py index 0dc8dd15b..d5353f7bd 100644 --- a/tests/data/generate_test_saver.py +++ b/tests/data/generate_test_saver.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generate `test_saver`.""" from __future__ import absolute_import from __future__ import division diff --git a/tests/data/generate_toy_data_tfrecords.py b/tests/data/generate_toy_data_tfrecords.py index 2e2a4f560..6c3d91e61 100644 --- a/tests/data/generate_toy_data_tfrecords.py +++ b/tests/data/generate_toy_data_tfrecords.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generate `toy_data.tfrecords`.""" from __future__ import absolute_import from __future__ import division From 568afac9ac3e6d5af0aae7cd8f837c3f044e4882 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:23:35 -0800 Subject: [PATCH 3/3] update examples/ --- examples/bayesian_linear_regression.py | 109 +++++++------- ...bayesian_linear_regression_implicitklqp.py | 137 +++++++++--------- examples/bayesian_logistic_regression.py | 106 +++++++------- 3 files changed, 185 insertions(+), 167 deletions(-) diff --git a/examples/bayesian_linear_regression.py b/examples/bayesian_linear_regression.py index 038939fd5..8d0a6c997 100644 --- a/examples/bayesian_linear_regression.py +++ b/examples/bayesian_linear_regression.py @@ -19,6 +19,16 @@ from edward.models import Normal, Empirical +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") +tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.") +tf.flags.DEFINE_integer("nburn", default=100, + help="Number of burn-in samples.") +tf.flags.DEFINE_integer("stride", default=10, + help="Frequency with which to plot samples.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, noise_std=0.5): X = np.concatenate([np.linspace(0, 2, num=N / 2), @@ -28,73 +38,70 @@ def build_toy_dataset(N, noise_std=0.5): return X, y -ed.set_seed(42) - -N = 40 # number of data points -D = 1 # number of features +def main(_): + ed.set_seed(42) -# DATA -X_train, y_train = build_toy_dataset(N) -X_test, y_test = build_toy_dataset(N) + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) + X_test, y_test = build_toy_dataset(FLAGS.N) -# MODEL -X = tf.placeholder(tf.float32, [N, D]) -w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) -b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) -y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N)) + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) + b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) + y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N)) -# INFERENCE -T = 5000 # Number of samples. -nburn = 100 # Number of burn-in samples. -stride = 10 # Frequency with which to plot samples. -qw = Empirical(params=tf.Variable(tf.random_normal([T, D]))) -qb = Empirical(params=tf.Variable(tf.random_normal([T, 1]))) + # INFERENCE + qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) + qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1])) -inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) -inference.run(step_size=1e-3) + inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) + inference.run(step_size=1e-3) + # CRITICISM -# CRITICISM + # Plot posterior samples.
+ sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride], + qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride]) + plt.show() -# Plot posterior samples. -sns.jointplot(qb.params.eval()[nburn:T:stride], - qw.params.eval()[nburn:T:stride]) -plt.show() + # Posterior predictive checks. + y_post = ed.copy(y, {w: qw, b: qb}) + # This is equivalent to + # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N)) -# Posterior predictive checks. -y_post = ed.copy(y, {w: qw, b: qb}) -# This is equivalent to -# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N)) + print("Mean squared error on test data:") + print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) -print("Mean squared error on test data:") -print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) + print("Displaying prior predictive samples.") + n_prior_samples = 10 -print("Displaying prior predictive samples.") -n_prior_samples = 10 + w_prior = w.sample(n_prior_samples).eval() + b_prior = b.sample(n_prior_samples).eval() -w_prior = w.sample(n_prior_samples).eval() -b_prior = b.sample(n_prior_samples).eval() + plt.scatter(X_train, y_train) -plt.scatter(X_train, y_train) + inputs = np.linspace(-1, 10, num=400) + for ns in range(n_prior_samples): + output = inputs * w_prior[ns] + b_prior[ns] + plt.plot(inputs, output) -inputs = np.linspace(-1, 10, num=400) -for ns in range(n_prior_samples): - output = inputs * w_prior[ns] + b_prior[ns] - plt.plot(inputs, output) + plt.show() -plt.show() + print("Displaying posterior predictive samples.") + n_posterior_samples = 10 -print("Displaying posterior predictive samples.") -n_posterior_samples = 10 + w_post = qw.sample(n_posterior_samples).eval() + b_post = qb.sample(n_posterior_samples).eval() -w_post = qw.sample(n_posterior_samples).eval() -b_post = qb.sample(n_posterior_samples).eval() + plt.scatter(X_train, y_train) -plt.scatter(X_train, y_train) + inputs = np.linspace(-1, 10, num=400) + for ns in range(n_posterior_samples): + output = inputs * w_post[ns] + b_post[ns] + plt.plot(inputs, output) -inputs = np.linspace(-1, 10, num=400) -for ns in range(n_posterior_samples): - output = inputs * w_post[ns] + b_post[ns] - plt.plot(inputs, output) + plt.show() -plt.show() +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_linear_regression_implicitklqp.py b/examples/bayesian_linear_regression_implicitklqp.py index b694f990c..41a72a132 100644 --- a/examples/bayesian_linear_regression_implicitklqp.py +++ b/examples/bayesian_linear_regression_implicitklqp.py @@ -24,7 +24,12 @@ import tensorflow as tf from edward.models import Normal -from tensorflow.contrib import slim + +tf.flags.DEFINE_integer("N", default=500, help="Number of data points.") +tf.flags.DEFINE_integer("M", default=50, help="Batch size during training.") +tf.flags.DEFINE_integer("D", default=2, help="Number of features.") + +FLAGS = tf.flags.FLAGS def build_toy_dataset(N, w, noise_std=0.1): @@ -34,22 +39,6 @@ def build_toy_dataset(N, w, noise_std=0.1): return x, y -def ratio_estimator(data, local_vars, global_vars): - """Takes as input a dict of data x, local variable samples z, and - global variable samples beta; outputs real values of shape - (x.shape[0] + z.shape[0],). In this example, there are no local - variables. 
- """ - # data[y] has shape (M,); global_vars[w] has shape (D,) - # we concatenate w to each data point y, so input has shape (M, 1 + D) - input = tf.concat([ - tf.reshape(data[y], [M, 1]), - tf.tile(tf.reshape(global_vars[w], [1, D]), [M, 1])], 1) - hidden = slim.fully_connected(input, 64, activation_fn=tf.nn.relu) - output = slim.fully_connected(hidden, 1, activation_fn=None) - return output - - def generator(arrays, batch_size): """Generate batches, one with respect to each array's first axis.""" starts = [0] * len(arrays) # pointers to where we are in iteration @@ -69,52 +58,68 @@ def generator(arrays, batch_size): yield batches -ed.set_seed(42) - -N = 500 # number of data points -M = 50 # batch size during training -D = 2 # number of features - -# DATA -w_true = np.ones(D) * 5.0 -X_train, y_train = build_toy_dataset(N, w_true) -X_test, y_test = build_toy_dataset(N, w_true) -data = generator([X_train, y_train], M) - -# MODEL -X = tf.placeholder(tf.float32, [M, D]) -y_ph = tf.placeholder(tf.float32, [M]) -w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) -y = Normal(loc=ed.dot(X, w), scale=tf.ones(M)) - -# INFERENCE -qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0), - scale=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) - -inference = ed.ImplicitKLqp( - {w: qw}, data={y: y_ph}, - discriminator=ratio_estimator, global_vars={w: qw}) -inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M}) - -sess = ed.get_session() -tf.global_variables_initializer().run() - -for _ in range(inference.n_iter): - X_batch, y_batch = next(data) - for _ in range(5): - info_dict_d = inference.update( - variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) - - info_dict = inference.update( - variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) - info_dict['loss_d'] = info_dict_d['loss_d'] - info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration - - t = info_dict['t'] - inference.print_progress(info_dict) - if t == 1 or t % inference.n_print == 0: - # Check inferred posterior parameters. - mean, std = sess.run([qw.mean(), qw.stddev()]) - print("\nInferred mean & std:") - print(mean) - print(std) +def main(_): + def ratio_estimator(data, local_vars, global_vars): + """Takes as input a dict of data x, local variable samples z, and + global variable samples beta; outputs real values of shape + (x.shape[0] + z.shape[0],). In this example, there are no local + variables. 
+ """ + # data[y] has shape (M,); global_vars[w] has shape (D,) + # we concatenate w to each data point y, so input has shape (M, 1 + D) + input = tf.concat([ + tf.reshape(data[y], [FLAGS.M, 1]), + tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])], 1) + hidden = tf.layers.dense(input, 64, activation=tf.nn.relu) + output = tf.layers.dense(hidden, 1, activation=None) + return output + + ed.set_seed(42) + + # DATA + w_true = np.ones(FLAGS.D) * 5.0 + X_train, y_train = build_toy_dataset(FLAGS.N, w_true) + X_test, y_test = build_toy_dataset(FLAGS.N, w_true) + data = generator([X_train, y_train], FLAGS.M) + + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D]) + y_ph = tf.placeholder(tf.float32, [FLAGS.M]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) + y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M)) + + # INFERENCE + qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0, + scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))) + + inference = ed.ImplicitKLqp( + {w: qw}, data={y: y_ph}, + discriminator=ratio_estimator, global_vars={w: qw}) + inference.initialize(n_iter=5000, n_print=100, + scale={y: float(FLAGS.N) / FLAGS.M}) + + sess = ed.get_session() + tf.global_variables_initializer().run() + + for _ in range(inference.n_iter): + X_batch, y_batch = next(data) + for _ in range(5): + info_dict_d = inference.update( + variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) + + info_dict = inference.update( + variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) + info_dict['loss_d'] = info_dict_d['loss_d'] + info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration + + t = info_dict['t'] + inference.print_progress(info_dict) + if t == 1 or t % inference.n_print == 0: + # Check inferred posterior parameters. 
+ mean, std = sess.run([qw.mean(), qw.stddev()]) + print("\nInferred mean & std:") + print(mean) + print(std) + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_logistic_regression.py b/examples/bayesian_logistic_regression.py index a9c123e7d..141116bb7 100644 --- a/examples/bayesian_logistic_regression.py +++ b/examples/bayesian_logistic_regression.py @@ -13,6 +13,12 @@ from edward.models import Bernoulli, Normal, Empirical +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") +tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, noise_std=0.1): D = 1 @@ -25,68 +31,68 @@ def build_toy_dataset(N, noise_std=0.1): return X, y -ed.set_seed(42) +def main(_): + ed.set_seed(42) -N = 40 # number of data points -D = 1 # number of features + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) -# DATA -X_train, y_train = build_toy_dataset(N) + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=3.0 * tf.ones(FLAGS.D)) + b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) + y = Bernoulli(logits=ed.dot(X, w) + b) -# MODEL -X = tf.placeholder(tf.float32, [N, D]) -w = Normal(loc=tf.zeros(D), scale=3.0 * tf.ones(D)) -b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) -y = Bernoulli(logits=ed.dot(X, w) + b) + # INFERENCE + qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) + qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T])) -# INFERENCE -T = 5000 # number of samples -qw = Empirical(params=tf.Variable(tf.random_normal([T, D]))) -qb = Empirical(params=tf.Variable(tf.random_normal([T]))) + inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train}) + inference.initialize(n_print=10, step_size=0.6) -inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train}) -inference.initialize(n_print=10, step_size=0.6) + # Alternatively, use variational inference. + # qw_loc = tf.get_variable("qw_loc", [FLAGS.D]) + # qw_scale = tf.nn.softplus(tf.get_variable("qw_scale", [FLAGS.D])) + # qb_loc = tf.get_variable("qb_loc", []) + 10.0 + # qb_scale = tf.nn.softplus(tf.get_variable("qb_scale", [])) -# Alternatively, use variational inference. -# qw_loc = tf.Variable(tf.random_normal([D])) -# qw_scale = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) -# qb_loc = tf.Variable(tf.random_normal([]) + 10) -# qb_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) + # qw = Normal(loc=qw_loc, scale=qw_scale) + # qb = Normal(loc=qb_loc, scale=qb_scale) -# qw = Normal(loc=qw_loc, scale=qw_scale) -# qb = Normal(loc=qb_loc, scale=qb_scale) + # inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) + # inference.initialize(n_print=10, n_iter=600) -# inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) -# inference.initialize(n_print=10, n_iter=600) + tf.global_variables_initializer().run() -tf.global_variables_initializer().run() + # Set up figure. + fig = plt.figure(figsize=(8, 8), facecolor='white') + ax = fig.add_subplot(111, frameon=False) + plt.ion() + plt.show(block=False) -# Set up figure. -fig = plt.figure(figsize=(8, 8), facecolor='white') -ax = fig.add_subplot(111, frameon=False) -plt.ion() -plt.show(block=False) + # Build samples from inferred posterior. 
+ n_samples = 50 + inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) + probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) + for _ in range(n_samples)]) -# Build samples from inferred posterior. -n_samples = 50 -inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) -probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) - for _ in range(n_samples)]) + for t in range(inference.n_iter): + info_dict = inference.update() + inference.print_progress(info_dict) -for t in range(inference.n_iter): - info_dict = inference.update() - inference.print_progress(info_dict) + if t % inference.n_print == 0: + outputs = probs.eval() - if t % inference.n_print == 0: - outputs = probs.eval() + # Plot data and functions + plt.cla() + ax.plot(X_train[:], y_train, 'bx') + for s in range(n_samples): + ax.plot(inputs[:], outputs[s], alpha=0.2) - # Plot data and functions - plt.cla() - ax.plot(X_train[:], y_train, 'bx') - for s in range(n_samples): - ax.plot(inputs[:], outputs[s], alpha=0.2) + ax.set_xlim([-5, 3]) + ax.set_ylim([-0.5, 1.5]) + plt.draw() + plt.pause(1.0 / 60.0) - ax.set_xlim([-5, 3]) - ax.set_ylim([-0.5, 1.5]) - plt.draw() - plt.pause(1.0 / 60.0) +if __name__ == "__main__": + tf.app.run()
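For reference, the three converted examples above all follow the same tf.flags / main / tf.app.run skeleton. A minimal standalone sketch of that pattern (the flag and the print statement here are illustrative, not taken from the patch):

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

tf.flags.DEFINE_integer("N", default=40, help="Number of data points.")

FLAGS = tf.flags.FLAGS


def main(_):
  # tf.app.run() parses the flags and then calls main().
  print("Running with N = {}".format(FLAGS.N))

if __name__ == "__main__":
  tf.app.run()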