Merge 568afac into 562540b
dustinvtran committed Jan 28, 2018
2 parents 562540b + 568afac commit f0e0f7b
Showing 40 changed files with 196 additions and 215 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -15,8 +15,9 @@ It supports __modeling__ with

+ Directed graphical models
+ Neural networks (via libraries such as
[Keras](http://keras.io) and [TensorFlow
Slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim))
[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers)
and
[Keras](http://keras.io))
+ Implicit generative models
+ Bayesian nonparametrics and probabilistic programs

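To illustrate the `tf.layers` integration the updated README points to, here is a minimal sketch of a neural network inside an Edward model (not part of the diff; Edward 1.x and TensorFlow 1.x assumed, with illustrative shapes and layer sizes):

```python
import tensorflow as tf
from edward.models import Normal

# Illustrative feature placeholder: batches of 10-dimensional inputs.
x = tf.placeholder(tf.float32, [None, 10])

# A neural network built with tf.layers parameterizes the likelihood's mean.
hidden = tf.layers.dense(x, 64, activation=tf.nn.relu)
y = Normal(loc=tf.layers.dense(hidden, 1), scale=1.0)
```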
6 changes: 3 additions & 3 deletions docs/tex/index.tex
@@ -14,9 +14,9 @@ \subsection{A library for probabilistic modeling, inference, and criticism.}
\begin{itemize}
\item Directed graphical models
\item Neural networks (via libraries such as
\href{http://keras.io}{Keras} and
\href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow
Slim})
\href{https://www.tensorflow.org/api_docs/python/tf/layers}{\texttt{tf.layers}}
and
\href{http://keras.io}{Keras})
\item Implicit generative models
\item Bayesian nonparametrics and probabilistic programs
\end{itemize}
11 changes: 5 additions & 6 deletions docs/tex/troubleshooting.tex
@@ -47,18 +47,17 @@ \subsubsection{Full Installation}
minimal effort under a one-line interface. Observations was originally
developed for Edward and has since become a standalone library for
general machine learning.
\item Neural networks are supported through four libraries:
\item Neural networks are supported through any library operating
on TensorFlow. For example:
\texttt{tf.layers},
\href{http://keras.io}{Keras} (>=1.0)
\begin{lstlisting}[language=JSON]
pip install keras==2.0.4
\end{lstlisting}
and
\href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow Slim}
(native in TensorFlow), and
\href{https://github.com/google/prettytensor}{PrettyTensor} (>=0.7.4)
\begin{lstlisting}[language=JSON]
pip install prettytensor
\end{lstlisting}
(native in TensorFlow).

Note that from Keras 2.0.5 onward, neural net layer transformations can no longer be applied directly to random variables. For example, if \texttt{x} is an \texttt{ed.RandomVariable} object, one must call \texttt{tf.convert_to_tensor} before passing it to a layer, as in \texttt{Dense(256)(tf.convert_to_tensor(x))}; see the sketch after this hunk.
See \href{https://github.com/fchollet/keras/issues/6979}{here} for more details.
\item Notebooks require
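A minimal sketch of the Keras >= 2.0.5 workaround described in the note above (not part of the diff; Edward 1.x, TensorFlow 1.x, and illustrative shapes assumed):

```python
import tensorflow as tf
from keras.layers import Dense
from edward.models import Normal

# A latent random variable; Keras >= 2.0.5 layers no longer accept it directly.
z = Normal(loc=tf.zeros([1, 10]), scale=tf.ones([1, 10]))

# Workaround: convert the random variable to a tensor before applying the layer.
h = Dense(256, activation='relu')(tf.convert_to_tensor(z))
```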
110 changes: 58 additions & 52 deletions examples/bayesian_linear_regression.py
@@ -1,4 +1,3 @@
#!/usr/bin/env python
"""Bayesian linear regression using stochastic gradient Hamiltonian
Monte Carlo.
@@ -20,6 +19,16 @@

from edward.models import Normal, Empirical

tf.flags.DEFINE_integer("N", default=40, help="Number of data points.")
tf.flags.DEFINE_integer("D", default=1, help="Number of features.")
tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.")
tf.flags.DEFINE_integer("nburn", default=100,
help="Number of burn-in samples.")
tf.flags.DEFINE_integer("stride", default=10,
help="Frequency with which to plots samples.")

FLAGS = tf.flags.FLAGS


def build_toy_dataset(N, noise_std=0.5):
X = np.concatenate([np.linspace(0, 2, num=N // 2),
@@ -29,73 +38,70 @@ def build_toy_dataset(N, noise_std=0.5):
return X, y


ed.set_seed(42)

N = 40 # number of data points
D = 1 # number of features
def main(_):
ed.set_seed(42)

# DATA
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)
# DATA
X_train, y_train = build_toy_dataset(FLAGS.N)
X_test, y_test = build_toy_dataset(FLAGS.N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))
# MODEL
X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N))

# INFERENCE
T = 5000 # Number of samples.
nburn = 100 # Number of burn-in samples.
stride = 10 # Frequency with which to plot samples.
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T, 1])))
# INFERENCE
qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1]))

inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)
inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)

# CRITICISM

# CRITICISM
# Plot posterior samples.
sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride],
qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride])
plt.show()

# Plot posterior samples.
sns.jointplot(qb.params.eval()[nburn:T:stride],
qw.params.eval()[nburn:T:stride])
plt.show()
# Posterior predictive checks.
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N))

# Posterior predictive checks.
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N))
print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
print("Displaying prior predictive samples.")
n_prior_samples = 10

print("Displaying prior predictive samples.")
n_prior_samples = 10
w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()
plt.scatter(X_train, y_train)

plt.scatter(X_train, y_train)
inputs = np.linspace(-1, 10, num=400)
for ns in range(n_prior_samples):
output = inputs * w_prior[ns] + b_prior[ns]
plt.plot(inputs, output)

inputs = np.linspace(-1, 10, num=400)
for ns in range(n_prior_samples):
output = inputs * w_prior[ns] + b_prior[ns]
plt.plot(inputs, output)
plt.show()

plt.show()
print("Displaying posterior predictive samples.")
n_posterior_samples = 10

print("Displaying posterior predictive samples.")
n_posterior_samples = 10
w_post = qw.sample(n_posterior_samples).eval()
b_post = qb.sample(n_posterior_samples).eval()

w_post = qw.sample(n_posterior_samples).eval()
b_post = qb.sample(n_posterior_samples).eval()
plt.scatter(X_train, y_train)

plt.scatter(X_train, y_train)
inputs = np.linspace(-1, 10, num=400)
for ns in range(n_posterior_samples):
output = inputs * w_post[ns] + b_post[ns]
plt.plot(inputs, output)

inputs = np.linspace(-1, 10, num=400)
for ns in range(n_posterior_samples):
output = inputs * w_post[ns] + b_post[ns]
plt.plot(inputs, output)
plt.show()

plt.show()
if __name__ == "__main__":
tf.app.run()
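The refactor above moves the example's constants into `tf.flags` and wraps the script body in `main(_)`, so values such as `N` or `T` can be overridden from the command line. A minimal sketch of that pattern in isolation (not part of the diff; TensorFlow 1.x assumed, with a single illustrative flag):

```python
import tensorflow as tf

tf.flags.DEFINE_integer("N", default=40, help="Number of data points.")

FLAGS = tf.flags.FLAGS


def main(_):
  # tf.app.run() parses sys.argv before calling main(), so e.g.
  # `python script.py --N=100` overrides the default of 40.
  print("Running with N = {}".format(FLAGS.N))


if __name__ == "__main__":
  tf.app.run()
```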
138 changes: 71 additions & 67 deletions examples/bayesian_linear_regression_implicitklqp.py
@@ -1,4 +1,3 @@
#!/usr/bin/env python
"""Bayesian linear regression. Inference uses data subsampling and
scales the log-likelihood.
@@ -25,7 +24,12 @@
import tensorflow as tf

from edward.models import Normal
from tensorflow.contrib import slim

tf.flags.DEFINE_integer("N", default=500, help="Number of data points.")
tf.flags.DEFINE_integer("M", default=50, help="Batch size during training.")
tf.flags.DEFINE_integer("D", default=2, help="Number of features.")

FLAGS = tf.flags.FLAGS


def build_toy_dataset(N, w, noise_std=0.1):
@@ -35,22 +39,6 @@ def build_toy_dataset(N, w, noise_std=0.1):
return x, y


def ratio_estimator(data, local_vars, global_vars):
"""Takes as input a dict of data x, local variable samples z, and
global variable samples beta; outputs real values of shape
(x.shape[0] + z.shape[0],). In this example, there are no local
variables.
"""
# data[y] has shape (M,); global_vars[w] has shape (D,)
# we concatenate w to each data point y, so input has shape (M, 1 + D)
input = tf.concat([
tf.reshape(data[y], [M, 1]),
tf.tile(tf.reshape(global_vars[w], [1, D]), [M, 1])], 1)
hidden = slim.fully_connected(input, 64, activation_fn=tf.nn.relu)
output = slim.fully_connected(hidden, 1, activation_fn=None)
return output


def generator(arrays, batch_size):
"""Generate batches, one with respect to each array's first axis."""
starts = [0] * len(arrays) # pointers to where we are in iteration
@@ -70,52 +58,68 @@ def generator(arrays, batch_size):
yield batches


ed.set_seed(42)

N = 500 # number of data points
M = 50 # batch size during training
D = 2 # number of features

# DATA
w_true = np.ones(D) * 5.0
X_train, y_train = build_toy_dataset(N, w_true)
X_test, y_test = build_toy_dataset(N, w_true)
data = generator([X_train, y_train], M)

# MODEL
X = tf.placeholder(tf.float32, [M, D])
y_ph = tf.placeholder(tf.float32, [M])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
y = Normal(loc=ed.dot(X, w), scale=tf.ones(M))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))

inference = ed.ImplicitKLqp(
{w: qw}, data={y: y_ph},
discriminator=ratio_estimator, global_vars={w: qw})
inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M})

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
X_batch, y_batch = next(data)
for _ in range(5):
info_dict_d = inference.update(
variables="Disc", feed_dict={X: X_batch, y_ph: y_batch})

info_dict = inference.update(
variables="Gen", feed_dict={X: X_batch, y_ph: y_batch})
info_dict['loss_d'] = info_dict_d['loss_d']
info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration

t = info_dict['t']
inference.print_progress(info_dict)
if t == 1 or t % inference.n_print == 0:
# Check inferred posterior parameters.
mean, std = sess.run([qw.mean(), qw.stddev()])
print("\nInferred mean & std:")
print(mean)
print(std)
def main(_):
def ratio_estimator(data, local_vars, global_vars):
"""Takes as input a dict of data x, local variable samples z, and
global variable samples beta; outputs real values of shape
(x.shape[0] + z.shape[0],). In this example, there are no local
variables.
"""
# data[y] has shape (M,); global_vars[w] has shape (D,)
# we concatenate w to each data point y, so input has shape (M, 1 + D)
input = tf.concat([
tf.reshape(data[y], [FLAGS.M, 1]),
tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])], 1)
hidden = tf.layers.dense(input, 64, activation=tf.nn.relu)
output = tf.layers.dense(hidden, 1, activation=None)
return output

ed.set_seed(42)

# DATA
w_true = np.ones(FLAGS.D) * 5.0
X_train, y_train = build_toy_dataset(FLAGS.N, w_true)
X_test, y_test = build_toy_dataset(FLAGS.N, w_true)
data = generator([X_train, y_train], FLAGS.M)

# MODEL
X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D])
y_ph = tf.placeholder(tf.float32, [FLAGS.M])
w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M))

# INFERENCE
qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0,
scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D])))

inference = ed.ImplicitKLqp(
{w: qw}, data={y: y_ph},
discriminator=ratio_estimator, global_vars={w: qw})
inference.initialize(n_iter=5000, n_print=100,
scale={y: float(FLAGS.N) / FLAGS.M})

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
X_batch, y_batch = next(data)
for _ in range(5):
info_dict_d = inference.update(
variables="Disc", feed_dict={X: X_batch, y_ph: y_batch})

info_dict = inference.update(
variables="Gen", feed_dict={X: X_batch, y_ph: y_batch})
info_dict['loss_d'] = info_dict_d['loss_d']
info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration

t = info_dict['t']
inference.print_progress(info_dict)
if t == 1 or t % inference.n_print == 0:
# Check inferred posterior parameters.
mean, std = sess.run([qw.mean(), qw.stddev()])
print("\nInferred mean & std:")
print(mean)
print(std)

if __name__ == "__main__":
tf.app.run()
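This second example also replaces TensorFlow Slim with `tf.layers` in the ratio estimator. A minimal sketch of the correspondence (not part of the diff; TensorFlow 1.x and an illustrative input shape assumed):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])  # illustrative input

# Before this commit (TensorFlow Slim):
#   hidden = slim.fully_connected(x, 64, activation_fn=tf.nn.relu)
#   output = slim.fully_connected(hidden, 1, activation_fn=None)

# After this commit (tf.layers):
hidden = tf.layers.dense(x, 64, activation=tf.nn.relu)
output = tf.layers.dense(hidden, 1, activation=None)
```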
