From 054c520974cbf6d642f8cd78ab65c7b0823d15e4 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:10:06 -0800 Subject: [PATCH 1/3] update docs --- README.md | 5 +++-- docs/tex/index.tex | 6 +++--- docs/tex/troubleshooting.tex | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6291d733a..33dd43f77 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,9 @@ It supports __modeling__ with + Directed graphical models + Neural networks (via libraries such as - [Keras](http://keras.io) and [TensorFlow - Slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim)) + [`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers) + and + [Keras](http://keras.io)) + Implicit generative models + Bayesian nonparametrics and probabilistic programs diff --git a/docs/tex/index.tex b/docs/tex/index.tex index be29c113a..006ab9925 100644 --- a/docs/tex/index.tex +++ b/docs/tex/index.tex @@ -14,9 +14,9 @@ \subsection{A library for probabilistic modeling, inference, and criticism.} \begin{itemize} \item Directed graphical models \item Neural networks (via libraries such as - \href{http://keras.io}{Keras} and - \href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow - Slim}) + \href{https://www.tensorflow.org/api_docs/python/tf/layers}{\texttt{tf.layers}} + and + \href{http://keras.io}{Keras}) \item Implicit generative models \item Bayesian nonparametrics and probabilistic programs \end{itemize} diff --git a/docs/tex/troubleshooting.tex b/docs/tex/troubleshooting.tex index 678b2acb5..3de76f97f 100644 --- a/docs/tex/troubleshooting.tex +++ b/docs/tex/troubleshooting.tex @@ -47,18 +47,17 @@ \subsubsection{Full Installation} minimal effort under a one-line interface. Observations was originally developed for Edward and it has since become a standalone library for general machine learning. - \item Neural networks are supported through four libraries: + \item Neural networks are supported through any library operating + on TensorFlow. For example: \texttt{tf.layers}, \href{http://keras.io}{Keras} (>=1.0) \begin{lstlisting}[language=JSON] pip install keras==2.0.4 \end{lstlisting} + and \href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow Slim} - (native in TensorFlow), and - \href{https://github.com/google/prettytensor}{PrettyTensor} (>=0.7.4) -\begin{lstlisting}[language=JSON] -pip install prettytensor -\end{lstlisting} + (native in TensorFlow). + Note that for Keras 2.0.5 and beyond, all neural net layer transformations cannot be directly applied on random variables anymore. For example, if \texttt{x} is a \texttt{ed.RandomVariable} object, one must call \texttt{tf.convert_to_tensor} before applying it to a layer transformation, \texttt{Dense(256)(tf.convert_to_tensor(x))}. See \href{https://github.com/fchollet/keras/issues/6979}{here} for more details. 
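As a minimal sketch of the conversion described above (the shapes and import paths here are illustrative, assuming Keras >= 2.0.5 with the TensorFlow backend):
\begin{lstlisting}[language=Python]
import tensorflow as tf
from edward.models import Normal
from keras.layers import Dense

# x is an ed.RandomVariable; convert it to a tf.Tensor before
# handing it to a Keras layer transformation.
x = Normal(loc=tf.zeros([256, 784]), scale=tf.ones([256, 784]))
h = Dense(256, activation='relu')(tf.convert_to_tensor(x))
\end{lstlisting}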
\item Notebooks require From bbb450054d7c339245ceba703ea6cdc90fb2a5f9 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:47:52 -0800 Subject: [PATCH 2/3] rm '#!/usr/bin/env python' --- examples/bayesian_linear_regression.py | 1 - examples/bayesian_linear_regression_implicitklqp.py | 1 - examples/bayesian_logistic_regression.py | 1 - examples/bayesian_nn.py | 1 - examples/beta_bernoulli.py | 1 - examples/bigan.py | 1 - examples/cox_process.py | 1 - examples/deep_exponential_family.py | 1 - examples/dirichlet_categorical.py | 1 - examples/factor_analysis.py | 1 - examples/gan_synthetic_data.py | 1 - examples/gan_wasserstein.py | 1 - examples/gan_wasserstein_synthetic.py | 1 - examples/invgamma_normal_mh.py | 1 - examples/irt.py | 1 - examples/iwvi.py | 1 - examples/lstm.py | 1 - examples/mixture_gaussian_gibbs.py | 1 - examples/mixture_gaussian_mh.py | 1 - examples/normal.py | 1 - examples/normal_normal.py | 1 - examples/normal_sgld.py | 1 - examples/pp_dirichlet_process.py | 1 - examples/pp_dynamic_shape.py | 1 - examples/pp_persistent_randomness.py | 1 - examples/pp_stochastic_control_flow.py | 1 - examples/pp_stochastic_recursion.py | 1 - examples/probabilistic_matrix_factorization.py | 1 - examples/probabilistic_pca_subsampling.py | 1 - examples/rasch_model.py | 1 - examples/sigmoid_belief_network.py | 1 - examples/stochastic_block_model.py | 1 - examples/vae.py | 1 - examples/vae_convolutional.py | 1 - examples/vae_convolutional_prettytensor.py | 1 - tests/data/generate_test_saver.py | 1 - tests/data/generate_toy_data_tfrecords.py | 1 - 37 files changed, 37 deletions(-) diff --git a/examples/bayesian_linear_regression.py b/examples/bayesian_linear_regression.py index d814945bb..038939fd5 100644 --- a/examples/bayesian_linear_regression.py +++ b/examples/bayesian_linear_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian linear regression using stochastic gradient Hamiltonian Monte Carlo. diff --git a/examples/bayesian_linear_regression_implicitklqp.py b/examples/bayesian_linear_regression_implicitklqp.py index 958f52361..b694f990c 100644 --- a/examples/bayesian_linear_regression_implicitklqp.py +++ b/examples/bayesian_linear_regression_implicitklqp.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian linear regression. Inference uses data subsampling and scales the log-likelihood. diff --git a/examples/bayesian_logistic_regression.py b/examples/bayesian_logistic_regression.py index 65a16be0d..a9c123e7d 100644 --- a/examples/bayesian_logistic_regression.py +++ b/examples/bayesian_logistic_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian logistic regression using Hamiltonian Monte Carlo. We visualize the fit. diff --git a/examples/bayesian_nn.py b/examples/bayesian_nn.py index e978d5bc2..6860c0555 100644 --- a/examples/bayesian_nn.py +++ b/examples/bayesian_nn.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian neural network using variational inference (see, e.g., Blundell et al. (2015); Kucukelbir et al. (2016)). diff --git a/examples/beta_bernoulli.py b/examples/beta_bernoulli.py index c602f323c..a21f62740 100644 --- a/examples/beta_bernoulli.py +++ b/examples/beta_bernoulli.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A simple coin flipping example. Inspired by Stan's toy example. 
""" from __future__ import absolute_import diff --git a/examples/bigan.py b/examples/bigan.py index 1b16d06e3..c857bfa9e 100644 --- a/examples/bigan.py +++ b/examples/bigan.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Adversarially Learned Inference (Dumoulin et al., 2017), aka Bidirectional Generative Adversarial Networks (Donahue et al., 2017), for joint learning of generator and inference networks for MNIST. diff --git a/examples/cox_process.py b/examples/cox_process.py index 0f59e91e8..f1ddb263a 100644 --- a/examples/cox_process.py +++ b/examples/cox_process.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A Cox process model for spatial analysis (Cox, 1955; Miller et al., 2014). diff --git a/examples/deep_exponential_family.py b/examples/deep_exponential_family.py index 2fd01c9a2..330c9225f 100644 --- a/examples/deep_exponential_family.py +++ b/examples/deep_exponential_family.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Sparse Gamma deep exponential family (Ranganath et al., 2015). We apply it as a topic model on the collection of NIPS 2011 conference papers. diff --git a/examples/dirichlet_categorical.py b/examples/dirichlet_categorical.py index 81bf04347..ea944368f 100644 --- a/examples/dirichlet_categorical.py +++ b/examples/dirichlet_categorical.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dirichlet-Categorical model. Posterior inference with Edward's BBVI. diff --git a/examples/factor_analysis.py b/examples/factor_analysis.py index f20f06b4b..ec3ec3d6c 100644 --- a/examples/factor_analysis.py +++ b/examples/factor_analysis.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Logistic factor analysis on MNIST. Using Monte Carlo EM, with HMC for the E-step and MAP for the M-step. We fit to just one data point in MNIST. diff --git a/examples/gan_synthetic_data.py b/examples/gan_synthetic_data.py index e2ab7e9f3..aa56c9b84 100644 --- a/examples/gan_synthetic_data.py +++ b/examples/gan_synthetic_data.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generative adversarial network for toy Gaussian data (Goodfellow et al., 2014). diff --git a/examples/gan_wasserstein.py b/examples/gan_wasserstein.py index 3dc688bf6..9d7feb40f 100644 --- a/examples/gan_wasserstein.py +++ b/examples/gan_wasserstein.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wasserstein generative adversarial network for MNIST (Arjovsky et al., 2017). It modifies GANs (Goodfellow et al., 2014) to optimize under the Wasserstein distance. diff --git a/examples/gan_wasserstein_synthetic.py b/examples/gan_wasserstein_synthetic.py index 28aa7f019..a68941a1c 100644 --- a/examples/gan_wasserstein_synthetic.py +++ b/examples/gan_wasserstein_synthetic.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wasserstein generative adversarial network for toy Gaussian data (Arjovsky et al., 2017). A gradient penalty is used to approximate the 1-Lipschitz functional family in the Wasserstein distance (Gulrajani diff --git a/examples/invgamma_normal_mh.py b/examples/invgamma_normal_mh.py index f959830db..d542ce486 100644 --- a/examples/invgamma_normal_mh.py +++ b/examples/invgamma_normal_mh.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ InverseGamma-Normal model Posterior inference with Metropolis Hastings diff --git a/examples/irt.py b/examples/irt.py index 5955b2492..306a63bc8 100644 --- a/examples/irt.py +++ b/examples/irt.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian Item Response Theory (IRT) Mixed Effects Model using variational inference. 
diff --git a/examples/iwvi.py b/examples/iwvi.py index 53d6cc300..c11f926d9 100644 --- a/examples/iwvi.py +++ b/examples/iwvi.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A demo of how to develop new inference algorithms in Edward. Here we implement importance-weighted variational inference. We test it on logistic regression. diff --git a/examples/lstm.py b/examples/lstm.py index 045871b97..762c90752 100644 --- a/examples/lstm.py +++ b/examples/lstm.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """LSTM language model on text8. Default hyperparameters achieve ~78.4 NLL at epoch 50, ~76.1423 NLL at diff --git a/examples/mixture_gaussian_gibbs.py b/examples/mixture_gaussian_gibbs.py index f31e6cb0a..af66b309b 100644 --- a/examples/mixture_gaussian_gibbs.py +++ b/examples/mixture_gaussian_gibbs.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Mixture of Gaussians, with block Gibbs for inference. """ from __future__ import absolute_import diff --git a/examples/mixture_gaussian_mh.py b/examples/mixture_gaussian_mh.py index bdd125117..5bccca711 100644 --- a/examples/mixture_gaussian_mh.py +++ b/examples/mixture_gaussian_mh.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Mixture of Gaussians. Perform inference with Metropolis-Hastings. It utterly fails. This is diff --git a/examples/normal.py b/examples/normal.py index fbe32e8cf..03fdc7b62 100644 --- a/examples/normal.py +++ b/examples/normal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Correlated normal posterior. Inference with Hamiltonian Monte Carlo. """ from __future__ import absolute_import diff --git a/examples/normal_normal.py b/examples/normal_normal.py index 3765068c5..215090b8c 100644 --- a/examples/normal_normal.py +++ b/examples/normal_normal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Normal-normal model using Hamiltonian Monte Carlo.""" from __future__ import absolute_import from __future__ import division diff --git a/examples/normal_sgld.py b/examples/normal_sgld.py index ac5cb5011..aad47d3d2 100644 --- a/examples/normal_sgld.py +++ b/examples/normal_sgld.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Correlated normal posterior. Inference with stochastic gradient Langevin dynamics. """ diff --git a/examples/pp_dirichlet_process.py b/examples/pp_dirichlet_process.py index a61fe0e83..08b071d80 100644 --- a/examples/pp_dirichlet_process.py +++ b/examples/pp_dirichlet_process.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dirichlet process. We implement sample generation from a Dirichlet process (with no base diff --git a/examples/pp_dynamic_shape.py b/examples/pp_dynamic_shape.py index af2dd11b7..86237b1fc 100644 --- a/examples/pp_dynamic_shape.py +++ b/examples/pp_dynamic_shape.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dynamic shapes. We build a random variable whose size depends on a sample from another diff --git a/examples/pp_persistent_randomness.py b/examples/pp_persistent_randomness.py index 6ccdb0c51..d2e7e81f7 100644 --- a/examples/pp_persistent_randomness.py +++ b/examples/pp_persistent_randomness.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Persistent randomness. Our language defines random variables. They enable memoization in the diff --git a/examples/pp_stochastic_control_flow.py b/examples/pp_stochastic_control_flow.py index 15bf18edd..60e148e93 100644 --- a/examples/pp_stochastic_control_flow.py +++ b/examples/pp_stochastic_control_flow.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic control flow. 
We sample from a geometric random variable by using samples from diff --git a/examples/pp_stochastic_recursion.py b/examples/pp_stochastic_recursion.py index 93e837bf4..40a703ca7 100644 --- a/examples/pp_stochastic_recursion.py +++ b/examples/pp_stochastic_recursion.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic recursion. We sample from a geometric random variable by using samples from diff --git a/examples/probabilistic_matrix_factorization.py b/examples/probabilistic_matrix_factorization.py index 92e9cccc8..acd51d63d 100644 --- a/examples/probabilistic_matrix_factorization.py +++ b/examples/probabilistic_matrix_factorization.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Probabilistic matrix factorization using variational inference. Visualizes the actual and the estimated rating matrices as heatmaps. diff --git a/examples/probabilistic_pca_subsampling.py b/examples/probabilistic_pca_subsampling.py index aa4157cba..5dd0a6641 100644 --- a/examples/probabilistic_pca_subsampling.py +++ b/examples/probabilistic_pca_subsampling.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Probabilistic principal components analysis (Tipping and Bishop, 1999). Inference uses data subsampling. diff --git a/examples/rasch_model.py b/examples/rasch_model.py index a1c3ea570..65ebf557c 100644 --- a/examples/rasch_model.py +++ b/examples/rasch_model.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Rasch model (Rasch, 1960).""" from __future__ import absolute_import from __future__ import division diff --git a/examples/sigmoid_belief_network.py b/examples/sigmoid_belief_network.py index 0de8b7457..47382d258 100644 --- a/examples/sigmoid_belief_network.py +++ b/examples/sigmoid_belief_network.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Sigmoid belief network (Neal, 1990) trained on the Caltech 101 Silhouettes data set. diff --git a/examples/stochastic_block_model.py b/examples/stochastic_block_model.py index 28e23ecb3..cba80a867 100644 --- a/examples/stochastic_block_model.py +++ b/examples/stochastic_block_model.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic block model.""" from __future__ import absolute_import from __future__ import division diff --git a/examples/vae.py b/examples/vae.py index 610947c3f..1029cfc8a 100644 --- a/examples/vae.py +++ b/examples/vae.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Variational auto-encoder for MNIST data. References diff --git a/examples/vae_convolutional.py b/examples/vae_convolutional.py index e53733904..537de5343 100644 --- a/examples/vae_convolutional.py +++ b/examples/vae_convolutional.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Convolutional variational auto-encoder for binarized MNIST. The neural networks are written with TensorFlow Slim. diff --git a/examples/vae_convolutional_prettytensor.py b/examples/vae_convolutional_prettytensor.py index d373a84bf..bcab70260 100644 --- a/examples/vae_convolutional_prettytensor.py +++ b/examples/vae_convolutional_prettytensor.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Convolutional variational auto-encoder for binarized MNIST. The neural networks are written with Pretty Tensor. 
diff --git a/tests/data/generate_test_saver.py b/tests/data/generate_test_saver.py index 0dc8dd15b..d5353f7bd 100644 --- a/tests/data/generate_test_saver.py +++ b/tests/data/generate_test_saver.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generate `test_saver`.""" from __future__ import absolute_import from __future__ import division diff --git a/tests/data/generate_toy_data_tfrecords.py b/tests/data/generate_toy_data_tfrecords.py index 2e2a4f560..6c3d91e61 100644 --- a/tests/data/generate_toy_data_tfrecords.py +++ b/tests/data/generate_toy_data_tfrecords.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generate `toy_data.tfrecords`.""" from __future__ import absolute_import from __future__ import division From a02787ca7de4b08d25e0d4cffd2c1582d5655d71 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:23:35 -0800 Subject: [PATCH 3/3] update examples/ --- examples/bayesian_linear_regression.py | 109 +++---- ...bayesian_linear_regression_implicitklqp.py | 137 +++++---- examples/bayesian_logistic_regression.py | 106 ++++--- examples/bayesian_nn.py | 121 ++++---- examples/beta_bernoulli.py | 72 +++-- examples/beta_bernoulli_conjugate.py | 38 +-- examples/bigan.py | 167 +++++----- examples/cox_process.py | 43 +-- examples/deep_exponential_family.py | 289 +++++++++--------- examples/dirichlet_categorical.py | 39 ++- 10 files changed, 597 insertions(+), 524 deletions(-) diff --git a/examples/bayesian_linear_regression.py b/examples/bayesian_linear_regression.py index 038939fd5..8d0a6c997 100644 --- a/examples/bayesian_linear_regression.py +++ b/examples/bayesian_linear_regression.py @@ -19,6 +19,16 @@ from edward.models import Normal, Empirical +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") +tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.") +tf.flags.DEFINE_integer("nburn", default=100, + help="Number of burn-in samples.") +tf.flags.DEFINE_integer("stride", default=10, + help="Frequency with which to plots samples.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, noise_std=0.5): X = np.concatenate([np.linspace(0, 2, num=N / 2), @@ -28,73 +38,70 @@ def build_toy_dataset(N, noise_std=0.5): return X, y -ed.set_seed(42) - -N = 40 # number of data points -D = 1 # number of features +def main(_): + ed.set_seed(42) -# DATA -X_train, y_train = build_toy_dataset(N) -X_test, y_test = build_toy_dataset(N) + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) + X_test, y_test = build_toy_dataset(FLAGS.N) -# MODEL -X = tf.placeholder(tf.float32, [N, D]) -w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) -b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) -y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N)) + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) + b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) + y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N)) -# INFERENCE -T = 5000 # Number of samples. -nburn = 100 # Number of burn-in samples. -stride = 10 # Frequency with which to plot samples. 
-qw = Empirical(params=tf.Variable(tf.random_normal([T, D]))) -qb = Empirical(params=tf.Variable(tf.random_normal([T, 1]))) + # INFERENCE + qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) + qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1])) -inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) -inference.run(step_size=1e-3) + inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) + inference.run(step_size=1e-3) + # CRITICISM -# CRITICISM + # Plot posterior samples. + sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride], + qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride]) + plt.show() -# Plot posterior samples. -sns.jointplot(qb.params.eval()[nburn:T:stride], - qw.params.eval()[nburn:T:stride]) -plt.show() + # Posterior predictive checks. + y_post = ed.copy(y, {w: qw, b: qb}) + # This is equivalent to + # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N)) -# Posterior predictive checks. -y_post = ed.copy(y, {w: qw, b: qb}) -# This is equivalent to -# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N)) + print("Mean squared error on test data:") + print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) -print("Mean squared error on test data:") -print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) + print("Displaying prior predictive samples.") + n_prior_samples = 10 -print("Displaying prior predictive samples.") -n_prior_samples = 10 + w_prior = w.sample(n_prior_samples).eval() + b_prior = b.sample(n_prior_samples).eval() -w_prior = w.sample(n_prior_samples).eval() -b_prior = b.sample(n_prior_samples).eval() + plt.scatter(X_train, y_train) -plt.scatter(X_train, y_train) + inputs = np.linspace(-1, 10, num=400) + for ns in range(n_prior_samples): + output = inputs * w_prior[ns] + b_prior[ns] + plt.plot(inputs, output) -inputs = np.linspace(-1, 10, num=400) -for ns in range(n_prior_samples): - output = inputs * w_prior[ns] + b_prior[ns] - plt.plot(inputs, output) + plt.show() -plt.show() + print("Displaying posterior predictive samples.") + n_posterior_samples = 10 -print("Displaying posterior predictive samples.") -n_posterior_samples = 10 + w_post = qw.sample(n_posterior_samples).eval() + b_post = qb.sample(n_posterior_samples).eval() -w_post = qw.sample(n_posterior_samples).eval() -b_post = qb.sample(n_posterior_samples).eval() + plt.scatter(X_train, y_train) -plt.scatter(X_train, y_train) + inputs = np.linspace(-1, 10, num=400) + for ns in range(n_posterior_samples): + output = inputs * w_post[ns] + b_post[ns] + plt.plot(inputs, output) -inputs = np.linspace(-1, 10, num=400) -for ns in range(n_posterior_samples): - output = inputs * w_post[ns] + b_post[ns] - plt.plot(inputs, output) + plt.show() -plt.show() +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_linear_regression_implicitklqp.py b/examples/bayesian_linear_regression_implicitklqp.py index b694f990c..41a72a132 100644 --- a/examples/bayesian_linear_regression_implicitklqp.py +++ b/examples/bayesian_linear_regression_implicitklqp.py @@ -24,7 +24,12 @@ import tensorflow as tf from edward.models import Normal -from tensorflow.contrib import slim + +tf.flags.DEFINE_integer("N", default=500, help="Number of data points.") +tf.flags.DEFINE_integer("M", default=50, help="Batch size during training.") +tf.flags.DEFINE_integer("D", default=2, help="Number of features.") + +FLAGS = tf.flags.FLAGS def build_toy_dataset(N, w, noise_std=0.1): @@ -34,22 +39,6 @@ def 
build_toy_dataset(N, w, noise_std=0.1): return x, y -def ratio_estimator(data, local_vars, global_vars): - """Takes as input a dict of data x, local variable samples z, and - global variable samples beta; outputs real values of shape - (x.shape[0] + z.shape[0],). In this example, there are no local - variables. - """ - # data[y] has shape (M,); global_vars[w] has shape (D,) - # we concatenate w to each data point y, so input has shape (M, 1 + D) - input = tf.concat([ - tf.reshape(data[y], [M, 1]), - tf.tile(tf.reshape(global_vars[w], [1, D]), [M, 1])], 1) - hidden = slim.fully_connected(input, 64, activation_fn=tf.nn.relu) - output = slim.fully_connected(hidden, 1, activation_fn=None) - return output - - def generator(arrays, batch_size): """Generate batches, one with respect to each array's first axis.""" starts = [0] * len(arrays) # pointers to where we are in iteration @@ -69,52 +58,68 @@ def generator(arrays, batch_size): yield batches -ed.set_seed(42) - -N = 500 # number of data points -M = 50 # batch size during training -D = 2 # number of features - -# DATA -w_true = np.ones(D) * 5.0 -X_train, y_train = build_toy_dataset(N, w_true) -X_test, y_test = build_toy_dataset(N, w_true) -data = generator([X_train, y_train], M) - -# MODEL -X = tf.placeholder(tf.float32, [M, D]) -y_ph = tf.placeholder(tf.float32, [M]) -w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) -y = Normal(loc=ed.dot(X, w), scale=tf.ones(M)) - -# INFERENCE -qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0), - scale=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) - -inference = ed.ImplicitKLqp( - {w: qw}, data={y: y_ph}, - discriminator=ratio_estimator, global_vars={w: qw}) -inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M}) - -sess = ed.get_session() -tf.global_variables_initializer().run() - -for _ in range(inference.n_iter): - X_batch, y_batch = next(data) - for _ in range(5): - info_dict_d = inference.update( - variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) - - info_dict = inference.update( - variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) - info_dict['loss_d'] = info_dict_d['loss_d'] - info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration - - t = info_dict['t'] - inference.print_progress(info_dict) - if t == 1 or t % inference.n_print == 0: - # Check inferred posterior parameters. - mean, std = sess.run([qw.mean(), qw.stddev()]) - print("\nInferred mean & std:") - print(mean) - print(std) +def main(_): + def ratio_estimator(data, local_vars, global_vars): + """Takes as input a dict of data x, local variable samples z, and + global variable samples beta; outputs real values of shape + (x.shape[0] + z.shape[0],). In this example, there are no local + variables. 
+ """ + # data[y] has shape (M,); global_vars[w] has shape (D,) + # we concatenate w to each data point y, so input has shape (M, 1 + D) + input = tf.concat([ + tf.reshape(data[y], [FLAGS.M, 1]), + tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])], 1) + hidden = tf.layers.dense(input, 64, activation=tf.nn.relu) + output = tf.layers.dense(hidden, 1, activation=None) + return output + + ed.set_seed(42) + + # DATA + w_true = np.ones(FLAGS.D) * 5.0 + X_train, y_train = build_toy_dataset(FLAGS.N, w_true) + X_test, y_test = build_toy_dataset(FLAGS.N, w_true) + data = generator([X_train, y_train], FLAGS.M) + + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D]) + y_ph = tf.placeholder(tf.float32, [FLAGS.M]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) + y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M)) + + # INFERENCE + qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0, + scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))) + + inference = ed.ImplicitKLqp( + {w: qw}, data={y: y_ph}, + discriminator=ratio_estimator, global_vars={w: qw}) + inference.initialize(n_iter=5000, n_print=100, + scale={y: float(FLAGS.N) / FLAGS.M}) + + sess = ed.get_session() + tf.global_variables_initializer().run() + + for _ in range(inference.n_iter): + X_batch, y_batch = next(data) + for _ in range(5): + info_dict_d = inference.update( + variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) + + info_dict = inference.update( + variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) + info_dict['loss_d'] = info_dict_d['loss_d'] + info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration + + t = info_dict['t'] + inference.print_progress(info_dict) + if t == 1 or t % inference.n_print == 0: + # Check inferred posterior parameters. 
+ mean, std = sess.run([qw.mean(), qw.stddev()]) + print("\nInferred mean & std:") + print(mean) + print(std) + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_logistic_regression.py b/examples/bayesian_logistic_regression.py index a9c123e7d..141116bb7 100644 --- a/examples/bayesian_logistic_regression.py +++ b/examples/bayesian_logistic_regression.py @@ -13,6 +13,12 @@ from edward.models import Bernoulli, Normal, Empirical +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") +tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, noise_std=0.1): D = 1 @@ -25,68 +31,68 @@ def build_toy_dataset(N, noise_std=0.1): return X, y -ed.set_seed(42) +def main(_): + ed.set_seed(42) -N = 40 # number of data points -D = 1 # number of features + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) -# DATA -X_train, y_train = build_toy_dataset(N) + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=3.0 * tf.ones(FLAGS.D)) + b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) + y = Bernoulli(logits=ed.dot(X, w) + b) -# MODEL -X = tf.placeholder(tf.float32, [N, D]) -w = Normal(loc=tf.zeros(D), scale=3.0 * tf.ones(D)) -b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) -y = Bernoulli(logits=ed.dot(X, w) + b) + # INFERENCE + qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) + qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T])) -# INFERENCE -T = 5000 # number of samples -qw = Empirical(params=tf.Variable(tf.random_normal([T, D]))) -qb = Empirical(params=tf.Variable(tf.random_normal([T]))) + inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train}) + inference.initialize(n_print=10, step_size=0.6) -inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train}) -inference.initialize(n_print=10, step_size=0.6) + # Alternatively, use variational inference. + # qw_loc = tf.get_variable("qw_loc", [FLAGS.D]) + # qw_scale = tf.nn.softplus(tf.get_variable("qw_scale", [FLAGS.D])) + # qb_loc = tf.get_variable("qb_loc", []) + 10.0 + # qb_scale = tf.nn.softplus(tf.get_variable("qb_scale", [])) -# Alternatively, use variational inference. -# qw_loc = tf.Variable(tf.random_normal([D])) -# qw_scale = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) -# qb_loc = tf.Variable(tf.random_normal([]) + 10) -# qb_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) + # qw = Normal(loc=qw_loc, scale=qw_scale) + # qb = Normal(loc=qb_loc, scale=qb_scale) -# qw = Normal(loc=qw_loc, scale=qw_scale) -# qb = Normal(loc=qb_loc, scale=qb_scale) + # inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) + # inference.initialize(n_print=10, n_iter=600) -# inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) -# inference.initialize(n_print=10, n_iter=600) + tf.global_variables_initializer().run() -tf.global_variables_initializer().run() + # Set up figure. + fig = plt.figure(figsize=(8, 8), facecolor='white') + ax = fig.add_subplot(111, frameon=False) + plt.ion() + plt.show(block=False) -# Set up figure. -fig = plt.figure(figsize=(8, 8), facecolor='white') -ax = fig.add_subplot(111, frameon=False) -plt.ion() -plt.show(block=False) + # Build samples from inferred posterior. 
+ n_samples = 50 + inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) + probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) + for _ in range(n_samples)]) -# Build samples from inferred posterior. -n_samples = 50 -inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) -probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) - for _ in range(n_samples)]) + for t in range(inference.n_iter): + info_dict = inference.update() + inference.print_progress(info_dict) -for t in range(inference.n_iter): - info_dict = inference.update() - inference.print_progress(info_dict) + if t % inference.n_print == 0: + outputs = probs.eval() - if t % inference.n_print == 0: - outputs = probs.eval() + # Plot data and functions + plt.cla() + ax.plot(X_train[:], y_train, 'bx') + for s in range(n_samples): + ax.plot(inputs[:], outputs[s], alpha=0.2) - # Plot data and functions - plt.cla() - ax.plot(X_train[:], y_train, 'bx') - for s in range(n_samples): - ax.plot(inputs[:], outputs[s], alpha=0.2) + ax.set_xlim([-5, 3]) + ax.set_ylim([-0.5, 1.5]) + plt.draw() + plt.pause(1.0 / 60.0) - ax.set_xlim([-5, 3]) - ax.set_ylim([-0.5, 1.5]) - plt.draw() - plt.pause(1.0 / 60.0) +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_nn.py b/examples/bayesian_nn.py index 6860c0555..c7a710ffb 100644 --- a/examples/bayesian_nn.py +++ b/examples/bayesian_nn.py @@ -19,6 +19,11 @@ from edward.models import Normal +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N=40, noise_std=0.1): D = 1 @@ -30,61 +35,61 @@ def build_toy_dataset(N=40, noise_std=0.1): return X, y -def neural_network(X): - h = tf.tanh(tf.matmul(X, W_0) + b_0) - h = tf.tanh(tf.matmul(h, W_1) + b_1) - h = tf.matmul(h, W_2) + b_2 - return tf.reshape(h, [-1]) - - -ed.set_seed(42) - -N = 40 # number of data points -D = 1 # number of features - -# DATA -X_train, y_train = build_toy_dataset(N) - -# MODEL -with tf.name_scope("model"): - W_0 = Normal(loc=tf.zeros([D, 10]), scale=tf.ones([D, 10]), name="W_0") - W_1 = Normal(loc=tf.zeros([10, 10]), scale=tf.ones([10, 10]), name="W_1") - W_2 = Normal(loc=tf.zeros([10, 1]), scale=tf.ones([10, 1]), name="W_2") - b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_0") - b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_1") - b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2") - - X = tf.placeholder(tf.float32, [N, D], name="X") - y = Normal(loc=neural_network(X), scale=0.1 * tf.ones(N), name="y") - -# INFERENCE -with tf.name_scope("posterior"): - with tf.name_scope("qW_0"): - qW_0 = Normal(loc=tf.Variable(tf.random_normal([D, 10]), name="loc"), - scale=tf.nn.softplus( - tf.Variable(tf.random_normal([D, 10]), name="scale"))) - with tf.name_scope("qW_1"): - qW_1 = Normal(loc=tf.Variable(tf.random_normal([10, 10]), name="loc"), - scale=tf.nn.softplus( - tf.Variable(tf.random_normal([10, 10]), name="scale"))) - with tf.name_scope("qW_2"): - qW_2 = Normal(loc=tf.Variable(tf.random_normal([10, 1]), name="loc"), - scale=tf.nn.softplus( - tf.Variable(tf.random_normal([10, 1]), name="scale"))) - with tf.name_scope("qb_0"): - qb_0 = Normal(loc=tf.Variable(tf.random_normal([10]), name="loc"), - scale=tf.nn.softplus( - tf.Variable(tf.random_normal([10]), name="scale"))) - with tf.name_scope("qb_1"): - qb_1 = Normal(loc=tf.Variable(tf.random_normal([10]), name="loc"), - scale=tf.nn.softplus( 
- tf.Variable(tf.random_normal([10]), name="scale"))) - with tf.name_scope("qb_2"): - qb_2 = Normal(loc=tf.Variable(tf.random_normal([1]), name="loc"), - scale=tf.nn.softplus( - tf.Variable(tf.random_normal([1]), name="scale"))) - -inference = ed.KLqp({W_0: qW_0, b_0: qb_0, - W_1: qW_1, b_1: qb_1, - W_2: qW_2, b_2: qb_2}, data={X: X_train, y: y_train}) -inference.run(logdir='log') +def main(_): + def neural_network(X): + h = tf.tanh(tf.matmul(X, W_0) + b_0) + h = tf.tanh(tf.matmul(h, W_1) + b_1) + h = tf.matmul(h, W_2) + b_2 + return tf.reshape(h, [-1]) + ed.set_seed(42) + + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) + + # MODEL + with tf.name_scope("model"): + W_0 = Normal(loc=tf.zeros([FLAGS.D, 10]), scale=tf.ones([FLAGS.D, 10]), + name="W_0") + W_1 = Normal(loc=tf.zeros([10, 10]), scale=tf.ones([10, 10]), name="W_1") + W_2 = Normal(loc=tf.zeros([10, 1]), scale=tf.ones([10, 1]), name="W_2") + b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_0") + b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_1") + b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2") + + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D], name="X") + y = Normal(loc=neural_network(X), scale=0.1 * tf.ones(FLAGS.N), name="y") + + # INFERENCE + with tf.variable_scope("posterior"): + with tf.variable_scope("qW_0"): + loc = tf.get_variable("loc", [FLAGS.D, 10]) + scale = tf.nn.softplus(tf.get_variable("scale", [FLAGS.D, 10])) + qW_0 = Normal(loc=loc, scale=scale) + with tf.variable_scope("qW_1"): + loc = tf.get_variable("loc", [10, 10]) + scale = tf.nn.softplus(tf.get_variable("scale", [10, 10])) + qW_1 = Normal(loc=loc, scale=scale) + with tf.variable_scope("qW_2"): + loc = tf.get_variable("loc", [10, 1]) + scale = tf.nn.softplus(tf.get_variable("scale", [10, 1])) + qW_2 = Normal(loc=loc, scale=scale) + with tf.variable_scope("qb_0"): + loc = tf.get_variable("loc", [10]) + scale = tf.nn.softplus(tf.get_variable("scale", [10])) + qb_0 = Normal(loc=loc, scale=scale) + with tf.variable_scope("qb_1"): + loc = tf.get_variable("loc", [10]) + scale = tf.nn.softplus(tf.get_variable("scale", [10])) + qb_1 = Normal(loc=loc, scale=scale) + with tf.variable_scope("qb_2"): + loc = tf.get_variable("loc", [1]) + scale = tf.nn.softplus(tf.get_variable("scale", [1])) + qb_2 = Normal(loc=loc, scale=scale) + + inference = ed.KLqp({W_0: qW_0, b_0: qb_0, + W_1: qW_1, b_1: qb_1, + W_2: qW_2, b_2: qb_2}, data={X: X_train, y: y_train}) + inference.run(logdir='log') + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/beta_bernoulli.py b/examples/beta_bernoulli.py index a21f62740..c3a674091 100644 --- a/examples/beta_bernoulli.py +++ b/examples/beta_bernoulli.py @@ -11,36 +11,42 @@ from edward.models import Bernoulli, Beta, Empirical -ed.set_seed(42) - -# DATA -x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1]) - -# MODEL -p = Beta(1.0, 1.0) -x = Bernoulli(probs=p, sample_shape=10) - -# INFERENCE -qp = Empirical(params=tf.Variable(tf.zeros([1000]) + 0.5)) - -proposal_p = Beta(3.0, 9.0) - -inference = ed.MetropolisHastings({p: qp}, {p: proposal_p}, data={x: x_data}) -inference.run() - -# CRITICISM -# exact posterior has mean 0.25 and std 0.12 -sess = ed.get_session() -mean, stddev = sess.run([qp.mean(), qp.stddev()]) -print("Inferred posterior mean:") -print(mean) -print("Inferred posterior stddev:") -print(stddev) - -x_post = ed.copy(x, {p: qp}) -tx_rep, tx = ed.ppc( - lambda xs, zs: tf.reduce_mean(tf.cast(xs[x_post], tf.float32)), - data={x_post: x_data}) -ed.ppc_stat_hist_plot( - tx[0], tx_rep, 
stat_name=r'$T \equiv$mean', bins=10) -plt.show() + +def main(_): + ed.set_seed(42) + + # DATA + x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1]) + + # MODEL + p = Beta(1.0, 1.0) + x = Bernoulli(probs=p, sample_shape=10) + + # INFERENCE + qp = Empirical(params=tf.get_variable( + "qp/params", [1000], initializer=tf.constant_initializer(0.5))) + + proposal_p = Beta(3.0, 9.0) + + inference = ed.MetropolisHastings({p: qp}, {p: proposal_p}, data={x: x_data}) + inference.run() + + # CRITICISM + # exact posterior has mean 0.25 and std 0.12 + sess = ed.get_session() + mean, stddev = sess.run([qp.mean(), qp.stddev()]) + print("Inferred posterior mean:") + print(mean) + print("Inferred posterior stddev:") + print(stddev) + + x_post = ed.copy(x, {p: qp}) + tx_rep, tx = ed.ppc( + lambda xs, zs: tf.reduce_mean(tf.cast(xs[x_post], tf.float32)), + data={x_post: x_data}) + ed.ppc_stat_hist_plot( + tx[0], tx_rep, stat_name=r'$T \equiv$mean', bins=10) + plt.show() + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/beta_bernoulli_conjugate.py b/examples/beta_bernoulli_conjugate.py index dbff3010a..9dc586766 100644 --- a/examples/beta_bernoulli_conjugate.py +++ b/examples/beta_bernoulli_conjugate.py @@ -13,25 +13,29 @@ from edward.models import Bernoulli, Beta -ed.set_seed(42) -# DATA -x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1]) +def main(_): + ed.set_seed(42) -# MODEL -p = Beta(1.0, 1.0) -x = Bernoulli(probs=p, sample_shape=10) + # DATA + x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1]) -# COMPLETE CONDITIONAL -p_cond = ed.complete_conditional(p) + # MODEL + p = Beta(1.0, 1.0) + x = Bernoulli(probs=p, sample_shape=10) -sess = ed.get_session() -tf.global_variables_initializer().run() + # COMPLETE CONDITIONAL + p_cond = ed.complete_conditional(p) -print('p(probs | x) type:', p_cond.parameters['name']) -param_vals = sess.run({key: val for - key, val in six.iteritems(p_cond.parameters) - if isinstance(val, tf.Tensor)}, {x: x_data}) -print('parameters:') -for key, val in six.iteritems(param_vals): - print('%s:\t%.3f' % (key, val)) + sess = ed.get_session() + + print('p(probs | x) type:', p_cond.parameters['name']) + param_vals = sess.run({key: val for + key, val in six.iteritems(p_cond.parameters) + if isinstance(val, tf.Tensor)}, {x: x_data}) + print('parameters:') + for key, val in six.iteritems(param_vals): + print('%s:\t%.3f' % (key, val)) + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bigan.py b/examples/bigan.py index c857bfa9e..373c1fd70 100644 --- a/examples/bigan.py +++ b/examples/bigan.py @@ -14,9 +14,20 @@ import tensorflow as tf from observations import mnist -from tensorflow.contrib import slim -leak = 0.2 # leak parameter for leakyReLU +tf.flags.DEFINE_string("data_dir", default="tmp/data", help="") +tf.flags.DEFINE_string("out_dir", default="tmp/out", help="") +tf.flags.DEFINE_integer("M", default=100, help="Batch size during training.") +tf.flags.DEFINE_integer("d", default=50, help="Latent dimension.") +tf.flags.DEFINE_float("leak", default=0.2, + help="Leak parameter for leakyReLU.") +tf.flags.DEFINE_integer("hidden_units", default=300, help="") +tf.flags.DEFINE_float("encoder_variance", default=0.01, + help="Set to 0 for deterministic encoder.") + +FLAGS = tf.flags.FLAGS +if not os.path.exists(FLAGS.out_dir): + os.makedirs(FLAGS.out_dir) def generator(array, batch_size): @@ -36,27 +47,28 @@ def generator(array, batch_size): yield batch -def leakyrelu(x, alpha=leak): +def leakyrelu(x, alpha=FLAGS.leak): return tf.maximum(x, alpha * x) def gen_latent(x, 
hidden_units): - h = slim.fully_connected(x, hidden_units, activation_fn=leakyrelu) - z = slim.fully_connected(h, d, activation_fn=None) - return z + np.sqrt(encoder_variance) * np.random.normal(0.0, 1.0, np.shape(z)) + net = tf.layers.dense(x, hidden_units, activation=leakyrelu) + net = tf.layers.dense(net, FLAGS.d, activation=None) + return (net + np.sqrt(FLAGS.encoder_variance) * + np.random.normal(0.0, 1.0, np.shape(net))) def gen_data(z, hidden_units): - h = slim.fully_connected(z, hidden_units, activation_fn=leakyrelu) - x = slim.fully_connected(h, 784, activation_fn=tf.sigmoid) - return x + net = tf.layers.dense(z, hidden_units, activation=leakyrelu) + net = tf.layers.dense(net, 784, activation=tf.sigmoid) + return net def discriminative_network(x, y): # Discriminator must output probability in logits - inputs = tf.concat([x, y], 1) - h1 = slim.fully_connected(inputs, hidden_units, activation_fn=leakyrelu) - logit = slim.fully_connected(h1, 1, activation_fn=None) + net = tf.concat([x, y], 1) + net = tf.layers.dense(net, FLAGS.hidden_units, activation=leakyrelu) + net = tf.layers.dense(net, 1, activation=None) - return logit + return net @@ -77,71 +89,66 @@ def plot(samples): return fig -ed.set_seed(42) - -data_dir = "/tmp/data" -out_dir = "/tmp/out" -if not os.path.exists(out_dir): - os.makedirs(out_dir) -M = 100 # batch size during training -d = 50 # latent dimension -hidden_units = 300 -encoder_variance = 0.01 # Set to 0 for deterministic encoder - -# DATA. MNIST batches are fed at training time. -(x_train, _), (x_test, _) = mnist(data_dir) -x_train_generator = generator(x_train, M) -x_ph = tf.placeholder(tf.float32, [M, 784]) -z_ph = tf.placeholder(tf.float32, [M, d]) - -# MODEL -with tf.variable_scope("Gen"): - xf = gen_data(z_ph, hidden_units) - zf = gen_latent(x_ph, hidden_units) - -# INFERENCE: -optimizer = tf.train.AdamOptimizer() -optimizer_d = tf.train.AdamOptimizer() -inference = ed.BiGANInference( - latent_vars={zf: z_ph}, data={xf: x_ph}, - discriminator=discriminative_network) - -inference.initialize( - optimizer=optimizer, optimizer_d=optimizer_d, n_iter=100000, n_print=3000) - -sess = ed.get_session() -init_op = tf.global_variables_initializer() -sess.run(init_op) - -idx = np.random.randint(M, size=16) -i = 0 -for t in range(inference.n_iter): - if t % inference.n_print == 1: - - samples = sess.run(xf, feed_dict={z_ph: z_batch}) - samples = samples[idx, ] - fig = plot(samples) - plt.savefig(os.path.join(out_dir, '{}{}.png').format( - 'Generated', str(i).zfill(3)), bbox_inches='tight') - plt.close(fig) - - fig = plot(x_batch[idx, ]) - plt.savefig(os.path.join(out_dir, '{}{}.png').format( - 'Base', str(i).zfill(3)), bbox_inches='tight') - plt.close(fig) - - zsam = sess.run(zf, feed_dict={x_ph: x_batch}) - reconstructions = sess.run(xf, feed_dict={z_ph: zsam}) - reconstructions = reconstructions[idx, ] - fig = plot(reconstructions) - plt.savefig(os.path.join(out_dir, '{}{}.png').format( - 'Reconstruct', str(i).zfill(3)), bbox_inches='tight') - plt.close(fig) - - i += 1 - - x_batch = next(x_train_generator) - z_batch = np.random.normal(0, 1, [M, d]) - - info_dict = inference.update(feed_dict={x_ph: x_batch, z_ph: z_batch}) - inference.print_progress(info_dict) +def main(_): + ed.set_seed(42) + + # DATA. MNIST batches are fed at training time. 
+ (x_train, _), (x_test, _) = mnist(FLAGS.data_dir) + x_train_generator = generator(x_train, FLAGS.M) + x_ph = tf.placeholder(tf.float32, [FLAGS.M, 784]) + z_ph = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.d]) + + # MODEL + with tf.variable_scope("Gen"): + xf = gen_data(z_ph, FLAGS.hidden_units) + zf = gen_latent(x_ph, FLAGS.hidden_units) + + # INFERENCE: + optimizer = tf.train.AdamOptimizer() + optimizer_d = tf.train.AdamOptimizer() + inference = ed.BiGANInference( + latent_vars={zf: z_ph}, data={xf: x_ph}, + discriminator=discriminative_network) + + inference.initialize( + optimizer=optimizer, optimizer_d=optimizer_d, n_iter=100000, n_print=3000) + + sess = ed.get_session() + init_op = tf.global_variables_initializer() + sess.run(init_op) + + idx = np.random.randint(FLAGS.M, size=16) + i = 0 + for t in range(inference.n_iter): + if t % inference.n_print == 1: + + samples = sess.run(xf, feed_dict={z_ph: z_batch}) + samples = samples[idx, ] + fig = plot(samples) + plt.savefig(os.path.join(FLAGS.out_dir, '{}{}.png').format( + 'Generated', str(i).zfill(3)), bbox_inches='tight') + plt.close(fig) + + fig = plot(x_batch[idx, ]) + plt.savefig(os.path.join(FLAGS.out_dir, '{}{}.png').format( + 'Base', str(i).zfill(3)), bbox_inches='tight') + plt.close(fig) + + zsam = sess.run(zf, feed_dict={x_ph: x_batch}) + reconstructions = sess.run(xf, feed_dict={z_ph: zsam}) + reconstructions = reconstructions[idx, ] + fig = plot(reconstructions) + plt.savefig(os.path.join(FLAGS.out_dir, '{}{}.png').format( + 'Reconstruct', str(i).zfill(3)), bbox_inches='tight') + plt.close(fig) + + i += 1 + + x_batch = next(x_train_generator) + z_batch = np.random.normal(0, 1, [FLAGS.M, FLAGS.d]) + + info_dict = inference.update(feed_dict={x_ph: x_batch, z_ph: z_batch}) + inference.print_progress(info_dict) + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/cox_process.py b/examples/cox_process.py index f1ddb263a..1a6f50dbb 100644 --- a/examples/cox_process.py +++ b/examples/cox_process.py @@ -28,6 +28,11 @@ from edward.util import rbf from scipy.stats import multivariate_normal, poisson +tf.flags.DEFINE_integer("N", default=308, help="Number of NBA players.") +tf.flags.DEFINE_integer("V", default=2, help="Number of shot locations.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, V): """A simulator mimicking the data set from 2015-2016 NBA season with @@ -42,26 +47,30 @@ def build_toy_dataset(N, V): return x -ed.set_seed(42) -N = 308 # number of NBA players -V = 2 # number of shot locations +def main(_): + ed.set_seed(42) + + # DATA + x_data = build_toy_dataset(FLAGS.N, FLAGS.V) -# DATA -x_data = build_toy_dataset(N, V) + # MODEL + x_ph = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.V]) -# MODEL -x_ph = tf.placeholder(tf.float32, [N, V]) # inputs to Gaussian Process + # Form (N, V, V) covariance, one matrix per data point. + K = tf.stack([rbf(tf.reshape(xn, [FLAGS.V, 1])) + tf.diag([1e-6, 1e-6]) + for xn in tf.unstack(x_ph)]) + f = MultivariateNormalTriL(loc=tf.zeros([FLAGS.N, FLAGS.V]), + scale_tril=tf.cholesky(K)) + x = Poisson(rate=tf.exp(f)) -# Form (N, V, V) covariance, one matrix per data point. 
-K = tf.stack([rbf(tf.reshape(xn, [V, 1])) + tf.diag([1e-6, 1e-6]) - for xn in tf.unstack(x_ph)]) -f = MultivariateNormalTriL(loc=tf.zeros([N, V]), scale_tril=tf.cholesky(K)) -x = Poisson(rate=tf.exp(f)) + # INFERENCE + qf = Normal( + loc=tf.get_variable("qf/loc", [FLAGS.N, FLAGS.V]), + scale=tf.nn.softplus(tf.get_variable("qf/scale", [FLAGS.N, FLAGS.V]))) -# INFERENCE -qf = Normal(loc=tf.Variable(tf.random_normal([N, V])), - scale=tf.nn.softplus(tf.Variable(tf.random_normal([N, V])))) + inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data}) + inference.run(n_iter=5000) -inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data}) -inference.run(n_iter=5000) +if __name__ == "__main__": + tf.app.run() diff --git a/examples/deep_exponential_family.py b/examples/deep_exponential_family.py index 330c9225f..513f36a44 100644 --- a/examples/deep_exponential_family.py +++ b/examples/deep_exponential_family.py @@ -77,141 +77,156 @@ class objects visual from edward.util import Progbar from observations import nips -ed.set_seed(42) - -data_dir = "~/data" -logdir = '~/log/def/' -data_dir = os.path.expanduser(data_dir) -logdir = os.path.expanduser(logdir) - -# DATA -x_train, metadata = nips(data_dir) -documents = metadata['columns'] -words = metadata['rows'] - -# Subset to documents in 2011 and words appearing in at least two -# documents and have a total word count of at least 10. -doc_idx = [i for i, document in enumerate(documents) - if document.startswith('2011')] -documents = [documents[doc] for doc in doc_idx] -x_train = x_train[:, doc_idx] -word_idx = np.logical_and(np.sum(x_train != 0, 1) >= 2, - np.sum(x_train, 1) >= 10) -words = [word for word, idx in zip(words, word_idx) if idx] -x_train = x_train[word_idx, :] -x_train = x_train.T - -N = x_train.shape[0] # number of documents -D = x_train.shape[1] # vocabulary size -K = [100, 30, 15] # number of components per layer -q = 'lognormal' # choice of q; 'lognormal' or 'gamma' -shape = 0.1 # gamma shape parameter -lr = 1e-4 # learning rate step-size - -# MODEL -W2 = Gamma(0.1, 0.3, sample_shape=[K[2], K[1]]) -W1 = Gamma(0.1, 0.3, sample_shape=[K[1], K[0]]) -W0 = Gamma(0.1, 0.3, sample_shape=[K[0], D]) - -z3 = Gamma(0.1, 0.1, sample_shape=[N, K[2]]) -z2 = Gamma(shape, shape / tf.matmul(z3, W2)) -z1 = Gamma(shape, shape / tf.matmul(z2, W1)) -x = Poisson(tf.matmul(z1, W0)) - - -# INFERENCE -def pointmass_q(shape): - min_mean = 1e-3 - mean_init = tf.random_normal(shape) - rv = PointMass(tf.maximum(tf.nn.softplus(tf.Variable(mean_init)), min_mean)) - return rv - - -def gamma_q(shape): - # Parameterize Gamma q's via shape and scale, with softplus unconstraints. 
- min_shape = 1e-3 - min_scale = 1e-5 - shape_init = 0.5 + 0.1 * tf.random_normal(shape) - scale_init = 0.1 * tf.random_normal(shape) - rv = Gamma(tf.maximum(tf.nn.softplus(tf.Variable(shape_init)), - min_shape), - tf.maximum(1.0 / tf.nn.softplus(tf.Variable(scale_init)), - 1.0 / min_scale)) - return rv - - -def lognormal_q(shape): - min_scale = 1e-5 - loc_init = tf.random_normal(shape) - scale_init = 0.1 * tf.random_normal(shape) - rv = TransformedDistribution( - distribution=Normal( - tf.Variable(loc_init), - tf.maximum(tf.nn.softplus(tf.Variable(scale_init)), min_scale)), - bijector=tf.contrib.distributions.bijectors.Exp()) - return rv - - -qW2 = pointmass_q(W2.shape) -qW1 = pointmass_q(W1.shape) -qW0 = pointmass_q(W0.shape) -if q == 'gamma': - qz3 = gamma_q(z3.shape) - qz2 = gamma_q(z2.shape) - qz1 = gamma_q(z1.shape) -else: - qz3 = lognormal_q(z3.shape) - qz2 = lognormal_q(z2.shape) - qz1 = lognormal_q(z1.shape) - -# We apply variational EM with E-step over local variables -# and M-step to point estimate the global weight matrices. -inference_e = ed.KLqp({z1: qz1, z2: qz2, z3: qz3}, - data={x: x_train, W0: qW0, W1: qW1, W2: qW2}) -inference_m = ed.MAP({W0: qW0, W1: qW1, W2: qW2}, - data={x: x_train, z1: qz1, z2: qz2, z3: qz3}) - -optimizer_e = tf.train.RMSPropOptimizer(lr) -optimizer_m = tf.train.RMSPropOptimizer(lr) +tf.flags.DEFINE_string("data_dir", default="~/data", help="") +tf.flags.DEFINE_string("logdir", default="~/log/def/", help="") +tf.flags.DEFINE_list("K", default=[100, 30, 15], + help="Number of components per layer.") +tf.flags.DEFINE_string("q", default="lognormal", + help="Choice of q; 'lognormal' or 'gamma'.") +tf.flags.DEFINE_float("shape", default=0.1, help="Gamma shape parameter.") +tf.flags.DEFINE_float("lr", default=1e-4, help="Learning rate step-size.") + +FLAGS = tf.flags.FLAGS +FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir) +FLAGS.logdir = os.path.expanduser(FLAGS.logdir) timestamp = datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S") -logdir += timestamp + '_' + '_'.join([str(ks) for ks in K]) + \ - '_q_' + str(q) + '_lr_' + str(lr) -kwargs = {'optimizer': optimizer_e, - 'n_print': 100, - 'logdir': logdir, - 'log_timestamp': False} -if q == 'gamma': - kwargs['n_samples'] = 30 -inference_e.initialize(**kwargs) -inference_m.initialize(optimizer=optimizer_m) - -sess = ed.get_session() -tf.global_variables_initializer().run() - -n_epoch = 20 -n_iter_per_epoch = 10000 -for epoch in range(n_epoch): - print("Epoch {}".format(epoch)) - nll = 0.0 - - pbar = Progbar(n_iter_per_epoch) - for t in range(1, n_iter_per_epoch + 1): - pbar.update(t) - info_dict_e = inference_e.update() - info_dict_m = inference_m.update() - nll += info_dict_e['loss'] - - # Compute perplexity averaged over a number of training iterations. - # The model's negative log-likelihood of data is upper bounded by - # the variational objective. - nll = nll / n_iter_per_epoch - perplexity = np.exp(nll / np.sum(x_train)) - print("Negative log-likelihood <= {:0.3f}".format(nll)) - print("Perplexity <= {:0.3f}".format(perplexity)) - - # Print top 10 words for first 10 topics. 
- qW0_vals = sess.run(qW0) - for k in range(10): - top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1] - top_words = " ".join([words[i] for i in top_words_idx]) - print("Topic {}: {}".format(k, top_words)) +FLAGS.logdir += timestamp + '_' + '_'.join([str(ks) for ks in FLAGS.K]) + \ + '_q_' + str(FLAGS.q) + '_lr_' + str(FLAGS.lr) + + +def pointmass_q(shape, name=None): + with tf.variable_scope(name, default_name="pointmass_q"): + min_mean = 1e-3 + mean = tf.get_variable("mean", shape) + rv = PointMass(tf.maximum(tf.nn.softplus(mean), min_mean)) + return rv + + +def gamma_q(shape, name=None): + # Parameterize Gamma q's via shape and scale, with softplus unconstraints. + with tf.variable_scope(name, default_name="gamma_q"): + min_shape = 1e-3 + min_scale = 1e-5 + shape_init = 0.5 + 0.1 * tf.random_normal(shape) + scale_init = 0.1 * tf.random_normal(shape) + shape = tf.get_variable("shape", shape, + initializer=tf.constant_initializer(shape_init)) + scale = tf.get_variable("scale", shape, + initializer=tf.constant_initializer(shape_init)) + rv = Gamma(tf.maximum(tf.nn.softplus(shape), min_shape), + tf.maximum(1.0 / tf.nn.softplus(scale), 1.0 / min_scale)) + return rv + + +def lognormal_q(shape, name=None): + with tf.variable_scope(name, default_name="lognormal_q"): + min_scale = 1e-5 + loc_init = tf.random_normal(shape) + scale_init = 0.1 * tf.random_normal(shape) + loc = tf.get_variable("loc", shape, + initializer=tf.constant_initializer(loc_init)) + scale = tf.get_variable("scale", shape, + initializer=tf.constant_initializer(scale_init)) + rv = TransformedDistribution( + distribution=Normal(loc, tf.maximum(tf.nn.softplus(scale), min_scale)), + bijector=tf.contrib.distributions.bijectors.Exp()) + return rv + + +def main(_): + ed.set_seed(42) + + # DATA + x_train, metadata = nips(FLAGS.data_dir) + documents = metadata['columns'] + words = metadata['rows'] + + # Subset to documents in 2011 and words appearing in at least two + # documents and have a total word count of at least 10. + doc_idx = [i for i, document in enumerate(documents) + if document.startswith('2011')] + documents = [documents[doc] for doc in doc_idx] + x_train = x_train[:, doc_idx] + word_idx = np.logical_and(np.sum(x_train != 0, 1) >= 2, + np.sum(x_train, 1) >= 10) + words = [word for word, idx in zip(words, word_idx) if idx] + x_train = x_train[word_idx, :] + x_train = x_train.T + + N = x_train.shape[0] # number of documents + D = x_train.shape[1] # vocabulary size + + # MODEL + W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]]) + W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]]) + W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D]) + + z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]]) + z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2)) + z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1)) + x = Poisson(tf.matmul(z1, W0)) + + # INFERENCE + qW2 = pointmass_q(W2.shape) + qW1 = pointmass_q(W1.shape) + qW0 = pointmass_q(W0.shape) + if FLAGS.q == 'gamma': + qz3 = gamma_q(z3.shape) + qz2 = gamma_q(z2.shape) + qz1 = gamma_q(z1.shape) + else: + qz3 = lognormal_q(z3.shape) + qz2 = lognormal_q(z2.shape) + qz1 = lognormal_q(z1.shape) + + # We apply variational EM with E-step over local variables + # and M-step to point estimate the global weight matrices. 
+ inference_e = ed.KLqp({z1: qz1, z2: qz2, z3: qz3}, + data={x: x_train, W0: qW0, W1: qW1, W2: qW2}) + inference_m = ed.MAP({W0: qW0, W1: qW1, W2: qW2}, + data={x: x_train, z1: qz1, z2: qz2, z3: qz3}) + + optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr) + optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr) + kwargs = {'optimizer': optimizer_e, + 'n_print': 100, + 'logdir': FLAGS.logdir, + 'log_timestamp': False} + if FLAGS.q == 'gamma': + kwargs['n_samples'] = 30 + inference_e.initialize(**kwargs) + inference_m.initialize(optimizer=optimizer_m) + + sess = ed.get_session() + tf.global_variables_initializer().run() + + n_epoch = 20 + n_iter_per_epoch = 10000 + for epoch in range(n_epoch): + print("Epoch {}".format(epoch)) + nll = 0.0 + + pbar = Progbar(n_iter_per_epoch) + for t in range(1, n_iter_per_epoch + 1): + pbar.update(t) + info_dict_e = inference_e.update() + info_dict_m = inference_m.update() + nll += info_dict_e['loss'] + + # Compute perplexity averaged over a number of training iterations. + # The model's negative log-likelihood of data is upper bounded by + # the variational objective. + nll = nll / n_iter_per_epoch + perplexity = np.exp(nll / np.sum(x_train)) + print("Negative log-likelihood <= {:0.3f}".format(nll)) + print("Perplexity <= {:0.3f}".format(perplexity)) + + # Print top 10 words for first 10 topics. + qW0_vals = sess.run(qW0) + for k in range(10): + top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1] + top_words = " ".join([words[i] for i in top_words_idx]) + print("Topic {}: {}".format(k, top_words)) + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/dirichlet_categorical.py b/examples/dirichlet_categorical.py index ea944368f..05a5645dd 100644 --- a/examples/dirichlet_categorical.py +++ b/examples/dirichlet_categorical.py @@ -12,23 +12,32 @@ from edward.models import Categorical, Dirichlet -N = 1000 -K = 4 +tf.flags.DEFINE_integer("N", default=1000, help="") +tf.flags.DEFINE_integer("K", default=4, help="") -# DATA -pi_true = np.random.dirichlet(np.array([20.0, 30.0, 10.0, 10.0])) -z_data = np.array([np.random.choice(K, 1, p=pi_true)[0] for n in range(N)]) -print('pi={}'.format(pi_true)) +FLAGS = tf.flags.FLAGS -# MODEL -pi = Dirichlet(tf.ones(4)) -z = Categorical(probs=tf.ones([N, 1]) * pi) -# INFERENCE -qpi = Dirichlet(tf.nn.softplus(tf.Variable(tf.random_normal([K])))) +def main(_): + # DATA + pi_true = np.random.dirichlet(np.array([20.0, 30.0, 10.0, 10.0])) + z_data = np.array([np.random.choice(FLAGS.K, 1, p=pi_true)[0] + for n in range(FLAGS.N)]) + print('pi={}'.format(pi_true)) -inference = ed.KLqp({pi: qpi}, data={z: z_data}) -inference.run(n_iter=1500, n_samples=30) + # MODEL + pi = Dirichlet(tf.ones(4)) + z = Categorical(probs=tf.ones([FLAGS.N, 1]) * pi) -sess = ed.get_session() -print('Inferred pi={}'.format(sess.run(qpi.mean()))) + # INFERENCE + qpi = Dirichlet(tf.nn.softplus( + tf.get_variable("qpi/concentration", [FLAGS.K]))) + + inference = ed.KLqp({pi: qpi}, data={z: z_data}) + inference.run(n_iter=1500, n_samples=30) + + sess = ed.get_session() + print('Inferred pi={}'.format(sess.run(qpi.mean()))) + +if __name__ == "__main__": + tf.app.run()
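The recurring change in PATCH 3/3 is to move each example's module-level script code into the tf.flags / main(_) / tf.app.run() skeleton. A minimal sketch of that skeleton follows; the flag name is illustrative rather than taken from any one example.

import edward as ed
import tensorflow as tf

tf.flags.DEFINE_integer("N", default=40, help="Number of data points.")

FLAGS = tf.flags.FLAGS


def main(_):
  # tf.app.run() parses the flags and then calls main(_).
  ed.set_seed(42)
  print("Running with N = {}".format(FLAGS.N))

if __name__ == "__main__":
  tf.app.run()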