From 054c520974cbf6d642f8cd78ab65c7b0823d15e4 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:10:06 -0800 Subject: [PATCH 1/3] update docs --- README.md | 5 +++-- docs/tex/index.tex | 6 +++--- docs/tex/troubleshooting.tex | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6291d733a..33dd43f77 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,9 @@ It supports __modeling__ with + Directed graphical models + Neural networks (via libraries such as - [Keras](http://keras.io) and [TensorFlow - Slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim)) + [`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers) + and + [Keras](http://keras.io)) + Implicit generative models + Bayesian nonparametrics and probabilistic programs diff --git a/docs/tex/index.tex b/docs/tex/index.tex index be29c113a..006ab9925 100644 --- a/docs/tex/index.tex +++ b/docs/tex/index.tex @@ -14,9 +14,9 @@ \subsection{A library for probabilistic modeling, inference, and criticism.} \begin{itemize} \item Directed graphical models \item Neural networks (via libraries such as - \href{http://keras.io}{Keras} and - \href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow - Slim}) + \href{https://www.tensorflow.org/api_docs/python/tf/layers}{\texttt{tf.layers}} + and + \href{http://keras.io}{Keras}) \item Implicit generative models \item Bayesian nonparametrics and probabilistic programs \end{itemize} diff --git a/docs/tex/troubleshooting.tex b/docs/tex/troubleshooting.tex index 678b2acb5..3de76f97f 100644 --- a/docs/tex/troubleshooting.tex +++ b/docs/tex/troubleshooting.tex @@ -47,18 +47,17 @@ \subsubsection{Full Installation} minimal effort under a one-line interface. Observations was originally developed for Edward and it has since become a standalone library for general machine learning. - \item Neural networks are supported through four libraries: + \item Neural networks are supported through any library operating + on TensorFlow. For example: \texttt{tf.layers}, \href{http://keras.io}{Keras} (>=1.0) \begin{lstlisting}[language=JSON] pip install keras==2.0.4 \end{lstlisting} + and \href{https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim}{TensorFlow Slim} - (native in TensorFlow), and - \href{https://github.com/google/prettytensor}{PrettyTensor} (>=0.7.4) -\begin{lstlisting}[language=JSON] pip install prettytensor -\end{lstlisting} + (native in TensorFlow). + Note that as of Keras 2.0.5, neural net layer transformations can no longer be applied directly to random variables. For example, if \texttt{x} is an \texttt{ed.RandomVariable} object, one must call \texttt{tf.convert_to_tensor} on it before applying a layer transformation, as in \texttt{Dense(256)(tf.convert_to_tensor(x))}. See \href{https://github.com/fchollet/keras/issues/6979}{here} for more details.
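For concreteness, a minimal sketch of this workaround (illustrative only, not part of the patch; it assumes Edward with Keras >= 2.0.5 installed, and the shapes and layer size are arbitrary):

\begin{lstlisting}[language=Python]
import tensorflow as tf
from keras.layers import Dense
from edward.models import Normal

# x is an Edward random variable; Keras >= 2.0.5 layers expect a Tensor.
x = Normal(loc=tf.zeros([128, 64]), scale=tf.ones([128, 64]))

# Convert the random variable to a tensor before applying the layer.
h = Dense(256)(tf.convert_to_tensor(x))
\end{lstlisting}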
\item Notebooks require From bbb450054d7c339245ceba703ea6cdc90fb2a5f9 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:47:52 -0800 Subject: [PATCH 2/3] rm '#!/usr/bin/env python' --- examples/bayesian_linear_regression.py | 1 - examples/bayesian_linear_regression_implicitklqp.py | 1 - examples/bayesian_logistic_regression.py | 1 - examples/bayesian_nn.py | 1 - examples/beta_bernoulli.py | 1 - examples/bigan.py | 1 - examples/cox_process.py | 1 - examples/deep_exponential_family.py | 1 - examples/dirichlet_categorical.py | 1 - examples/factor_analysis.py | 1 - examples/gan_synthetic_data.py | 1 - examples/gan_wasserstein.py | 1 - examples/gan_wasserstein_synthetic.py | 1 - examples/invgamma_normal_mh.py | 1 - examples/irt.py | 1 - examples/iwvi.py | 1 - examples/lstm.py | 1 - examples/mixture_gaussian_gibbs.py | 1 - examples/mixture_gaussian_mh.py | 1 - examples/normal.py | 1 - examples/normal_normal.py | 1 - examples/normal_sgld.py | 1 - examples/pp_dirichlet_process.py | 1 - examples/pp_dynamic_shape.py | 1 - examples/pp_persistent_randomness.py | 1 - examples/pp_stochastic_control_flow.py | 1 - examples/pp_stochastic_recursion.py | 1 - examples/probabilistic_matrix_factorization.py | 1 - examples/probabilistic_pca_subsampling.py | 1 - examples/rasch_model.py | 1 - examples/sigmoid_belief_network.py | 1 - examples/stochastic_block_model.py | 1 - examples/vae.py | 1 - examples/vae_convolutional.py | 1 - examples/vae_convolutional_prettytensor.py | 1 - tests/data/generate_test_saver.py | 1 - tests/data/generate_toy_data_tfrecords.py | 1 - 37 files changed, 37 deletions(-) diff --git a/examples/bayesian_linear_regression.py b/examples/bayesian_linear_regression.py index d814945bb..038939fd5 100644 --- a/examples/bayesian_linear_regression.py +++ b/examples/bayesian_linear_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian linear regression using stochastic gradient Hamiltonian Monte Carlo. diff --git a/examples/bayesian_linear_regression_implicitklqp.py b/examples/bayesian_linear_regression_implicitklqp.py index 958f52361..b694f990c 100644 --- a/examples/bayesian_linear_regression_implicitklqp.py +++ b/examples/bayesian_linear_regression_implicitklqp.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian linear regression. Inference uses data subsampling and scales the log-likelihood. diff --git a/examples/bayesian_logistic_regression.py b/examples/bayesian_logistic_regression.py index 65a16be0d..a9c123e7d 100644 --- a/examples/bayesian_logistic_regression.py +++ b/examples/bayesian_logistic_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian logistic regression using Hamiltonian Monte Carlo. We visualize the fit. diff --git a/examples/bayesian_nn.py b/examples/bayesian_nn.py index e978d5bc2..6860c0555 100644 --- a/examples/bayesian_nn.py +++ b/examples/bayesian_nn.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian neural network using variational inference (see, e.g., Blundell et al. (2015); Kucukelbir et al. (2016)). diff --git a/examples/beta_bernoulli.py b/examples/beta_bernoulli.py index c602f323c..a21f62740 100644 --- a/examples/beta_bernoulli.py +++ b/examples/beta_bernoulli.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A simple coin flipping example. Inspired by Stan's toy example. 
""" from __future__ import absolute_import diff --git a/examples/bigan.py b/examples/bigan.py index 1b16d06e3..c857bfa9e 100644 --- a/examples/bigan.py +++ b/examples/bigan.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Adversarially Learned Inference (Dumoulin et al., 2017), aka Bidirectional Generative Adversarial Networks (Donahue et al., 2017), for joint learning of generator and inference networks for MNIST. diff --git a/examples/cox_process.py b/examples/cox_process.py index 0f59e91e8..f1ddb263a 100644 --- a/examples/cox_process.py +++ b/examples/cox_process.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A Cox process model for spatial analysis (Cox, 1955; Miller et al., 2014). diff --git a/examples/deep_exponential_family.py b/examples/deep_exponential_family.py index 2fd01c9a2..330c9225f 100644 --- a/examples/deep_exponential_family.py +++ b/examples/deep_exponential_family.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Sparse Gamma deep exponential family (Ranganath et al., 2015). We apply it as a topic model on the collection of NIPS 2011 conference papers. diff --git a/examples/dirichlet_categorical.py b/examples/dirichlet_categorical.py index 81bf04347..ea944368f 100644 --- a/examples/dirichlet_categorical.py +++ b/examples/dirichlet_categorical.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dirichlet-Categorical model. Posterior inference with Edward's BBVI. diff --git a/examples/factor_analysis.py b/examples/factor_analysis.py index f20f06b4b..ec3ec3d6c 100644 --- a/examples/factor_analysis.py +++ b/examples/factor_analysis.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Logistic factor analysis on MNIST. Using Monte Carlo EM, with HMC for the E-step and MAP for the M-step. We fit to just one data point in MNIST. diff --git a/examples/gan_synthetic_data.py b/examples/gan_synthetic_data.py index e2ab7e9f3..aa56c9b84 100644 --- a/examples/gan_synthetic_data.py +++ b/examples/gan_synthetic_data.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generative adversarial network for toy Gaussian data (Goodfellow et al., 2014). diff --git a/examples/gan_wasserstein.py b/examples/gan_wasserstein.py index 3dc688bf6..9d7feb40f 100644 --- a/examples/gan_wasserstein.py +++ b/examples/gan_wasserstein.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wasserstein generative adversarial network for MNIST (Arjovsky et al., 2017). It modifies GANs (Goodfellow et al., 2014) to optimize under the Wasserstein distance. diff --git a/examples/gan_wasserstein_synthetic.py b/examples/gan_wasserstein_synthetic.py index 28aa7f019..a68941a1c 100644 --- a/examples/gan_wasserstein_synthetic.py +++ b/examples/gan_wasserstein_synthetic.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wasserstein generative adversarial network for toy Gaussian data (Arjovsky et al., 2017). A gradient penalty is used to approximate the 1-Lipschitz functional family in the Wasserstein distance (Gulrajani diff --git a/examples/invgamma_normal_mh.py b/examples/invgamma_normal_mh.py index f959830db..d542ce486 100644 --- a/examples/invgamma_normal_mh.py +++ b/examples/invgamma_normal_mh.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ InverseGamma-Normal model Posterior inference with Metropolis Hastings diff --git a/examples/irt.py b/examples/irt.py index 5955b2492..306a63bc8 100644 --- a/examples/irt.py +++ b/examples/irt.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Bayesian Item Response Theory (IRT) Mixed Effects Model using variational inference. 
diff --git a/examples/iwvi.py b/examples/iwvi.py index 53d6cc300..c11f926d9 100644 --- a/examples/iwvi.py +++ b/examples/iwvi.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A demo of how to develop new inference algorithms in Edward. Here we implement importance-weighted variational inference. We test it on logistic regression. diff --git a/examples/lstm.py b/examples/lstm.py index 045871b97..762c90752 100644 --- a/examples/lstm.py +++ b/examples/lstm.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """LSTM language model on text8. Default hyperparameters achieve ~78.4 NLL at epoch 50, ~76.1423 NLL at diff --git a/examples/mixture_gaussian_gibbs.py b/examples/mixture_gaussian_gibbs.py index f31e6cb0a..af66b309b 100644 --- a/examples/mixture_gaussian_gibbs.py +++ b/examples/mixture_gaussian_gibbs.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Mixture of Gaussians, with block Gibbs for inference. """ from __future__ import absolute_import diff --git a/examples/mixture_gaussian_mh.py b/examples/mixture_gaussian_mh.py index bdd125117..5bccca711 100644 --- a/examples/mixture_gaussian_mh.py +++ b/examples/mixture_gaussian_mh.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Mixture of Gaussians. Perform inference with Metropolis-Hastings. It utterly fails. This is diff --git a/examples/normal.py b/examples/normal.py index fbe32e8cf..03fdc7b62 100644 --- a/examples/normal.py +++ b/examples/normal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Correlated normal posterior. Inference with Hamiltonian Monte Carlo. """ from __future__ import absolute_import diff --git a/examples/normal_normal.py b/examples/normal_normal.py index 3765068c5..215090b8c 100644 --- a/examples/normal_normal.py +++ b/examples/normal_normal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Normal-normal model using Hamiltonian Monte Carlo.""" from __future__ import absolute_import from __future__ import division diff --git a/examples/normal_sgld.py b/examples/normal_sgld.py index ac5cb5011..aad47d3d2 100644 --- a/examples/normal_sgld.py +++ b/examples/normal_sgld.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Correlated normal posterior. Inference with stochastic gradient Langevin dynamics. """ diff --git a/examples/pp_dirichlet_process.py b/examples/pp_dirichlet_process.py index a61fe0e83..08b071d80 100644 --- a/examples/pp_dirichlet_process.py +++ b/examples/pp_dirichlet_process.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dirichlet process. We implement sample generation from a Dirichlet process (with no base diff --git a/examples/pp_dynamic_shape.py b/examples/pp_dynamic_shape.py index af2dd11b7..86237b1fc 100644 --- a/examples/pp_dynamic_shape.py +++ b/examples/pp_dynamic_shape.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Dynamic shapes. We build a random variable whose size depends on a sample from another diff --git a/examples/pp_persistent_randomness.py b/examples/pp_persistent_randomness.py index 6ccdb0c51..d2e7e81f7 100644 --- a/examples/pp_persistent_randomness.py +++ b/examples/pp_persistent_randomness.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Persistent randomness. Our language defines random variables. They enable memoization in the diff --git a/examples/pp_stochastic_control_flow.py b/examples/pp_stochastic_control_flow.py index 15bf18edd..60e148e93 100644 --- a/examples/pp_stochastic_control_flow.py +++ b/examples/pp_stochastic_control_flow.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic control flow. 
We sample from a geometric random variable by using samples from diff --git a/examples/pp_stochastic_recursion.py b/examples/pp_stochastic_recursion.py index 93e837bf4..40a703ca7 100644 --- a/examples/pp_stochastic_recursion.py +++ b/examples/pp_stochastic_recursion.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic recursion. We sample from a geometric random variable by using samples from diff --git a/examples/probabilistic_matrix_factorization.py b/examples/probabilistic_matrix_factorization.py index 92e9cccc8..acd51d63d 100644 --- a/examples/probabilistic_matrix_factorization.py +++ b/examples/probabilistic_matrix_factorization.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Probabilistic matrix factorization using variational inference. Visualizes the actual and the estimated rating matrices as heatmaps. diff --git a/examples/probabilistic_pca_subsampling.py b/examples/probabilistic_pca_subsampling.py index aa4157cba..5dd0a6641 100644 --- a/examples/probabilistic_pca_subsampling.py +++ b/examples/probabilistic_pca_subsampling.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Probabilistic principal components analysis (Tipping and Bishop, 1999). Inference uses data subsampling. diff --git a/examples/rasch_model.py b/examples/rasch_model.py index a1c3ea570..65ebf557c 100644 --- a/examples/rasch_model.py +++ b/examples/rasch_model.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Rasch model (Rasch, 1960).""" from __future__ import absolute_import from __future__ import division diff --git a/examples/sigmoid_belief_network.py b/examples/sigmoid_belief_network.py index 0de8b7457..47382d258 100644 --- a/examples/sigmoid_belief_network.py +++ b/examples/sigmoid_belief_network.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Sigmoid belief network (Neal, 1990) trained on the Caltech 101 Silhouettes data set. diff --git a/examples/stochastic_block_model.py b/examples/stochastic_block_model.py index 28e23ecb3..cba80a867 100644 --- a/examples/stochastic_block_model.py +++ b/examples/stochastic_block_model.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Stochastic block model.""" from __future__ import absolute_import from __future__ import division diff --git a/examples/vae.py b/examples/vae.py index 610947c3f..1029cfc8a 100644 --- a/examples/vae.py +++ b/examples/vae.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Variational auto-encoder for MNIST data. References diff --git a/examples/vae_convolutional.py b/examples/vae_convolutional.py index e53733904..537de5343 100644 --- a/examples/vae_convolutional.py +++ b/examples/vae_convolutional.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Convolutional variational auto-encoder for binarized MNIST. The neural networks are written with TensorFlow Slim. diff --git a/examples/vae_convolutional_prettytensor.py b/examples/vae_convolutional_prettytensor.py index d373a84bf..bcab70260 100644 --- a/examples/vae_convolutional_prettytensor.py +++ b/examples/vae_convolutional_prettytensor.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Convolutional variational auto-encoder for binarized MNIST. The neural networks are written with Pretty Tensor. 
diff --git a/tests/data/generate_test_saver.py b/tests/data/generate_test_saver.py index 0dc8dd15b..d5353f7bd 100644 --- a/tests/data/generate_test_saver.py +++ b/tests/data/generate_test_saver.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generate `test_saver`.""" from __future__ import absolute_import from __future__ import division diff --git a/tests/data/generate_toy_data_tfrecords.py b/tests/data/generate_toy_data_tfrecords.py index 2e2a4f560..6c3d91e61 100644 --- a/tests/data/generate_toy_data_tfrecords.py +++ b/tests/data/generate_toy_data_tfrecords.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Generate `toy_data.tfrecords`.""" from __future__ import absolute_import from __future__ import division From 568afac9ac3e6d5af0aae7cd8f837c3f044e4882 Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Sun, 28 Jan 2018 15:23:35 -0800 Subject: [PATCH 3/3] update examples/ --- examples/bayesian_linear_regression.py | 109 +++++++------- ...bayesian_linear_regression_implicitklqp.py | 137 +++++++++--------- examples/bayesian_logistic_regression.py | 106 +++++++------- 3 files changed, 185 insertions(+), 167 deletions(-) diff --git a/examples/bayesian_linear_regression.py b/examples/bayesian_linear_regression.py index 038939fd5..8d0a6c997 100644 --- a/examples/bayesian_linear_regression.py +++ b/examples/bayesian_linear_regression.py @@ -19,6 +19,16 @@ from edward.models import Normal, Empirical +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") +tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.") +tf.flags.DEFINE_integer("nburn", default=100, + help="Number of burn-in samples.") +tf.flags.DEFINE_integer("stride", default=10, + help="Frequency with which to plot samples.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, noise_std=0.5): X = np.concatenate([np.linspace(0, 2, num=N / 2), @@ -28,73 +38,70 @@ def build_toy_dataset(N, noise_std=0.5): return X, y -ed.set_seed(42) - -N = 40 # number of data points -D = 1 # number of features +def main(_): + ed.set_seed(42) -# DATA -X_train, y_train = build_toy_dataset(N) -X_test, y_test = build_toy_dataset(N) + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) + X_test, y_test = build_toy_dataset(FLAGS.N) -# MODEL -X = tf.placeholder(tf.float32, [N, D]) -w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) -b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) -y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N)) + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) + b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) + y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N)) -# INFERENCE -T = 5000 # Number of samples. -nburn = 100 # Number of burn-in samples. -stride = 10 # Frequency with which to plot samples. -qw = Empirical(params=tf.Variable(tf.random_normal([T, D]))) -qb = Empirical(params=tf.Variable(tf.random_normal([T, 1]))) + # INFERENCE + qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) + qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1])) -inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) -inference.run(step_size=1e-3) + inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) + inference.run(step_size=1e-3) + # CRITICISM -# CRITICISM + # Plot posterior samples.
+ sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride], + qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride]) + plt.show() -# Plot posterior samples. -sns.jointplot(qb.params.eval()[nburn:T:stride], - qw.params.eval()[nburn:T:stride]) -plt.show() + # Posterior predictive checks. + y_post = ed.copy(y, {w: qw, b: qb}) + # This is equivalent to + # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N)) -# Posterior predictive checks. -y_post = ed.copy(y, {w: qw, b: qb}) -# This is equivalent to -# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N)) + print("Mean squared error on test data:") + print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) -print("Mean squared error on test data:") -print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) + print("Displaying prior predictive samples.") + n_prior_samples = 10 -print("Displaying prior predictive samples.") -n_prior_samples = 10 + w_prior = w.sample(n_prior_samples).eval() + b_prior = b.sample(n_prior_samples).eval() -w_prior = w.sample(n_prior_samples).eval() -b_prior = b.sample(n_prior_samples).eval() + plt.scatter(X_train, y_train) -plt.scatter(X_train, y_train) + inputs = np.linspace(-1, 10, num=400) + for ns in range(n_prior_samples): + output = inputs * w_prior[ns] + b_prior[ns] + plt.plot(inputs, output) -inputs = np.linspace(-1, 10, num=400) -for ns in range(n_prior_samples): - output = inputs * w_prior[ns] + b_prior[ns] - plt.plot(inputs, output) + plt.show() -plt.show() + print("Displaying posterior predictive samples.") + n_posterior_samples = 10 -print("Displaying posterior predictive samples.") -n_posterior_samples = 10 + w_post = qw.sample(n_posterior_samples).eval() + b_post = qb.sample(n_posterior_samples).eval() -w_post = qw.sample(n_posterior_samples).eval() -b_post = qb.sample(n_posterior_samples).eval() + plt.scatter(X_train, y_train) -plt.scatter(X_train, y_train) + inputs = np.linspace(-1, 10, num=400) + for ns in range(n_posterior_samples): + output = inputs * w_post[ns] + b_post[ns] + plt.plot(inputs, output) -inputs = np.linspace(-1, 10, num=400) -for ns in range(n_posterior_samples): - output = inputs * w_post[ns] + b_post[ns] - plt.plot(inputs, output) + plt.show() -plt.show() +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_linear_regression_implicitklqp.py b/examples/bayesian_linear_regression_implicitklqp.py index b694f990c..41a72a132 100644 --- a/examples/bayesian_linear_regression_implicitklqp.py +++ b/examples/bayesian_linear_regression_implicitklqp.py @@ -24,7 +24,12 @@ import tensorflow as tf from edward.models import Normal -from tensorflow.contrib import slim + +tf.flags.DEFINE_integer("N", default=500, help="Number of data points.") +tf.flags.DEFINE_integer("M", default=50, help="Batch size during training.") +tf.flags.DEFINE_integer("D", default=2, help="Number of features.") + +FLAGS = tf.flags.FLAGS def build_toy_dataset(N, w, noise_std=0.1): @@ -34,22 +39,6 @@ def build_toy_dataset(N, w, noise_std=0.1): return x, y -def ratio_estimator(data, local_vars, global_vars): - """Takes as input a dict of data x, local variable samples z, and - global variable samples beta; outputs real values of shape - (x.shape[0] + z.shape[0],). In this example, there are no local - variables. 
- """ - # data[y] has shape (M,); global_vars[w] has shape (D,) - # we concatenate w to each data point y, so input has shape (M, 1 + D) - input = tf.concat([ - tf.reshape(data[y], [M, 1]), - tf.tile(tf.reshape(global_vars[w], [1, D]), [M, 1])], 1) - hidden = slim.fully_connected(input, 64, activation_fn=tf.nn.relu) - output = slim.fully_connected(hidden, 1, activation_fn=None) - return output - - def generator(arrays, batch_size): """Generate batches, one with respect to each array's first axis.""" starts = [0] * len(arrays) # pointers to where we are in iteration @@ -69,52 +58,68 @@ def generator(arrays, batch_size): yield batches -ed.set_seed(42) - -N = 500 # number of data points -M = 50 # batch size during training -D = 2 # number of features - -# DATA -w_true = np.ones(D) * 5.0 -X_train, y_train = build_toy_dataset(N, w_true) -X_test, y_test = build_toy_dataset(N, w_true) -data = generator([X_train, y_train], M) - -# MODEL -X = tf.placeholder(tf.float32, [M, D]) -y_ph = tf.placeholder(tf.float32, [M]) -w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) -y = Normal(loc=ed.dot(X, w), scale=tf.ones(M)) - -# INFERENCE -qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0), - scale=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) - -inference = ed.ImplicitKLqp( - {w: qw}, data={y: y_ph}, - discriminator=ratio_estimator, global_vars={w: qw}) -inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M}) - -sess = ed.get_session() -tf.global_variables_initializer().run() - -for _ in range(inference.n_iter): - X_batch, y_batch = next(data) - for _ in range(5): - info_dict_d = inference.update( - variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) - - info_dict = inference.update( - variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) - info_dict['loss_d'] = info_dict_d['loss_d'] - info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration - - t = info_dict['t'] - inference.print_progress(info_dict) - if t == 1 or t % inference.n_print == 0: - # Check inferred posterior parameters. - mean, std = sess.run([qw.mean(), qw.stddev()]) - print("\nInferred mean & std:") - print(mean) - print(std) +def main(_): + def ratio_estimator(data, local_vars, global_vars): + """Takes as input a dict of data x, local variable samples z, and + global variable samples beta; outputs real values of shape + (x.shape[0] + z.shape[0],). In this example, there are no local + variables. 
+ """ + # data[y] has shape (M,); global_vars[w] has shape (D,) + # we concatenate w to each data point y, so input has shape (M, 1 + D) + input = tf.concat([ + tf.reshape(data[y], [FLAGS.M, 1]), + tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])], 1) + hidden = tf.layers.dense(input, 64, activation=tf.nn.relu) + output = tf.layers.dense(hidden, 1, activation=None) + return output + + ed.set_seed(42) + + # DATA + w_true = np.ones(FLAGS.D) * 5.0 + X_train, y_train = build_toy_dataset(FLAGS.N, w_true) + X_test, y_test = build_toy_dataset(FLAGS.N, w_true) + data = generator([X_train, y_train], FLAGS.M) + + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D]) + y_ph = tf.placeholder(tf.float32, [FLAGS.M]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) + y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M)) + + # INFERENCE + qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0, + scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))) + + inference = ed.ImplicitKLqp( + {w: qw}, data={y: y_ph}, + discriminator=ratio_estimator, global_vars={w: qw}) + inference.initialize(n_iter=5000, n_print=100, + scale={y: float(FLAGS.N) / FLAGS.M}) + + sess = ed.get_session() + tf.global_variables_initializer().run() + + for _ in range(inference.n_iter): + X_batch, y_batch = next(data) + for _ in range(5): + info_dict_d = inference.update( + variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) + + info_dict = inference.update( + variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) + info_dict['loss_d'] = info_dict_d['loss_d'] + info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration + + t = info_dict['t'] + inference.print_progress(info_dict) + if t == 1 or t % inference.n_print == 0: + # Check inferred posterior parameters. 
+ mean, std = sess.run([qw.mean(), qw.stddev()]) + print("\nInferred mean & std:") + print(mean) + print(std) + +if __name__ == "__main__": + tf.app.run() diff --git a/examples/bayesian_logistic_regression.py b/examples/bayesian_logistic_regression.py index a9c123e7d..141116bb7 100644 --- a/examples/bayesian_logistic_regression.py +++ b/examples/bayesian_logistic_regression.py @@ -13,6 +13,12 @@ from edward.models import Bernoulli, Normal, Empirical +tf.flags.DEFINE_integer("N", default=40, help="Number of data points.") +tf.flags.DEFINE_integer("D", default=1, help="Number of features.") +tf.flags.DEFINE_integer("T", default=5000, help="Number of samples.") + +FLAGS = tf.flags.FLAGS + def build_toy_dataset(N, noise_std=0.1): D = 1 @@ -25,68 +31,68 @@ def build_toy_dataset(N, noise_std=0.1): return X, y -ed.set_seed(42) +def main(_): + ed.set_seed(42) -N = 40 # number of data points -D = 1 # number of features + # DATA + X_train, y_train = build_toy_dataset(FLAGS.N) -# DATA -X_train, y_train = build_toy_dataset(N) + # MODEL + X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) + w = Normal(loc=tf.zeros(FLAGS.D), scale=3.0 * tf.ones(FLAGS.D)) + b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) + y = Bernoulli(logits=ed.dot(X, w) + b) -# MODEL -X = tf.placeholder(tf.float32, [N, D]) -w = Normal(loc=tf.zeros(D), scale=3.0 * tf.ones(D)) -b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) -y = Bernoulli(logits=ed.dot(X, w) + b) + # INFERENCE + qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) + qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T])) -# INFERENCE -T = 5000 # number of samples -qw = Empirical(params=tf.Variable(tf.random_normal([T, D]))) -qb = Empirical(params=tf.Variable(tf.random_normal([T]))) + inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train}) + inference.initialize(n_print=10, step_size=0.6) -inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train}) -inference.initialize(n_print=10, step_size=0.6) + # Alternatively, use variational inference. + # qw_loc = tf.get_variable("qw_loc", [FLAGS.D]) + # qw_scale = tf.nn.softplus(tf.get_variable("qw_scale", [FLAGS.D])) + # qb_loc = tf.get_variable("qb_loc", []) + 10.0 + # qb_scale = tf.nn.softplus(tf.get_variable("qb_scale", [])) -# Alternatively, use variational inference. -# qw_loc = tf.Variable(tf.random_normal([D])) -# qw_scale = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) -# qb_loc = tf.Variable(tf.random_normal([]) + 10) -# qb_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) + # qw = Normal(loc=qw_loc, scale=qw_scale) + # qb = Normal(loc=qb_loc, scale=qb_scale) -# qw = Normal(loc=qw_loc, scale=qw_scale) -# qb = Normal(loc=qb_loc, scale=qb_scale) + # inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) + # inference.initialize(n_print=10, n_iter=600) -# inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) -# inference.initialize(n_print=10, n_iter=600) + tf.global_variables_initializer().run() -tf.global_variables_initializer().run() + # Set up figure. + fig = plt.figure(figsize=(8, 8), facecolor='white') + ax = fig.add_subplot(111, frameon=False) + plt.ion() + plt.show(block=False) -# Set up figure. -fig = plt.figure(figsize=(8, 8), facecolor='white') -ax = fig.add_subplot(111, frameon=False) -plt.ion() -plt.show(block=False) + # Build samples from inferred posterior. 
+ n_samples = 50 + inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) + probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) + for _ in range(n_samples)]) -# Build samples from inferred posterior. -n_samples = 50 -inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) -probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) - for _ in range(n_samples)]) + for t in range(inference.n_iter): + info_dict = inference.update() + inference.print_progress(info_dict) -for t in range(inference.n_iter): - info_dict = inference.update() - inference.print_progress(info_dict) + if t % inference.n_print == 0: + outputs = probs.eval() - if t % inference.n_print == 0: - outputs = probs.eval() + # Plot data and functions + plt.cla() + ax.plot(X_train[:], y_train, 'bx') + for s in range(n_samples): + ax.plot(inputs[:], outputs[s], alpha=0.2) - # Plot data and functions - plt.cla() - ax.plot(X_train[:], y_train, 'bx') - for s in range(n_samples): - ax.plot(inputs[:], outputs[s], alpha=0.2) + ax.set_xlim([-5, 3]) + ax.set_ylim([-0.5, 1.5]) + plt.draw() + plt.pause(1.0 / 60.0) - ax.set_xlim([-5, 3]) - ax.set_ylim([-0.5, 1.5]) - plt.draw() - plt.pause(1.0 / 60.0) +if __name__ == "__main__": + tf.app.run()
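For reference, the three converted examples above all follow the same tf.flags / main / tf.app.run skeleton. A minimal standalone sketch of that pattern (the flag and the print statement here are illustrative, not taken from the patch):

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

tf.flags.DEFINE_integer("N", default=40, help="Number of data points.")

FLAGS = tf.flags.FLAGS


def main(_):
  # tf.app.run() parses the flags and then calls main().
  print("Running with N = {}".format(FLAGS.N))

if __name__ == "__main__":
  tf.app.run()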