#!/usr/bin/env python
"""
Bayesian neural network using mean-field variational inference
(see, e.g., Blundell et al. (2015); Kucukelbir et al. (2016)).
Inspired by autograd's Bayesian neural network example.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from edward.models import Normal
from edward.stats import norm
from edward.util import rbf


class BayesianNN:
  """
  Bayesian neural network for regressing outputs y on inputs x.

  p((x, y), z) = Normal(y | NN(x; z), lik_std) *
                 Normal(z | 0, prior_std),

  where z are neural network weights, and with known likelihood and
  prior standard deviations.

  Parameters
  ----------
  layer_sizes : list
    The size of each layer, ordered from input to output.
  nonlinearity : function, optional
    Non-linearity after each linear transformation in the neural
    network; aka activation function.
  lik_std : float, optional
    Standard deviation of the normal likelihood; aka noise parameter,
    homoscedastic noise, scale parameter.
  prior_std : float, optional
    Standard deviation of the normal prior on weights; aka L2
    regularization parameter, ridge penalty, scale parameter.
  """

  def __init__(self, layer_sizes, nonlinearity=tf.tanh,
               lik_std=0.1, prior_std=1.0):
    self.layer_sizes = layer_sizes
    self.nonlinearity = nonlinearity
    self.lik_std = lik_std
    self.prior_std = prior_std
    self.n_layers = len(layer_sizes)
    self.weight_dims = list(zip(layer_sizes[:-1], layer_sizes[1:]))
    self.n_vars = sum((m + 1) * n for m, n in self.weight_dims)
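
  # As a worked example (illustrative only): with layer_sizes=[1, 10, 10, 1],
  # weight_dims is [(1, 10), (10, 10), (10, 1)] and the flattened weight
  # vector packs [vec(W1), b1, vec(W2), b2, vec(W3), b3], so
  # n_vars = (1 + 1) * 10 + (10 + 1) * 10 + (10 + 1) * 1 = 141.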

  def unpack_weights(self, zs):
    """Unpack weight matrices and biases from a flattened vector."""
    for m, n in self.weight_dims:
      yield (tf.reshape(zs[:(m * n)], [m, n]),
             tf.reshape(zs[(m * n):(m * n + n)], [n]))
      zs = zs[(m + 1) * n:]

  def neural_network(self, x, zs):
    """Forward pass of the neural net, outputting a vector of
    `n_minibatch` elements."""
    h = x
    for W, b in self.unpack_weights(zs):
      # Bias is broadcast across the batch: [n_minibatch, n] + [n].
      h = self.nonlinearity(tf.matmul(h, W) + b)

    return tf.squeeze(h)  # n_minibatch x 1 to n_minibatch

  def log_lik(self, xs, zs):
    """Return scalar, the log-likelihood p(xs | zs)."""
    x, y = xs['x'], xs['y']
    mu = self.neural_network(x, zs['z'])
    log_lik = tf.reduce_sum(norm.logpdf(y, mu, self.lik_std))
    return log_lik
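
# A minimal shape check (a sketch; the names nn, z0, y0 are illustrative,
# and this is not executed as part of the script):
#   nn = BayesianNN(layer_sizes=[1, 10, 10, 1])
#   z0 = tf.zeros([nn.n_vars])
#   y0 = nn.neural_network(tf.zeros([5, 1]), z0)  # Tensor of shape [5]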


def build_toy_dataset(N=40, noise_std=0.1):
  D = 1
  # N // 2 keeps `num` an integer under `from __future__ import division`.
  x = np.concatenate([np.linspace(0, 2, num=N // 2),
                      np.linspace(6, 8, num=N // 2)])
  y = np.cos(x) + norm.rvs(0, noise_std, size=N)
  x = (x - 4.0) / 4.0
  x = x.reshape((N, D))
  return x, y
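
# The two input clusters (x in [0, 2] and [6, 8] before standardization)
# leave a gap in the middle; (x - 4.0) / 4.0 maps the inputs to roughly
# [-1, 1], with the unobserved gap centered at 0.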


ed.set_seed(42)

x_train, y_train = build_toy_dataset()

model = BayesianNN(layer_sizes=[1, 10, 10, 1], nonlinearity=rbf)

qz_mu = tf.Variable(tf.random_normal([model.n_vars]))
qz_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([model.n_vars])))
qz = Normal(mu=qz_mu, sigma=qz_sigma)
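
# The variational family is a fully factorized (mean-field) Gaussian over
# all model.n_vars weights; softplus, sigma = log(1 + exp(rho)), keeps each
# standard deviation positive while the underlying variable stays
# unconstrained.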

# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

# Because the model wrapper defines log_lik() rather than a full joint
# density, KLqp does variational inference assuming a standard normal prior
# on the weights; the KL(q || p) term can then be computed analytically
# rather than by Monte Carlo, which speeds up inference.
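
# As a sketch (not used by the script), the analytic KL between the
# mean-field posterior q = Normal(qz_mu, qz_sigma) and a standard normal
# prior is, summed over dimensions,
#   KL(q || p) = sum_i [-log(sigma_i) + (sigma_i^2 + mu_i^2 - 1) / 2],
# which in TensorFlow would read:
#   kl = tf.reduce_sum(0.5 * (tf.square(qz_sigma) + tf.square(qz_mu) - 1.0)
#                      - tf.log(qz_sigma))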

sess = ed.get_session()
data = {'x': x_train, 'y': y_train}

inference = ed.KLqp({'z': qz}, data, model)
inference.initialize(n_print=10)

init = tf.global_variables_initializer()
init.run()

for t in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

  if t % inference.n_print == 0:
    # Sample functions from the variational model: z = mean + std * eps,
    # with a fixed RNG so successive frames of the animation are comparable.
    mean, std = sess.run([qz.mu, qz.sigma])
    rs = np.random.RandomState(0)
    zs = rs.randn(10, model.n_vars) * std + mean
    zs = tf.convert_to_tensor(zs, dtype=tf.float32)
    inputs = np.linspace(-8, 8, num=400, dtype=np.float32)
    x = tf.expand_dims(inputs, 1)
    mus = []
    for z in tf.unstack(zs):
      mus += [model.neural_network(x, z)]
    outputs = tf.stack(mus).eval()

    # Get data.
    x, y = data['x'], data['y']

    # Plot data and functions.
    plt.cla()
    ax.plot(x, y, 'bx')
    ax.plot(inputs, outputs.T)
    ax.set_xlim([-8, 8])
    ax.set_ylim([-2, 3])
    plt.draw()
    plt.pause(1.0 / 60.0)
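
# As training progresses, the sampled functions should pass close to the two
# observed clusters and fan out over the gap between them, visualizing the
# model's predictive uncertainty.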