In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
def sample_data_1():
    count = 100000
    rand = np.random.RandomState(0)
    return [[1.0, 2.0]] + rand.randn(count, 2) * [[5.0, 1.0]]
def sample_data_2():
    count = 100000
    rand = np.random.RandomState(0)
    return [[1.0, 2.0]] + (rand.randn(count, 2) * [[5.0, 1.0]]).dot(
    [[np.sqrt(2) / 2, np.sqrt(2) / 2], [-np.sqrt(2) / 2, np.sqrt(2) / 2]])

In [5]:
data_1 = sample_data_1()
data_1_trn, data_1_val = data_1[:80000], data_1[80000:]

In [31]:
def build_mlp(input_ph, output_dim, scope, num_layers, hidden_dim, activation=tf.tanh, output_activation=None):
    output_ph = input_ph
    with tf.variable_scope(scope):
        for _ in range(num_layers):
            output_ph = tf.layers.dense(output_ph, hidden_dim, activation=activation)
        output_ph = tf.layers.dense(output_ph, output_dim, activation=output_activation)
    return output_ph

$l_{i}(\theta, \phi)=\mathbb{E}_{z \sim q_{\theta}\left(z | x_{i}\right)}\left[-\log p_{\phi}\left(x_{i} | z\right)\right]+K L\left(q_{\theta}\left(z | x_{i}\right) \| p(z)\right)$

In [51]:
class VAE():
    def __init__(self, sess, x_size=2, z_size=2, learning_rate=1e-4):
        self.sess = sess
        self.x = tf.placeholder(tf.float32, (None, x_size), name="x_ph")
        self.prior = self._make_prior(z_size)
        self.z = self.prior.sample(tf.shape(self.x)[0])
        self.encoder = self._make_encoder(self.x, z_size)
        self.decoder = self._make_decoder(self.z, x_size)
        
        self.loss = self._build_loss
        self.op = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
        
        self.num_samples = tf.placeholder(tf.int32, (), name="num_samples")
        self.z_sp = self.prior.sample(self.num_samples)
        self.decoder_sp = self._make_decoder(self.z_sp, x_size)
        self.x_sp = self.decoder_sp.sample(self.num_samples)
        
    def _build_nn(self, layer_in, output_dim, scope, num_layers=2, hidden_dim=6):
        stats = build_mlp(layer_in, 2 * output_dim, scope, num_layers, hidden_dim)
        mean, std = tf.split(stats, 2, axis=-1)
        dist = tfp.distributions.MultivariateNormalDiag(loc=mean, scale_diag=std, name=scope)
        return dist
    
    def _make_prior(self, z_size):
        mean = tf.Variable(tf.zeros((z_size), tf.float32), name="prior_mean")
        std = tf.Variable(tf.zeros((z_size), tf.float32), name="prior_logstd")
        return tfp.distributions.MultivariateNormalDiag(loc=mean, scale_diag=std, name='prior')
    
    def _make_encoder(self, x, z_size):
        make_encoder = tf.make_template('encoder', self._build_nn)
        encoder = make_encoder(x, z_size, 'z')
        return encoder
    
    def _make_decoder(self, z, x_size):
        make_decoder = tf.make_template('decoder', self._build_nn)
        decoder = make_decoder(z, x_size, 'x')
        return decoder
        
    def _build_loss(self):
        entropy = - self.decoder.log_prob(self.x, name="decoder_log_prob")
        kl = self.encoder.kl_divergence(self.prior)
        loss = tf.reduce_mean(entropy, axis=0) + kl
        return loss
        
    def step(self, batch, with_update=False):
        if with_update:
            loss, _ = self.sess.run([self.loss, self.op], feed_dict={self.x: batch})
        else:
            loss = self.sess.run(self.loss, feed_dict={self.x: batch})
        return loss
    
    def sample(self, num_samples):
        samples = self.sess.run(self.x_sp, feed_dict={self.num_samples: num_samples})
        return samples

In [52]:
def train(sess, data_trn, data_val, batch_size=64, num_epochs=60, 
          log_per_epoch=1, print_per_epoch=1):
    print("building model...")
    model = VAE(sess)
    init_op = tf.initializers.global_variables()
    sess.run(init_op)
    
    loss_trn = []
    loss_val = []
    
    for epoch in range(num_epochs):
        print("epoch {} starts...".format(epoch))
        loss_trn_batch = []
        for batch in np.array_split(data_trn, np.ceil(len(data_trn)/batch_size)):
            loss = model.step(batch, with_update=True)
            loss_trn_batch.append(loss)

        if epoch % log_per_epoch == 0:
            loss_trn.append(np.mean(loss_trn_batch))
            loss_val.append(model.step(data_val, with_update=False))

        if epoch % print_per_epoch == 0:
            print("at epoch", epoch, loss_trn[-1], loss_val[-1])
            
    return loss_trn, loss_val, model

In [53]:
data_trn, data_val = data_1_trn, data_1_val

In [54]:
tf.reset_default_graph()
sess = tf.Session()
loss_trn, loss_val, model = train(sess, data_trn, data_val)

building model...


ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables [] and loss <bound method VAE._build_loss of <__main__.VAE object at 0x1c2a8ccac8>>.

In [None]:
samples = model.sample(2)