In [1]:
import scipy as sp
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os

from tensorflow.examples.tutorials.mnist import input_data
from util import random_mini_batches

mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)

st = tf.contrib.bayesflow.stochastic_tensor
Normal = tf.contrib.distributions.Normal
Bernoulli = tf.contrib.distributions.Bernoulli

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
# some constants
LEARNING_RATE = 0.002
BETA1 = 0.6
BATCH_SIZE = 64
EPOCHS = 20
SAVE_SAMPLE_PERIOD = 50

In [3]:
def lrelu(x, alpha =0.2):
    return tf.maximum(alpha*x,x)

if not os.path.exists('samples'):
    os.mkdir('samples')

In [4]:
class ConvLayer(object):
    
    def __init__(
        self,
        name,
        mi, mo,
        apply_batch_norm,
        filter_sz=5,stride=2,
        f=tf.nn.relu,
    ):
        
        self.W = tf.get_variable(
            'W_%s' %name,
            shape = (filter_sz, filter_sz, mi, mo),
            initializer=tf.glorot_uniform_initializer(),
        )
        
        self.b = tf.get_variable(
            'b_%s' %name,
            shape = (mo, ),
            initializer=tf.zeros_initializer(),
        )
        
        self.name = name
        self.f = f
        self.stride=stride
        self.apply_batch_norm = apply_batch_norm
    
    def forward(self, X, reuse, is_training):
        
        conv_out = tf.nn.conv2d(
            X,
            self.W,
            strides=[1,self.stride,self.stride,1],
            padding='SAME'
        )
        
        conv_out = tf.nn.bias_add(conv_out, self.b)
        
        if self.apply_batch_norm:
            
            conv_out = tf.contrib.layers.batch_norm(
                conv_out,
                decay=0.9,
                updates_collections=None,
                epsilon=1e-5,
                scale=True,
                is_training = is_training,
                reuse=reuse,
                scope = self.name,
            )
            
        return self.f(conv_out)
    
    def set_session(self, session):
        
        self.session = session

In [5]:
class DeconvLayer(object):
    
    #fractionally strided convolution
    def __init__(
        self,
        name,
        mi, mo,
        output_shape,
        apply_batch_norm,
        filter_sz=5, stride=2,
        f=tf.nn.relu
    ):
        
        self.W = tf.get_variable(
            'W_%s' %name,
            shape=(filter_sz, filter_sz, mo, mi),
            initializer=tf.random_normal_initializer(stddev=0.02),
        )
        self.b = tf.get_variable(
            'b_%s' %name,
            shape=(mo,),
            initializer=tf.zeros_initializer(),
        )
        
        self.name = name
        self.f = f
        self.stride=stride
        self.output_shape = output_shape
        self.apply_batch_norm = apply_batch_norm
        self.params = [self.W,self.b]

    def forward(self, X, reuse, is_training):

        conv_out = tf.nn.conv2d_transpose(
            value=X,
            filter=self.W,
            output_shape=self.output_shape,
            strides=[1, self.stride, self.stride, 1]
        )

        conv_out = tf.nn.bias_add(conv_out,self.b)
        
        if self.apply_batch_norm:
            conv_out=tf.contrib.layers.batch_norm(
                conv_out,
                decay=0.9,
                updates_collections=None,
                epsilon=1e-5,
                scale=True,
                is_training = is_training,
                reuse=reuse,
                scope = self.name,
            )
        return self.f(conv_out)
    
    def set_session(self, session):
        
        self.session = session

In [6]:
class DenseLayer(object):
    
    def __init__(self,
                 name,
                 mi, mo,
                 apply_batch_norm,
                 f=tf.nn.relu):
        
        #self.mi = mi
        #self.mo = mo
        
        self.W = tf.get_variable(
            'W_%s' %name,
            shape=(mi, mo),
            initializer=tf.glorot_normal_initializer(),
        )
        self.b = tf.get_variable(
            'b_%s' %name,
            shape=(mo, ),
            initializer=tf.zeros_initializer(),
        )
        
        self.f = f
        self.name = name
        self.apply_batch_norm = apply_batch_norm
        
        
    def forward(self, X, reuse, is_training):
        
        Z = tf.matmul(X,self.W) + self.b
        
        if self.apply_batch_norm:
            
            Z = tf.contrib.layers.batch_norm(
                Z,
                decay=0.9,
                updates_collections=None,
                epsilon=1e-5,
                scale=True,
                is_training = is_training,
                reuse=reuse,
                scope = self.name,
            )
            
        return self.f(Z)
    
    def set_session(self, session):
        
        self.session = session

In [7]:
class VariationalAutoencoder:
    
    
    def __init__(self, n_W, n_C, e_sizes, d_sizes):
        
        #size of every layer in the encoder
        #up to the latent layer, decoder
        #will have reverse shape
        self.n_W = n_W
        self.n_C = n_C
        
        self.e_sizes = e_sizes
        self.d_sizes = d_sizes
        self.latent_dims = e_sizes['z']

        
        self.X = tf.placeholder(
            tf.float32,
            shape=(None, n_W, n_W, n_C),
            name='X'
        )
        
        self.batch_sz = tf.placeholder(
            tf.int32,
            shape=(),
            name='batch_sz'
        )
        
        #builds the encoder and outputs a Z distribution
        self.Z = self.build_encoder(self.X, self.e_sizes)
        
        #builds decoder from Z distribution
        logits = self.build_decoder(self.Z, self.d_sizes)
        
        #builds X_hat distribution from decoder output
        self.X_hat_distribution = Bernoulli(logits=logits)
        
        
        #posterior predictive
        
        with tf.variable_scope('encoder') as scope:
            scope.reuse_variables
            self.Z_dist = self.encode(
                self.X, reuse=True, is_training=False,
            )#self.X or something on purpose?

                                                   
        with tf.variable_scope('decoder') as scope:
            scope.reuse_variables()
            sample_logits = self.decode(
                self.Z_dist, reuse=True, is_training=False,
            )
            
        self.posterior_predictive_dist = Bernoulli(logits=sample_logits)
        self.posterior_predictive = self.posterior_predictive_dist.sample()
        self.posterior_predictive_probs = tf.nn.sigmoid(sample_logits)
        
        #prior predictive from prob

        standard_normal = Normal(
          loc=np.zeros(self.latent_dims, dtype=np.float32),
          scale=np.ones(self.latent_dims, dtype=np.float32)
        )

        Z_std = standard_normal.sample(1)

        with tf.variable_scope('decoder') as scope:
            scope.reuse_variables()
            logits_from_prob = self.decode(
                Z_std, reuse=True, is_training=False,
            )
        
        prior_predictive_dist = Bernoulli(logits=logits_from_prob)
        self.prior_predictive = prior_predictive_dist.sample()
        self.prior_predictive_probs = tf.nn.sigmoid(logits_from_prob)


        # prior predictive from input

        self.Z_input = tf.placeholder(tf.float32, shape=(None, self.latent_dims))
        
        with tf.variable_scope('decoder') as scope:
            scope.reuse_variables()    
            logits_from_input = self.decode(
                self.Z_input, reuse=True, is_training=False,
            )
        
        input_predictive_dist = Bernoulli(logits=logits_from_input)
        self.prior_predictive_from_input= input_predictive_dist.sample()
        self.prior_predictive_from_input_probs = tf.nn.sigmoid(logits_from_input)

        
        #cost
        kl = tf.reduce_sum(
            tf.contrib.distributions.kl_divergence(
                self.Z.distribution,
                standard_normal),
            1
        )
        
        
        expected_log_likelihood = tf.reduce_sum(
              self.X_hat_distribution.log_prob(self.X),
              1
        )
        
        self.elbo = tf.reduce_sum(expected_log_likelihood - kl)
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=LEARNING_RATE,
            beta1=BETA1,
        ).minimize(-self.elbo)

    def build_encoder(self, X, e_sizes):
        
        with tf.variable_scope('encoder') as scope:
            
            M_in = self.n_C
            dim = self.n_W
            
            self.e_conv_layers=[]
            count = 0
            
            for M_out, filter_sz, stride, apply_batch_norm in e_sizes['conv_layers']:
                
                name = 'e_conv_layer_%s' %count
                count += 1
                
                layer = ConvLayer(name, M_in, M_out, apply_batch_norm, filter_sz, stride, lrelu)
                self.e_conv_layers.append(layer)
                M_in = M_out
                
                #print('Dim:', dim)
                dim = int(np.ceil(float(dim)/stride))
            
            M_in = M_in*dim*dim
            
            self.e_dense_layers=[]
            
            for M_out, apply_batch_norm in e_sizes['dense_layers']:
                
                name = 'e_dense_layer_%s' %count
                count +=1
                
                layer = DenseLayer(name, M_in, M_out, apply_batch_norm, lrelu)
                self.e_dense_layers.append(layer)
                
                M_in = M_out
        
            #no activation of last layer and need 2
            #times as many units (M means and M stddevs)
            name = 'e_conv_layer_%s' %count
            last_enc_layer = DenseLayer(name, M_in, 2*self.latent_dims, apply_batch_norm=False, f=lambda x: x)
            self.e_dense_layers.append(last_enc_layer)
            
            return self.encode(X)
        
    def encode(self, X, reuse=None, is_training=True):
        #propagate X until end of encoder
        output=X

        for layer in self.e_conv_layers:
            output = layer.forward(output, reuse, is_training)
        
        output = tf.contrib.layers.flatten(output)
        
        for layer in self.e_dense_layers:
            output = layer.forward(output, reuse, is_training)
        
        
        #get means and stddev from last encoder layer
        self.means = output[:, :self.latent_dims]
        self.stddev = tf.nn.softplus(output[:,self.latent_dims:])+1e-6
        
        # get a sample of Z, we need to use a stochastic tensor
        # in order for the errors to be backpropagated past this point
        
        with st.value_type(st.SampleValue()):
            Z = st.StochasticTensor(Normal(loc=self.means, scale=self.stddev))
        
        return Z
    
        #build decoder
    def build_decoder(self, Z, d_sizes):
        
        with tf.variable_scope('decoder') as scope:
            
            dims=[self.n_W]
            dim = self.n_W
            
            for _, _, stride, _ in reversed(d_sizes['conv_layers']):
                dim = int(np.ceil(float(dim)/stride))
                dims.append(dim)
            
            dims = list(reversed(dims))
            #print('Decoder dims:', dims)
            self.d_dims = dims
            
            M_in = self.latent_dims
            self.d_dense_layers =[]
            

            count=0
            for M_out, apply_batch_norm in d_sizes['dense_layers']:
                
                name = 'd_dense_layer_%s' %count
                count +=1
                
                layer = DenseLayer(name, M_in, M_out, apply_batch_norm)
                self.d_dense_layers.append(layer)
                M_in = M_out
                
            M_out = d_sizes['projection']*dims[0]*dims[0]


            #final dense layer
            name = 'dec_layer_%s' %count
            last_dec_layer = DenseLayer(name, M_in, M_out, not d_sizes['bn_after_project'])
            self.d_dense_layers.append(last_dec_layer)
            
            
            #fractionally strided layers
            
            M_in = d_sizes['projection']
            self.d_conv_layers=[]
            
            #unactivated output
            num_relus = len(d_sizes['conv_layers'])-1
            #activation_functions = [tf.nn.relu]*num_relus +[d_sizes['output_activation']]
            activation_functions = [tf.nn.relu]*num_relus +[lambda x: x]           


            for i in range(len(d_sizes['conv_layers'])):
               
                name = 'd_conv_layer_%s' %i
                M_out, filter_sz, stride, apply_batch_norm = d_sizes['conv_layers'][i]
                f = activation_functions[i]
                
                output_shape = [self.batch_sz, dims[i+1], dims[i+1], M_out]
                #print("M_in:", M_in, "M_out:", M_out, "output_shape:", output_shape)
                
                layer = DeconvLayer(
                    name, M_in, M_out, output_shape, apply_batch_norm, filter_sz, stride, f
                )
                self.d_conv_layers.append(layer)
                
            self.d_sizes = d_sizes
            
            return self.decode(Z)
    
    def decode(self, Z, reuse=None, is_training=True):
        
        #dense layers
        output = Z
        
        for layer in self.d_dense_layers:
            output = layer.forward(output, reuse, is_training)

        output = tf.reshape(
            output,
            [-1, self.d_dims[0],self.d_dims[0],self.d_sizes['projection']]
        )

        if self.d_sizes['bn_after_project']:
            output = tf.contrib.layers.batch_norm(
            output,
            decay=0.9, 
            updates_collections=None,
            epsilon=1e-5,
            scale=True,
            is_training=is_training,
            reuse=reuse,
            scope='bn_after_project'
        )        
        #passing to fs-convolutional layers   
        
        for layer in self.d_conv_layers:

            output = layer.forward(output, reuse, is_training)
            
        return output
    
    def set_session(self, session):
        
        self.session = session
        
        for layer in self.e_conv_layers:
            layer.set_session(session)
        for layer in self.e_dense_layers:
            layer.set_session(session)
            
        for layer in self.d_dense_layers:
            layer.set_session(session) 
        for layer in self.d_conv_layers:
            layer.set_session(session)  
        
    def fit(self, X):
        costs = []
        
        n_batches = len(X)//BATCH_SIZE
        print("# batches", n_batches)
        
        total_iters=0
        
        for i in range(EPOCHS):
            print("Epoch", i)
            np.random.shuffle(X)
            
            for j in range(n_batches):
                
                X_batch = X[j*BATCH_SIZE:(j+1)*BATCH_SIZE]
                _, c = self.session.run((self.train_op,self.elbo),feed_dict={self.X:X_batch, self.batch_sz:BATCH_SIZE})
                c /= BATCH_SIZE
                costs.append(c)
                
                if j % 250 == 0:
                    print("on iter %d, cost: %.3f" %(j, c))
                
                total_iters +=1
                if total_iters % SAVE_SAMPLE_PERIOD == 0:
                    print('Saving a sample...')
                    samples = self.sample(64)
                    
                    d = self.n_W
                    flat_image = np.empty((8*d,8*d))
                    
                    k=0
                    for i in range(8):
                        for j in range(8):
                            flat_image[i*d:(i+1)*d, j*d:(j+1)*d] = samples[k].reshape(d, d)
                            k+=1
                            
                    plt.imshow(flat_image, cmap='gray')
                    
                    sp.misc.imsave(
                        'samples/samples_at_iter_%d.png' % total_iters,
                        flat_image,
                    )
            plt.clf()
            plt.plot(costs, label='cost vs iteration')
            plt.legend()
            plt.savefig('cost vs iteration.png')
            
    def sample(self, n):
        Z = np.random.uniform(-1,1, size=(n,self.latent_dims))
        samples = self.session.run(
          self.prior_predictive_from_input_probs,
          feed_dict={self.Z_input: Z, self.batch_sz: n}
        )
        return samples

    def prior_predictive_with_input(self, Z):
        return self.session.run(
          self.prior_predictive_from_input_probs,
          feed_dict={self.Z_input: Z}
        )

    def posterior_predictive_sample(self, X):
        # returns a sample from p(x_new | X)
        return self.session.run(self.posterior_predictive_probs, feed_dict={self.X: X})

    def prior_predictive_sample_with_probs(self):
        # returns a sample from p(x_new | z), z ~ N(0, 1)
        return self.session.run((self.prior_predictive, self.prior_predictive_probs))

In [8]:
def test_vae():

    X_train = mnist.train.images
    #X_train = X_train/255
    
    X_train = X_train.reshape(len(X_train),28,28,1)
    X_train = (X_train>0.5).astype(np.float32)
    
    X_test = mnist.test.images
    #X_test = X_test/255

    X_test = X_test.reshape(len(X_test),28,28,1)
    X_test = (X_test>0.5).astype(np.float32)
    
    n_W = X_train.shape[1]
    n_C = X_train.shape[-1]
    
    
    
    e_sizes = {
        'conv_layers': [(2, 5, 2, False), (64, 5, 2, True)],
        'dense_layers': [(1024, True)],'z': 100
    }
    
    d_sizes = {
        'projection': 128,
        'bn_after_project': False,
        'conv_layers': [(128, 5, 2, True), (n_C, 5, 2, False)],
        'dense_layers': [(1024, True)],
        'output_activation': tf.sigmoid,
    }
    tf.reset_default_graph()
    vae = VariationalAutoencoder(n_W, n_C, e_sizes, d_sizes)
    # set up session and variables for later
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        
        sess.run(init_op)
        vae.set_session(sess)
        vae.fit(X_train)

        done = False
        while not done:
        
            i = np.random.choice(len(X_test))
            x = X_test[i]
            im = vae.posterior_predictive_sample([x]).reshape(28, 28)
            
            plt.subplot(1,2,1)
            plt.imshow(x.reshape(28, 28), cmap='gray')
            plt.title("Original")
            
            plt.subplot(1,2,2)
            plt.imshow(im, cmap='gray')
            plt.title("Sampled")
            plt.show()

            ans = input("Generate another?")
            if ans and ans[0] in ('n' or 'N'):
                done = True

  # plot output from random samples in latent space
        done = False
        while not done:
        
            im, probs = vae.prior_predictive_sample_with_probs()
            
            im = im.reshape(28, 28)
            
            probs = probs.reshape(28, 28)
            
            plt.subplot(1,2,1)
            plt.imshow(im, cmap='gray')
            plt.title("Prior predictive sample")
            
            plt.subplot(1,2,2)
            plt.imshow(probs, cmap='gray')
            plt.title("Prior predictive probs")
            plt.show()

            ans = input("Generate another?")
            if ans and ans[0] in ('n' or 'N'):
                done = True

In [9]:
if __name__=='__main__':
    test_vae()

# batches 859
Epoch 0
on iter 0, cost: -155253.891
Saving a sample...


AttributeError: module 'scipy.misc' has no attribute 'imsave'