In [1]:
#!/usr/bin/env python3

In [2]:
import numpy as np 
import time 
from datetime import timedelta
import tensorflow as tf
from readImages import *
from build_tensorflow_graph import *

In [3]:
#
## ---------- VAE Architecture ----------
#
# Every encoder / decoder layer gets its OWN CNN_Architecture instance.
# Plain assignment (`dcnn = cnn`) would only alias the encoder's object, so
# switching ReLU off for the decoder's output layer would silently switch it
# off for encoder layer 1 as well.

def _new_conv3x3_architecture(numFilters, numInputChannels):
    """
    Return a fresh CNN_Architecture carrying the settings shared by all
    convolution layers in this notebook: 3x3 filters, strides 1,
    toPadding enabled, ReLU enabled, no max pooling.
    """
    return CNN_Architecture(numFilters = numFilters,
                            filterSize = (3, 3),
                            strides = 1,
                            toPadding = True,
                            useReLU = True,
                            numInputChannels = numInputChannels,
                            maxPoolingSize=None)

# Convolution Layer 1
numFilters1 = 32
cnnArchitecture1 = _new_conv3x3_architecture(numFilters1, numInputChannels = 3)
# Decoder counterpart of layer 1 (last decoder layer): no ReLU, because the
# decoder applies a sigmoid to this layer's output.
dcnnArchitecture1 = _new_conv3x3_architecture(numFilters1, numInputChannels = 3)
dcnnArchitecture1.useReLU = False
# Convolution Layer 2
numFilters2 = 64
cnnArchitecture2 = _new_conv3x3_architecture(numFilters2, numInputChannels = numFilters1)
dcnnArchitecture2 = _new_conv3x3_architecture(numFilters2, numInputChannels = numFilters1)
# Convolution Layer 3
numFilters3 = 128
cnnArchitecture3 = _new_conv3x3_architecture(numFilters3, numInputChannels = numFilters2)
dcnnArchitecture3 = _new_conv3x3_architecture(numFilters3, numInputChannels = numFilters2)
# Fully Connected 1
fc1_size = 2048
# Fully Connected 2
fc2_size = 512
# Latent code
z_dim = 32

In [4]:
#  
#    input layer: 64 * 64 * 3 = 12288
#    conv1 layer: 64 * 64 * 32 = 131072
#    conv2 layer: 64 * 64 * 64 = 262144
#    conv3 layer: 64 * 64 * 128 = 524288
#    fc1_size = 2048
#    fc2_size = 512
#    z_dim = 32

In [5]:
class Image:
    """
    Shape constants for the images fed into the network.
    """
    # Number of channels per image.
    numChannels = 3
    # Side length in pixels; the model uses it for both height and width.
    size = 64

In [6]:
class VAE:
    """
    Convolutional variational autoencoder built as a TensorFlow 1.x graph.

    Attributes created by __init__:
        inputImages  placeholder [batch_size, Image.size, Image.size, Image.numChannels]
        lattenCode   placeholder [batch_size, z_dim], fed when sampling images
        mu, sigma    encoder outputs: mean and standard deviation of q(z|x)
        reconstruct  decoder output for the training path, clipped to (0, 1)
        cost         per-example cost (KL term + mean squared error), shape [batch_size]
        loss         scalar mean of `cost` over the batch
        optimizer    Adam train op minimising `loss`
        fake_images  decoder output for `lattenCode` (variables shared with training)
    """
    def __init__(self, batch_size):
        """
        Build the training graph and the sampling graph.

        batch_size: fixed leading dimension of both placeholders and of the
                    output shapes passed to the deconvolution layers.
        """
        self.batch_size = batch_size

        # ---------- build model ----------
        bs = self.batch_size
        self.inputImages = tf.placeholder(tf.float32,
                                          shape=[bs, Image.size, Image.size, Image.numChannels])
        self.lattenCode = tf.placeholder(tf.float32, shape= [bs, z_dim])
        self.mu, self.sigma = self.encoder(self.inputImages, is_training=True, reuse=False)
        # Reparameterisation: z = mu + sigma * eps with eps ~ N(0, 1).
        z = self.mu + \
               self.sigma * tf.random_normal(tf.shape(self.mu), 0, 1, dtype=tf.float32)

        # Reconstruct
        reconstruct = self.decoder(z, is_training=True, reuse=False)
        self.reconstruct = tf.clip_by_value(reconstruct, 1e-8, 1 - 1e-8)

        # Define Cost
        # Closed-form KL( N(mu, sigma^2) || N(0, 1) ), summed over latent dims:
        #     0.5 * sum(mu^2 + sigma^2 - log(sigma^2) - 1)
        # self.sigma is a standard deviation (the encoder returns exp(.)), so it
        # is squared here; mu must be squared too, otherwise the term is
        # unbounded below. The epsilon guards log() against underflowed sigma.
        sigma_sq = tf.square(self.sigma)
        regularizer = 0.5 * tf.reduce_sum(
            tf.square(self.mu) + sigma_sq - tf.log(sigma_sq + 1e-8) - 1,
            axis = 1)
        # Mean squared reconstruction error per example.
        lms = tf.reduce_mean(tf.square(self.inputImages - self.reconstruct), axis = [1, 2, 3])
        print(regularizer.get_shape())
        print(lms.get_shape())
        self.cost = regularizer + lms
        # Scalar objective; keep the train op on the instance so it can be run.
        self.loss = tf.reduce_mean(self.cost)
        # NOTE(review): if bn() registers its moving-average updates in
        # tf.GraphKeys.UPDATE_OPS they must be run together with this op --
        # confirm against bn's implementation.
        self.optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.loss)

        # Testing
        self.fake_images = self.decoder(self.lattenCode, is_training=False, reuse=True)


    def encoder(self, inputLayer, is_training = True, reuse = False):
        """
        Map a batch of images to the parameters of q(z|x).

        inputLayer:  image tensor fed to the first convolution layer.
        is_training: forwarded to every bn() call.
        reuse:       forwarded to the "encoder" variable scope.

        Returns (mean, stdev), each the z_dim-wide half of the final
        fully-connected layer's output. stdev is exp() of the second half, so
        it is strictly positive.

        Side effect: stores the flattened conv3 width in self.flat_numAttrs,
        which decoder() reads; encoder() must therefore run before decoder().
        """
        with tf.variable_scope("encoder", reuse = reuse):
            conv1, convWeights1 = new_convLayer(inputLayer, cnnArchitecture1, name="en_conv1")
            conv2, convWeights2 = new_convLayer(conv1, cnnArchitecture2, name="en_conv2")
            conv2 = bn(conv2, is_training=is_training, scope="en_bn2")
            conv3, convWeights3 = new_convLayer(conv2, cnnArchitecture3, name="en_conv3")
            conv3 = bn(conv3, is_training=is_training, scope="en_bn3")
            flat_layer, numAttrs = flattenLayer(conv3)

            self.flat_numAttrs = numAttrs

            fc1 = new_fcLayer(flat_layer,
                              inputChannels = numAttrs,
                              outputChannels = fc1_size,
                              useReLU=True,
                              name = "en_fc4")
            fc1 = bn(fc1, is_training=is_training, scope="en_bn4")
            fc2 = new_fcLayer(fc1,
                              inputChannels = fc1_size,
                              outputChannels = fc2_size,
                              useReLU=True,
                              name = "en_fc5")
            fc2 = bn(fc2, is_training=is_training, scope="en_bn5")
            gaussian_para = new_fcLayer(fc2,
                                        inputChannels = fc2_size,
                                        outputChannels = z_dim * 2,
                                        useReLU=False,
                                        name = "en_fc6")
            # First half -> mean, second half -> log of the standard deviation.
            mean = gaussian_para[:, :z_dim]
            stdev = tf.exp(gaussian_para[:, z_dim:])
            return mean, stdev

    def decoder(self, lattenCode, is_training = True, reuse = False):
        """
        Map a batch of latent codes back to images.

        lattenCode:  tensor of latent vectors, z_dim wide.
        is_training: forwarded to every bn() call.
        reuse:       forwarded to the "decoder" variable scope; pass True to
                     share variables with a previously built decoder.

        Returns the sigmoid of the last deconvolution layer's output.
        Requires self.flat_numAttrs, which encoder() sets.
        """
        with tf.variable_scope("decoder", reuse=reuse):
            dfc2 = new_fcLayer(lattenCode,
                               inputChannels = z_dim,
                               outputChannels = fc2_size,
                               useReLU=True,
                               name = "de_fc1")
            dfc2 = bn(dfc2, is_training=is_training, scope="de_bn1")
            dfc1 = new_fcLayer(dfc2,
                               inputChannels = fc2_size,
                               outputChannels = fc1_size,
                               useReLU = True,
                               name = "de_fc2")
            dfc1 = bn(dfc1, is_training=is_training, scope="de_bn2")
            dflat = new_fcLayer(dfc1,
                                inputChannels = fc1_size,
                                outputChannels = self.flat_numAttrs,
                                useReLU = True,
                                name = "de_conv3")
            dflat = bn(dflat, is_training=is_training, scope="de_bn3")
            # Undo the encoder's flatten: back to the conv3 feature-map shape.
            side = Image.size
            dconv3 = tf.reshape(dflat, shape=[-1, side, side, numFilters3])

            # Each deconvolution mirrors one encoder convolution, so its output
            # depth is the input depth of that encoder layer.
            dconv2 =  new_dconvLayer(dconv3, dcnnArchitecture3,
                                     [self.batch_size, side, side, numFilters2], name = "de_conv4")
            dconv2 = bn(dconv2, is_training=is_training, scope="de_bn4")
            dconv1 =  new_dconvLayer(dconv2, dcnnArchitecture2,
                                     [self.batch_size, side, side, numFilters1], name = "de_conv5")
            dconv1 = bn(dconv1, is_training=is_training, scope="de_bn5")
            reconstruct = new_dconvLayer(dconv1, dcnnArchitecture1,
                                         [self.batch_size, side, side, Image.numChannels],
                                         name = "de_conv6")
            reconstruct = tf.nn.sigmoid(reconstruct)
            return reconstruct

    def train(self):
        """Training loop; not implemented yet."""
        raise NotImplementedError

    def generateFakeImages(self):
        """Image sampling from latent codes; not implemented yet."""
        raise NotImplementedError
    

In [7]:
# Build the VAE graph with a fixed batch size of 256.
vae = VAE(batch_size=256)

(256,)
(256,)
