In [14]:
import numpy as np
import matplotlib.pyplot as plt
from functools import partial, reduce
import os.path
import os
import h5py
import math

from PIL import Image
from scipy.ndimage import imread

from keras.layers import Flatten, Reshape, Input, Dense, Lambda, Dropout, Activation, BatchNormalization
from keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, ZeroPadding2D, UpSampling2D
from keras.models import Model, Sequential
from keras.callbacks import TensorBoard, ModelCheckpoint, Callback
from keras.metrics import binary_crossentropy
from keras.engine.topology import Layer
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.advanced_activations import LeakyReLU
import keras.optimizers
from keras import backend as K
from keras.datasets import mnist

from keras.utils.np_utils import to_categorical

K.set_floatx('float32')

%matplotlib inline

We'll start by experimenting with MNIST data as a proof of concept.

### Convolutional Variational Auto Encoder

This is the vgg16 building code from HW5

In [2]:
def build_vgg16(img_width, img_height, framework='tf'):
    """Assemble the convolutional part of VGG16 as a Sequential model.

    Args:
        img_width, img_height: spatial size used for the input shape.
        framework: 'th' selects Theano dimension ordering (channels first);
            any other value selects TensorFlow ordering (channels last).

    Returns:
        A Sequential model with thirteen 3x3 relu convolutions named
        conv1_1 .. conv5_3, each preceded by a (1, 1) zero padding, and a
        2x2 max pooling after each of the five stages.

    Note: this sets the global Keras image dimension ordering as a side effect.
    """
    channels_first = framework == 'th'
    K.set_image_dim_ordering('th' if channels_first else 'tf')

    if channels_first:
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    # (number of filters, number of conv layers) for each VGG16 stage
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()
    for stage_no, (filters, depth) in enumerate(stages, start=1):
        for conv_no in range(1, depth + 1):
            if stage_no == 1 and conv_no == 1:
                # only the very first layer declares the input shape
                model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Conv2D(filters, (3, 3), activation='relu', name='conv{}_{}'.format(stage_no, conv_no)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    return model

And a helper function to load a new vgg net (also adapted from HW5 code)

In [3]:
def build_vgg(img_width, img_height, weights_path='vgg16_weights.h5'):
    """Build a TensorFlow-ordered VGG16 conv stack carrying pre-trained weights.

    The weights are first loaded layer by layer into a Theano-ordered copy of
    the network and then copied into a TensorFlow-ordered copy, transposing
    every convolution kernel on the way.

    Args:
        img_width, img_height: spatial input size forwarded to build_vgg16.
        weights_path: path to the HDF5 weights file. It must contain an
            'nb_layers' attribute and 'layer_<k>' groups holding
            'param_<p>' datasets.

    Returns:
        The TensorFlow-ordered Sequential model.

    Raises:
        AssertionError: if weights_path does not exist.
    """
    # Fail before building anything if the weights file is missing. The
    # caller-supplied path is honoured (it used to be overwritten by a literal).
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'

    th_model = build_vgg16(img_width, img_height, 'th')

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    with h5py.File(weights_path, 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(th_model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            th_model.layers[k].set_weights(weights)
    print('Theano Model loaded.')

    # create tensorflow model and transfer weights from theano
    tf_model = build_vgg16(img_width, img_height, 'tf')

    for th_layer, tf_layer in zip(th_model.layers, tf_model.layers):
        # Only convolutions carry weights; padding and pooling layers have
        # nothing to copy, so they are skipped.
        if isinstance(th_layer, Conv2D):
            kernel, bias = th_layer.get_weights()
            # NOTE(review): assumes the Theano-ordered kernel is laid out as
            # (out, in, rows, cols) - confirm against the installed Keras version.
            kernel = np.transpose(kernel, (2, 3, 1, 0))
            tf_layer.set_weights([kernel, bias])

    return tf_model

#### Custom layer to calculate our VGG CVAE loss

In [26]:
# The CVAE objective here replaces the pixel-wise reconstruction term with a
# feature-wise (perceptual) one. A Keras loss function only receives
# (y_true, y_pred), which is not enough to express that objective, so the loss
# is computed inside a layer and the layer's output is used as the loss value.
class VGG_CVAELossLayer(Layer):
    """Layer whose output is the per-sample CVAE loss.

    call() expects a list laid out as
        [encoded, real_1, ..., real_n, gen_1, ..., gen_n]
    where `encoded` holds z_mean in its first latent_dim columns and
    z_log_sigma in the remaining columns, and real_i / gen_i are matching
    4-D feature tensors of the input image and of its reconstruction.

    Args:
        latent_dim: width of the latent space.
        image_size: stored on the layer; not used in the computation.
        kl_lambda: weight of the KL term.
        percep_lambda: weight of the perceptual term.
    """

    def __init__(self, latent_dim, image_size, kl_lambda = 1, percep_lambda = 1, **kwargs):
        self.latent_dim = latent_dim
        self.image_size = image_size

        self.kl_l = kl_lambda
        self.pc_l = percep_lambda

        self.is_placeholder = True

        super(VGG_CVAELossLayer, self).__init__(**kwargs)

    def calculate_kl(self, z_mean, z_log_sigma):
        """Return the per-sample KL term, summed over the latent axis."""
        # this is the Kullback-Leibler divergence between the
        # distribution in latent space and the prior
        # NOTE(review): clipping z_log_sigma at 1e-10 forbids negative values,
        # and exp(z_log_sigma) is used as a variance here while dist_sample
        # uses it as a standard deviation - confirm the intended convention.
        latent_loss = - 0.5 * K.sum(1 + K.clip(z_log_sigma, 1e-10, 1e10) - K.clip(K.square(z_mean), 1e-10, 1e10) - K.clip(K.exp(z_log_sigma), 1e-10, 1e10), axis = -1)

        return latent_loss

    def calculate_perceploss(self, real, decoded):
        """Mean squared difference of two 4-D feature tensors, per sample."""
        # K.reduce_mean / K.subtract are TensorFlow names, not Keras backend ones
        return K.mean(K.square(real - decoded), axis=[1, 2, 3])

    def call(self, inputs):
        # both halves of the latent code come from the encoder output (inputs[0])
        encoded = inputs[0]
        z_mean = encoded[:, :self.latent_dim]
        z_log_sigma = encoded[:, self.latent_dim:]
        kl_div = self.calculate_kl(z_mean, z_log_sigma)

        # the remaining inputs are n real feature maps followed by n generated ones
        perceps = (len(inputs) - 1) // 2
        p_loss = 0.
        for i in range(1, perceps + 1):
            p_loss = p_loss + self.calculate_perceploss(inputs[i], inputs[i + perceps])

        # this is the hacky way to calculate our loss
        # we use the inputs given to calculate the loss
        # and then return it for direct use for fitting
        total = self.kl_l * kl_div + self.pc_l * p_loss

        # shape (batch, 1), matching compute_output_shape
        return K.expand_dims(total, axis=-1)

    def compute_output_shape(self, input_shape):
        # with several inputs Keras passes a list of shapes; take the batch
        # size from the first one
        first = input_shape[0] if isinstance(input_shape, list) else input_shape
        return (first[0], 1)
    
    

In [27]:
# helper that decides whether a zero-padding layer is needed at this stage
def build_padding(v_rem, h_rem, mult):
    """Consume `mult` from each remainder that is at least `mult`.

    Args:
        v_rem, h_rem: remaining vertical / horizontal size to make up.
        mult: amount accounted for by padding at the current stage.

    Returns:
        (layer, v_rem, h_rem): layer is a ZeroPadding2D padding by 1 along
        every axis whose remainder was consumed, or None when neither axis
        needed padding; the remainders are returned after the subtraction.
    """
    pad_v = v_rem >= mult
    pad_h = h_rem >= mult

    if pad_v:
        v_rem -= mult
    if pad_h:
        h_rem -= mult

    if not (pad_v or pad_h):
        return None, v_rem, h_rem

    return ZeroPadding2D(padding=(int(pad_v), int(pad_h))), v_rem, h_rem

In [28]:
def dist_sample(latent_dims, inputs):
    """Draw z = z_mean + exp(z_log_sigma) * eps with eps ~ N(0, 1).

    Args:
        latent_dims: width of the latent space.
        inputs: 2-D tensor whose first latent_dims columns are z_mean and
            whose remaining columns are z_log_sigma.

    Returns:
        A tensor with one latent sample per row of `inputs`.
    """
    z_mean = inputs[:,:latent_dims]
    z_log_sigma = inputs[:,latent_dims:]

    # One independent noise vector per sample in the batch. A shape of
    # (latent_dims,) would broadcast the same draw to every row.
    eps = K.random_normal(shape=(K.shape(z_mean)[0], latent_dims), mean=0.0, stddev=1.0, dtype='float32')

    return z_mean + (K.exp(z_log_sigma) * eps)

def sample_output_shape(input_shape):
    """Output shape of the sampling layer: same batch size, half the width.

    Raises AssertionError unless input_shape has exactly two entries.
    """
    dims = list(input_shape)
    assert len(dims) == 2
    batch, width = dims
    return (batch, width // 2)
    
def get_layer_output(model, layer_num, learning_phase=True):
    """Return a backend function from the model's first-layer input to the
    output of layer `layer_num`.

    When learning_phase is true the returned function also takes the Keras
    learning-phase flag as a second input.
    """
    feeds = [model.layers[0].input]
    if learning_phase:
        feeds.append(K.learning_phase())
    return K.function(feeds, [model.layers[layer_num].output])

And finally the code to build the net itself!

In [29]:
class VGG_CVAE(object):
    """Convolutional variational auto-encoder trained with a VGG16
    feature-space (perceptual) reconstruction loss.

    Constructing an instance builds:
        encoder      -- Model: image -> vector of width 2 * latent_dim
        sample_layer -- Lambda layer drawing a latent sample via dist_sample
        decoder      -- Model: latent sample -> image
        kl_loss      -- the VGG_CVAELossLayer named 'cvae_loss'
        model        -- end-to-end Model whose output is that loss layer

    The end-to-end model is not compiled here.

    Args:
        image_size: 3-tuple (dim0, dim1, channels) of the input images.
        latent_dim: width of the latent space.
        int_dim: width of the intermediate dense layers.
        num_k: base number of convolution filters.
        k_size: side length of the (square) convolution kernels.
    """

    def __init__(self, image_size, latent_dim, int_dim = 256, num_k=32, k_size=4):

        self.image_size = image_size
        self.latent_dim = latent_dim
        self.int_dim = int_dim
        self.num_k = num_k
        self.k_size = k_size

        self._build_net()

    def _build_net(self):
        """Wire encoder, sampler, decoder and the frozen VGG into self.model."""
        self._build_encoder()
        self._build_sampler()
        self._build_decoder()
        self.kl_loss = VGG_CVAELossLayer(self.latent_dim, self.image_size, name='cvae_loss')

        vgg = build_vgg(self.image_size[0], self.image_size[1])
        for layer in vgg.layers:
            layer.trainable = False

        # Feature extractors ending at conv1_1, conv3_1 and conv5_1 (indices
        # into the layer list produced by build_vgg16). They are Models so
        # they can be applied to symbolic tensors; a K.function can only be
        # called on concrete arrays.
        feature_nets = [Model(vgg.layers[0].input, vgg.layers[idx].output) for idx in (1, 11, 25)]

        raw_inp = Input(shape=self.image_size)

        encoded = self.encoder(raw_inp)
        sample = self.sample_layer(encoded)
        decoded = self.decoder(sample)

        real_img = self._as_vgg_input(raw_inp)
        gen_img = self._as_vgg_input(decoded)

        real_feats = [net(real_img) for net in feature_nets]
        gen_feats = [net(gen_img) for net in feature_nets]

        # reuse the single named loss layer instead of creating a second one
        loss_layer = self.kl_loss([encoded] + real_feats + gen_feats)

        self.model = Model(raw_inp, loss_layer)

    def _as_vgg_input(self, tensor):
        """Return `tensor` with the three channels the VGG stack expects."""
        channels = self.image_size[2]
        if channels == 3:
            return tensor
        if channels == 1:
            # tile the single channel three times along the last axis
            return Lambda(lambda t: K.repeat_elements(t, 3, axis=-1))(tensor)
        raise ValueError('VGG features need 1 or 3 image channels, got {}'.format(channels))

    def _build_encoder(self):
        """Build self.encoder: four conv/batch-norm/LeakyReLU/pool stages
        followed by two dense layers."""
        inp = Input(shape=self.image_size)

        # just a series of convolutions and poolings to get the size down
        x = inp
        for factor in (1, 2, 4, 4):
            x = Conv2D(self.num_k * factor, (self.k_size, self.k_size), padding='same')(x)
            x = BatchNormalization(momentum=0.9)(x)
            x = LeakyReLU(alpha=0.1)(x)
            x = MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same')(x)

        # then we flatten, and have two consecutive dense layers
        flat = Flatten()(x)
        d = Dense(self.int_dim, activation='relu')(flat)
        d = Dropout(0.2)(d)
        # NOTE(review): the sigmoid bounds both z_mean and z_log_sigma to
        # (0, 1) - confirm this restriction is intended.
        encoded = Dense(self.latent_dim * 2, activation='sigmoid')(d)

        self.encoder = Model(inp, encoded)

    def _build_sampler(self):
        '''layer to sample z from the latent space'''
        # build a layer to sample from the given probability distribution
        sample_func = partial(dist_sample, self.latent_dim)
        # partial objects have no __name__, which Keras expects on the
        # function handed to Lambda, so set one manually
        sample_func.__name__ = 'decoder_sample_func'

        self.sample_layer = Lambda(sample_func, output_shape = sample_output_shape)

    def _build_decoder(self):
        """Build self.decoder: two dense layers followed by four stride-2
        transposed convolutions."""
        # figure out what dimension we want to start with
        # and how much remainder we'll have (for padding)
        v_dim = self.image_size[0] // 16
        h_dim = self.image_size[1] // 16
        v_rem = self.image_size[0] - (v_dim * 16) + 1
        h_rem = self.image_size[1] - (h_dim * 16) + 1

        # get input
        sample = Input(shape=(self.latent_dim,))

        # upscale with dense, use some dropout to combat mode collapse
        x = Dense(self.int_dim, activation='relu')(sample)
        x = Dense(self.num_k // 2 * v_dim * h_dim, activation = 'relu')(x)
        x = Dropout(0.3)(x)
        x = Reshape((v_dim, h_dim, self.num_k // 2))(x)

        # convolutional upscaling layers, each followed by an optional
        # padding layer chosen by build_padding
        # NOTE(review): the padding arithmetic has only been traced for sizes
        # that are multiples of 16 - verify the output shape for other sizes.
        for filters, mult in ((self.num_k*4, 8), (self.num_k*2, 4), (self.num_k, 2)):
            x = Conv2DTranspose(filters, (self.k_size, self.k_size), strides=(2,2), padding='same')(x)
            x = BatchNormalization(momentum=0.9)(x)
            x = LeakyReLU(alpha=0.1)(x)

            pad, v_rem, h_rem = build_padding(v_rem, h_rem, mult)
            if pad is not None:
                x = pad(x)

        # the generated image must have as many channels as the input image
        gen = Conv2DTranspose(self.image_size[2], (self.k_size, self.k_size), strides=(2,2), padding='same', activation='sigmoid')(x)

        self.decoder = Model(sample, gen)

In [31]:
# Build the VGG-backed CVAE with image_size=(64, 64, 1) and latent_dim=64.
cvae = VGG_CVAE((64,64,1), 64)

ResourceExhaustedError: OOM when allocating tensor with shape[3,3,256,512]
	 [[Node: conv4_1_1/random_uniform/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](conv4_1_1/random_uniform/RandomUniform, conv4_1_1/random_uniform/sub)]]

Caused by op 'conv4_1_1/random_uniform/mul', defined at:
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-31-966afd9688e1>", line 1, in <module>
    cvae = VGG_CVAE((64,64,1), 64)
  File "<ipython-input-29-db739693f9ba>", line 11, in __init__
    self._build_net()
  File "<ipython-input-29-db739693f9ba>", line 19, in _build_net
    vgg = build_vgg(self.image_size[0], self.image_size[1])
  File "<ipython-input-3-49540505d1ff>", line 4, in build_vgg
    th_model = build_vgg16(img_width, img_height, 'th')
  File "<ipython-input-2-7650971b9888>", line 36, in build_vgg16
    model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_1'))
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/keras/models.py", line 455, in add
    output_tensor = layer(self.outputs[0])
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/keras/engine/topology.py", line 528, in __call__
    self.build(input_shapes[0])
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/keras/layers/convolutional.py", line 134, in build
    constraint=self.kernel_constraint)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/keras/engine/topology.py", line 364, in add_weight
    weight = K.variable(initializer(shape), dtype=K.floatx(), name=name)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/keras/initializers.py", line 205, in __call__
    dtype=dtype, seed=self.seed)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 3146, in random_uniform
    dtype=dtype, seed=seed)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/ops/random_ops.py", line 245, in random_uniform
    return math_ops.add(rnd * (maxval - minval), minval, name=name)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 794, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1015, in _mul_dispatch
    return gen_math_ops._mul(x, y, name=name)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1625, in _mul
    result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/jibben/anaconda3/envs/deepcv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
    self._traceback = _extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[3,3,256,512]
	 [[Node: conv4_1_1/random_uniform/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](conv4_1_1/random_uniform/RandomUniform, conv4_1_1/random_uniform/sub)]]


Let us define a helper function to demo a model on a given dataset

In [5]:
def demo_model(model, data, shape, show_latent=False):
    """Plot each input image next to its reconstruction.

    Args:
        model: model whose layers[1] and layers[2] are used as encoder and
            decoder respectively.
        data: array of images; the first axis is the batch.
        shape: shape each image is reshaped to before plotting.
        show_latent: when true, the encoded vector is written under the
            decoded image.
    """
    e = model.layers[1]
    d = model.layers[2]

    batch_size = data.shape[0]

    vecs = e.predict(data, batch_size = batch_size, verbose=0)
    pred = d.predict(vecs, batch_size = batch_size, verbose=0)

    for i in range(batch_size):
        fig = plt.figure()

        panels = (('input', data[i]), ('decoded', pred[i]))
        for col, (title, image) in enumerate(panels, start=1):
            a = fig.add_subplot(1, 2, col)
            for side in ('top', 'bottom', 'left', 'right'):
                a.spines[side].set_color('none')
            # real booleans: the string 'off' is truthy and leaves ticks on
            a.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
            a.imshow(image.reshape(shape))
            a.set_title(title)

        # `a` is the 'decoded' axes at this point
        if show_latent:
            a.set_xlabel(vecs[i])

        plt.show()
        # release the figure so long batches do not accumulate open figures
        plt.close(fig)
        
        

In [6]:
class ImageSaver(Callback):
    ''' Keras Callback to save demo images after each epoch

    Every `period` epochs the fixed batch `images` is pushed through
    model.layers[1] and model.layers[2], and a figure with one
    input / decoded pair per row is written to
    image_path.format(epoch=epoch).

    Args:
        image_path: format string containing an {epoch} field.
        images: array of demo images; the first axis is the batch.
        shape: shape each image is reshaped to before plotting.
        period: save on epochs that are a multiple of this value.
    '''
    def __init__(self, image_path, images, shape, period=1):
        self.images = images
        self.batch_size = images.shape[0]
        self.image_path = image_path
        self.shape = shape
        self.period = period

        self.dpi = 128
        # Clamp each side to at least 1 so the integer division cannot
        # produce a zero-sized figure for small images.
        self.fig_size = (max(1, (shape[0] * 4) // self.dpi),
                         max(1, int(shape[1] * 1.5 * self.batch_size) // self.dpi))

        super(ImageSaver, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        if epoch % self.period != 0:
            return

        vecs = self.model.layers[1].predict(self.images, batch_size=self.batch_size, verbose=0)
        pred = self.model.layers[2].predict(vecs, batch_size=self.batch_size, verbose=0)

        fig = plt.figure(figsize=self.fig_size)

        for i in range(self.batch_size):
            panels = (('input', self.images[i]), ('decoded', pred[i]))
            for col, (title, image) in enumerate(panels, start=1):
                a = fig.add_subplot(self.batch_size, 2, i * 2 + col)
                for side in ('top', 'bottom', 'left', 'right'):
                    a.spines[side].set_color('none')
                # real booleans: the string 'off' is truthy and leaves ticks on
                a.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
                # use the shape given to the callback, not a notebook global
                a.imshow(image.reshape(self.shape))
                a.set_title(title)

        fig.savefig(self.image_path.format(epoch=epoch), dpi=self.dpi)
        # one figure is created per saved epoch; close it so they do not pile up
        plt.close(fig)
            
            

Let's try using this on mnist data, just for a proof of concept

In [None]:
# Build a CVAE for 28x28x1 inputs with a 64-wide latent space and print its summary.
# NOTE(review): the build_cvae defined further down in this notebook takes no
# `vgg` keyword - presumably this call targets an earlier definition; confirm.
shape = (28,28,1)
latent_dim = 64

model = build_cvae(shape, latent_dim, vgg=False)
model.summary()

In [None]:
(x_train, _), (x_test, _) = mnist.load_data()

# Scale the pixels by 1/255 as float32 and reshape every image to 28x28x1.
x_train = np.reshape(x_train.astype('float32') / 255, (len(x_train), 28, 28, 1))
x_test = np.reshape(x_test.astype('float32') / 255, (len(x_test), 28, 28, 1))

In [None]:
shape = (28,28,1)
latent_dim = 2

# NOTE(review): the build_cvae defined further down in this notebook takes no
# `vgg` keyword - presumably this call targets an earlier definition; confirm.
model = build_cvae(shape, latent_dim, vgg=False)

# Dummy targets of ones, one per sample. Their length is taken from the data
# rather than hard-coded to the MNIST split sizes.
model.fit(x_train, np.repeat(1, len(x_train)), epochs=5, batch_size=64, shuffle=True, validation_data=(x_test, np.repeat(1, len(x_test))), callbacks=[TensorBoard(log_dir='/tmp/ae')])

And show what it does to MNIST

In [None]:
# Plot input / reconstruction pairs for test images 90..99.
demo_model(model, x_test[90:100], (28,28))

Not bad considering we are effectively compressing each image to two dimensions! But let's move on to try it with something a bit more interesting

How about flowers!

In [None]:
# Configuration for the flower experiment: 64x64 RGB inputs, 128-wide latent space.
shape = (64,64,3)
latent_dim = 128
int_dim = 256
nb_train = 8189
batch_size = 32
epochs = 50

train_data_dir = '../flowers/train'

flower_model = build_cvae(shape, latent_dim, int_dim=int_dim, optimizer='adamax')

# Training batches are read from disk, rescaled by 1/255 and augmented with
# random zoom and horizontal flips.
flower_train_generator = ImageDataGenerator(rescale=1/255, zoom_range=0.2, horizontal_flip=True).flow_from_directory(
        train_data_dir,
        target_size=(shape[0], shape[1]),
        batch_size = batch_size,
        class_mode='binary')

# NOTE(review): the file name contains a double dot ('flower_weights..hdf5') - confirm it is intended.
flower_checkpoint = ModelCheckpoint('./models/flower_weights..hdf5', monitor='loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)

In [None]:
# Train on the flower generator for `epochs` epochs, without validation data.
# NOTE(review): flower_checkpoint is created in the previous cell but is not
# in the callbacks list here - confirm whether it should be.
hist = flower_model.fit_generator(
        flower_train_generator,
        steps_per_epoch = nb_train // batch_size,
        epochs = epochs,
        validation_data = None,
        verbose = 1,
        initial_epoch = 0,
        callbacks=[TensorBoard(log_dir='/tmp/flowers')]
)

In [None]:
# Plot input / reconstruction pairs for the images of one training batch.
demo_model(flower_model, next(flower_train_generator)[0], (64,64, 3))

Hmmm, not super happy with the mode collapse we're seeing. Let's try something with more data

How about faces!

In [7]:
# Configuration for the face experiment: 128x128 RGB inputs, 256-wide latent space.
shape = (128,128,3)
latent_dim = 256
int_dim = 500
nb_train = 162770
nb_validation = 15000
batch_size = 8
epochs = 50

# train / validation / test image directories
train_data_dir = '../faces/celebs/train'
validation_data_dir = '../faces/celebs/validation'
test_data_dir = '../faces/celebs/test'
face_model = build_cvae(shape, latent_dim, int_dim=int_dim, optimizer='adamax')

In [8]:
# The image directories are laid out as two classes: the first class is empty
# and the second holds all the images. That way ImageDataGenerator with
# class_mode='binary' yields the label 1 for every image, which is the dummy
# target the fit expects (as was done above).

face_train_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        train_data_dir,
        target_size=(shape[0], shape[1]),
        batch_size=batch_size,
        class_mode='binary')

face_validation_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        validation_data_dir,
        target_size=(shape[0], shape[1]),
        batch_size=batch_size,
        class_mode='binary')

# class_mode=None: this generator yields image batches only, without labels
face_test_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        test_data_dir,
        target_size=(shape[0], shape[1]),
        batch_size=16,
        class_mode=None)

# checkpoint every 5 epochs (best 'loss' only); the image saver gets one
# batch from the test generator as its demo images
face_checkpoint = ModelCheckpoint('./models/face_weights.hdf5', monitor='loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=5)
face_saver = ImageSaver('./images/face-{epoch:02d}.png', next(face_test_generator), shape, period=1)

Found 162770 images belonging to 2 classes.
Found 19867 images belonging to 2 classes.
Found 19962 images belonging to 2 classes.


In [9]:
# Train the face model with validation, TensorBoard logging, checkpointing
# and per-epoch demo images; verbose=0 silences the progress output.
hist = face_model.fit_generator(
        face_train_generator,
        steps_per_epoch = nb_train // batch_size,
        epochs = epochs,
        validation_data = face_validation_generator,
        validation_steps = nb_validation // batch_size,
        verbose = 0,
        initial_epoch = 0,
        callbacks=[TensorBoard(log_dir='/tmp/faces'), face_checkpoint, face_saver]
)

KeyboardInterrupt: 

In [None]:
# Plot input / reconstruction pairs for one training batch, with the encoded vectors shown.
demo_model(face_model, next(face_train_generator)[0], (128,128,3), show_latent=True)

In [None]:
# Layers 1 and 2 of the face model, used below as encoder (e) and decoder (d).
e = face_model.layers[1]
d = face_model.layers[2]

In [None]:
# Encode and decode the images of one training batch.
data = next(face_train_generator)[0]
    
# note: this rebinds the notebook-level batch_size to the size of this batch
batch_size = data.shape[0]
    
vecs = e.predict(data, batch_size = batch_size, verbose=0)
pred = d.predict(vecs, batch_size = batch_size, verbose=0)

In [None]:
    # Plot every input of the batch next to its reconstruction (uses data,
    # pred, vecs, batch_size and shape from the cells above).
    show_latent = False
    for i in range(batch_size):
        fig = plt.figure()

        for col, (title, image) in enumerate((('input', data[i]), ('decoded', pred[i])), start=1):
            a = fig.add_subplot(1, 2, col)
            for side in ('top', 'bottom', 'left', 'right'):
                a.spines[side].set_color('none')
            # real booleans: the string 'off' is truthy and leaves ticks on
            a.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
            a.imshow(image.reshape(shape))
            a.set_title(title)

        # `a` is the 'decoded' axes at this point
        if(show_latent):
            a.set_xlabel(vecs[i])

        plt.show()

In [None]:
    # Decode a random vector. The vector is 2-D (batch, features) because
    # predict() needs a batch axis, and it is the array actually passed to
    # the decoder (the original passed an undefined name `rand`).
    batch_size = 1
    vecs = np.random.uniform(size=(batch_size, latent_dim*2))
    pred = d.predict(vecs, batch_size=batch_size, verbose=0)

    show_latent = False
    for i in range(batch_size):
        fig = plt.figure()

        # NOTE(review): the 'input' panel shows data[i] from an earlier cell;
        # it is unrelated to the random vector being decoded.
        for col, (title, image) in enumerate((('input', data[i]), ('decoded', pred[i])), start=1):
            a = fig.add_subplot(1, 2, col)
            for side in ('top', 'bottom', 'left', 'right'):
                a.spines[side].set_color('none')
            # real booleans: the string 'off' is truthy and leaves ticks on
            a.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
            a.imshow(image.reshape(shape))
            a.set_title(title)

        # `a` is the 'decoded' axes at this point
        if(show_latent):
            a.set_xlabel(vecs[i])

        plt.show()

In [None]:
def train_generator(train_dir, batch_size):
    """Endlessly yield (images, targets) batches read from train_dir.

    The directory is listed again on every pass. Images left over at the end
    of a pass are carried into the first batch of the next pass.

    Args:
        train_dir: directory whose entries are read with imread.
        batch_size: number of images per yielded batch.

    Yields:
        (np.array of batch_size images, array of batch_size ones)

    Raises:
        ValueError: if train_dir contains no entries (instead of spinning
            forever without yielding).
    """
    x_train = []

    while 1:
        files = os.listdir(train_dir)
        if not files:
            raise ValueError('no files found in {!r}'.format(train_dir))

        for fname in files:
            x_train.append(imread(os.path.join(train_dir, fname)))
            if len(x_train) >= batch_size:
                yield np.array(x_train), np.repeat(1, batch_size)
                x_train = []
            
            
def valid_generator(valid_dir):
    """Endlessly yield (image, np.array([1])) pairs read from valid_dir.

    The directory is listed once, when the generator starts.
    NOTE(review): the image is yielded without a leading batch axis -
    confirm that the consumer expects that.

    Raises:
        ValueError: if valid_dir contains no entries (instead of spinning
            forever without yielding).
    """
    files = os.listdir(valid_dir)
    if not files:
        raise ValueError('no files found in {!r}'.format(valid_dir))

    while 1:
        for fname in files:
            # read from valid_dir itself, not from a notebook-level train_dir
            x_valid = imread(os.path.join(valid_dir, fname))
            yield x_valid, np.array([1])

In [None]:
# Create a training generator over the abstract-art scrape with batches of 8.
t = train_generator('../scrape/flickr/abstract_art', 8)

In [None]:
x_train = []
train_dir = '../scrape/flickr/watercolor'

for fname in os.listdir(train_dir):
    try:
        # convert to RGB first so grayscale / RGBA files also give 128x128x3
        img = Image.open(os.path.join(train_dir, fname)).convert('RGB').resize((128, 128))
    except (OSError, ValueError) as err:
        # skip unreadable files, but say why instead of swallowing everything
        print(fname, err)
        continue
    # scale by 255 so the range matches the 1/255 rescale used by the other pipelines
    x_train.append(np.array(img).reshape((128,128,3))/255.)

x_train = np.array(x_train)
# dummy targets of ones, one per image
y_train = np.repeat(1, x_train.shape[0])

In [None]:
# Display the shape of the loaded image array.
x_train.shape

In [None]:
# Configuration for the art experiment: 128x128 RGB inputs, 128-wide latent space.
shape = (128,128,3)
latent_dim = 128
int_dim = 256

art_model = build_cvae(shape, latent_dim, int_dim=int_dim)

In [None]:
# Print the layer summary of the art model.
art_model.summary()

In [None]:
# Train the art model for 20 epochs in batches of 6, without validation data.
art_model.fit(x_train, y_train, epochs=20, batch_size=6, shuffle=True, validation_data=None, callbacks=[TensorBoard(log_dir='/tmp/ae')])

In [None]:
# Layers 1 and 2 of the art model, used as encoder (e) and decoder (d).
e = art_model.layers[1]
d = art_model.layers[2]

# NOTE(review): this bare expression has no effect (it is not the last line
# of the cell, so it is not displayed either).
x_train[100:108]

# encode and decode the whole training array
vecs = e.predict(x_train, batch_size=8, verbose=0)
print('vecs')
pred = d.predict(vecs, batch_size=8, verbose=0)

# show the first ten images, each followed by its encoded vector and its reconstruction
for i in range(10):
    plt.imshow(x_train[i].reshape((128,128,3)))
    plt.axis('on')
    plt.show()
    print(vecs[i])
    plt.imshow(pred[i].reshape((128,128,3)))
    plt.axis('on')
    plt.show()

In [None]:
# Layers 1 and 2 of the face model, used as encoder (e) and decoder (d).
e = face_model.layers[1]
d = face_model.layers[2]

inp = next(face_train_generator)
images = inp[0]

vecs = e.predict(images, batch_size=8, verbose=0)
pred = d.predict(vecs, batch_size=8, verbose=0)

# Iterate over the images actually in the batch (a fixed range(10) overruns
# the generator's batch size of 8) and plot them at their own shape instead
# of reshaping to 178x218, which does not match the 128x128 target size.
for i in range(len(images)):
    plt.imshow(images[i])
    plt.axis('on')
    plt.show()
    print(vecs[i])
    plt.imshow(pred[i].reshape(images[i].shape))
    plt.axis('on')
    plt.show()

Let's try using a pre-trained VGG to help with the encoding!

VGG net from hw4

In [None]:
# For this architecture, we already have all the convolutions from the VGGnet;
# we just have to shrink it to latent_dim size!
def build_vgg_encoder(vggnet, shape, latent_dim, num_k=64, k_size=3, int_dim=256):
    """Stack a small dense head on top of an existing VGG model.

    Args:
        vggnet: Keras model supplying the convolutional features; only its
            ``output_shape`` is read and the model itself is not modified.
        shape: Unused; kept so existing call sites keep working.
        latent_dim: Latent size; the head's final Dense layer has
            ``latent_dim * 2`` units.
        num_k: Unused; kept for signature compatibility.
        k_size: Unused; kept for signature compatibility.
        int_dim: Width of the intermediate Dense layer.

    Returns:
        A ``Sequential`` model that applies ``vggnet`` and then the head.
    """
    # Head: pool once more, flatten, then two dense layers.
    encoder_top = Sequential()
    encoder_top.add(MaxPooling2D((2, 2), strides=(2, 2), input_shape = vggnet.output_shape[1:]))
    encoder_top.add(Flatten())
    encoder_top.add(Dense(int_dim, activation='relu'))
    encoder_top.add(Dropout(0.25))
    encoder_top.add(Dense(latent_dim * 2))

    # Previously the head was attached to `vggnet` while a different, freshly
    # built network (from a `build_vgg` helper) was returned without it.
    # Return the passed-in network and the head as one model instead.
    vgg_encoder = Sequential()
    vgg_encoder.add(vggnet)
    vgg_encoder.add(encoder_top)

    return vgg_encoder

def build_cvae(img_shape, image_size, latent_dim, int_dim=256, num_k=64, optimizer='adagrad'):
    """Assemble and compile the convolutional VAE.

    Args:
        img_shape: Indexable of three ints describing the input image; passed
            to ``Input``, ``build_encoder`` and ``build_decoder``.
        image_size: Forwarded unchanged to ``CVAELossLayer``.
        latent_dim: Latent size, forwarded to the encoder, decoder and loss
            layer.
        int_dim: Forwarded to ``build_encoder`` / ``build_decoder``.
        num_k: Forwarded to ``build_encoder`` / ``build_decoder``.
        optimizer: Optimizer passed to ``model.compile``.

    Returns:
        A compiled ``Model`` whose single output is the ``cvae_loss`` layer.
    """
    # Use the img_shape argument throughout; the body previously read a
    # global named `shape`, silently ignoring the parameter.
    raw_inp = Input(shape=(img_shape[0], img_shape[1], img_shape[2]))

    encoder = build_encoder(img_shape, latent_dim, num_k=num_k, int_dim=int_dim)
    encoded = encoder(raw_inp)

    decoder = build_decoder(img_shape, latent_dim, num_k=num_k, int_dim=int_dim)
    decoded = decoder(encoded)

    # The loss layer receives the flattened input, the encoder output and the
    # flattened reconstruction.
    flat_raw = Flatten()(raw_inp)
    flat_decoded = Flatten()(decoded)
    loss = CVAELossLayer(latent_dim, image_size, name='cvae_loss')([flat_raw, encoded, flat_decoded])

    model = Model(inputs=[raw_inp], outputs=[loss])
    # The model output already is the loss value, so the compiled loss simply
    # passes y_pred through and ignores y_true.
    model.compile(optimizer=optimizer, loss={'cvae_loss' : lambda y_true, y_pred: y_pred})
    return model

def build_vgg_cvae(*args, **kwargs):
    """Placeholder for a CVAE whose encoder is built on a pre-trained VGG.

    This was left as a bare ``def build_vgg_cvae`` line, which is a
    SyntaxError and stops every definition in the cell from being created.
    It is now a valid stub so the cell runs.

    Raises:
        NotImplementedError: Always; the model has not been written yet.
    """
    # TODO: implement once the VGG-based encoder is wired into the CVAE.
    raise NotImplementedError('build_vgg_cvae has not been implemented yet')

Hmm, I'm not too happy with the mode collapse from the CVAEs. Let's try making a GAN now.

In [None]:
def _deconv_bn_relu(x, filters, k_size):
    """Apply a stride-2 'same' transposed conv, batch norm, then ReLU."""
    x = Conv2DTranspose(filters, (k_size, k_size), strides=(2,2), padding='same')(x)
    x = BatchNormalization(momentum=0.9)(x)
    return Activation('relu')(x)


def build_generator(output_shape, latent_dims, num_k=64, k_size=5, int_dim=256):
    """Build the GAN generator: latent vector -> image.

    Args:
        output_shape: Indexable of three ints; ``[0]`` and ``[1]`` are the
            two spatial sizes and ``[2]`` the number of output channels.
        latent_dims: Length of the latent input vector.
        num_k: Base number of convolution filters.
        k_size: Kernel size of the transposed convolutions.
        int_dim: Width of the first Dense layer.

    Returns:
        An uncompiled ``Model`` mapping the latent input to the output of the
        final sigmoid ``Conv2D``.
    """
    inp = Input(shape=(latent_dims,))
    x = Dense(int_dim, activation='relu')(inp)

    # Start at a quarter of the target size; the remainders are handed to
    # build_padding (defined elsewhere in the notebook), which from its use
    # here returns a (padding layer or falsy, v_rem, h_rem) triple.
    v_dim = output_shape[0] // 4
    h_dim = output_shape[1] // 4
    v_rem = output_shape[0] - (v_dim * 4)
    h_rem = output_shape[1] - (h_dim * 4)

    x = Dense(num_k // 2 * v_dim * h_dim, activation = 'relu')(x)
    x = Reshape((v_dim, h_dim, num_k // 2))(x)
    x = Dropout(0.4)(x)

    p_params= {'pool_size' : (2,2), 'strides' : (2,2), 'padding' : 'same'}

    x = Conv2DTranspose(num_k, (k_size, k_size), strides=(1,1), activation = 'relu', padding='same')(x)

    # Three stride-2 upsampling stages, with optional padding after the
    # first two.
    x = _deconv_bn_relu(x, num_k*4, k_size)

    pad, v_rem, h_rem = build_padding(v_rem, h_rem, 8)
    if pad:
        x = pad(x)

    x = _deconv_bn_relu(x, num_k*2, k_size)

    pad, v_rem, h_rem = build_padding(v_rem, h_rem, 4)
    if pad:
        x = pad(x)

    x = _deconv_bn_relu(x, num_k, k_size)

    # One 2x2 pooling step, then project to the requested channel count.
    x = MaxPooling2D(**p_params)(x)
    gen = Conv2D(output_shape[2], (4,4), padding = 'same', activation='sigmoid')(x)

    return Model(inp, gen)


def build_adversary(shape, num_k=64, k_size=4, int_dim=256):
    """Build the GAN adversary (discriminator) as a ``Sequential`` model.

    Three Conv2D -> LeakyReLU -> MaxPooling2D -> Dropout stages with
    ``num_k``, ``2*num_k`` and ``4*num_k`` filters, followed by a Dense layer
    and a 2-unit sigmoid output.

    Args:
        shape: Input image shape, passed as ``input_shape`` to the first
            convolution.
        num_k: Number of filters in the first convolution.
        k_size: Convolution kernel size.
        int_dim: Width of the Dense layer before the output.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    p_params= {'pool_size' : (2,2), 'strides' : (2,2), 'padding' : 'same'}

    adv = Sequential()
    for stage, filters in enumerate((num_k, num_k*2, num_k*4)):
        # Only the first layer of a Sequential model needs the input shape.
        extra = {'input_shape': shape} if stage == 0 else {}
        adv.add(Conv2D(filters, (k_size, k_size), padding='same', **extra))
        # A separate LeakyReLU layer per stage computes the same function as
        # sharing one instance via `activation=`, but unlike that form it can
        # be serialized.
        adv.add(LeakyReLU(alpha=0.2))
        adv.add(MaxPooling2D(**p_params))
        adv.add(Dropout(0.4))

    adv.add(Flatten())
    adv.add(Dense(int_dim, activation='relu'))
    adv.add(Dense(2, activation='sigmoid'))

    return adv

def build_GAN(shape, latent_dim, num_k=64, k_size=5, int_dim=256, g_opt='adamax', a_opt='adamax', gan_opt='adamax'):
    """Build and compile the generator, the adversary and the stacked GAN.

    Args:
        shape: Image shape, passed to both ``build_generator`` and
            ``build_adversary``.
        latent_dim: Length of the generator's latent input.
        num_k: Base filter count forwarded to both sub-models.
        k_size: Kernel size forwarded to both sub-models.
        int_dim: Dense width forwarded to both sub-models.
        g_opt: Optimizer for the stand-alone generator.
        a_opt: Optimizer for the stand-alone adversary.
        gan_opt: Optimizer for the stacked generator -> adversary model.

    Returns:
        ``(generator, adversary, gan)``, all compiled.
    """
    generator = build_generator(shape, latent_dim, k_size=k_size, num_k=num_k, int_dim=int_dim)
    generator.compile(loss='binary_crossentropy', optimizer=g_opt)

    adversary = build_adversary(shape, num_k=num_k, k_size=k_size, int_dim=int_dim)
    adversary.compile(loss='categorical_crossentropy', optimizer=a_opt)

    # Freeze the adversary inside the stacked model so that training `gan`
    # only updates the generator. The adversary was compiled above while
    # still trainable, so adversary.train_on_batch keeps updating it.
    adversary.trainable = False

    gan = Sequential()
    gan.add(generator)
    gan.add(adversary)

    gan.compile(loss='binary_crossentropy', optimizer=gan_opt)

    return generator, adversary, gan
    
    

In [None]:
def demo_images(imgs, shape, filename=None):
    """Draw a square grid (at most 4x4) of images, then save or show it.

    Args:
        imgs: Array whose first axis indexes images; each ``imgs[i]`` is
            reshaped to ``shape`` before plotting.
        shape: Per-image shape. A 2-tuple is drawn with a gray colormap.
        filename: If truthy, the figure is written to this path and closed;
            otherwise it is shown with ``plt.show()``.
    """
    fig = plt.figure(figsize=(10,10))
    # Largest square grid that fits the available images, capped at 4x4.
    to_show = min(int(math.floor(math.sqrt(imgs.shape[0]))), 4)
    for i in range(to_show**2):
        a = fig.add_subplot(to_show, to_show, i+1)
        for side in ('top', 'bottom', 'left', 'right'):
            a.spines[side].set_color('none')
        # Booleans rather than 'off': on current matplotlib a non-empty
        # string is truthy and would leave the ticks on.
        a.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)

        if len(shape) == 2:
            a.imshow(imgs[i].reshape(shape), cmap=plt.get_cmap('gray'))
        else:
            a.imshow(imgs[i].reshape(shape))
    plt.tight_layout()
    if filename:
        fig.savefig(filename)
        # This is called repeatedly during training; close the figure so
        # saved grids do not accumulate in memory.
        plt.close(fig)
    else:
        plt.show()
    
def fit_gan(gen, adv, gan, data_stream, epochs, steps_per_epoch, latent_dims, shape):
    """Manually alternate adversary and generator updates for a GAN.

    Each step draws one batch of real images, generates an equally sized
    batch of fakes, trains the adversary on both, then trains the stacked
    GAN to make the adversary label fresh fakes as real. Every second epoch
    a sample grid is saved as ``imgs<epoch>.png`` and the running loss
    histories are printed.

    Args:
        gen: Generator model; only ``predict`` is used.
        adv: Adversary model; trained with ``train_on_batch``.
        gan: Stacked generator -> adversary model; trained with
            ``train_on_batch``.
        data_stream: Iterator whose ``next()`` returns a batch of real images
            as an array.
        epochs: Number of epochs.
        steps_per_epoch: Batches per epoch; must be at least 1.
        latent_dims: Length of the noise vectors fed to the generator.
        shape: Per-image shape; a trailing channel axis of size 1 is dropped
            before plotting.

    Returns:
        ``(adv_loss, gan_loss)``: two lists with the mean loss per epoch.

    Raises:
        ValueError: If ``steps_per_epoch`` is smaller than 1.
    """
    if steps_per_epoch < 1:
        # Otherwise the per-epoch mean below divides by zero.
        raise ValueError('steps_per_epoch must be at least 1')

    # because we are doing our own training effectively
    # we gotta keep track of losses independent of tensorflow
    adv_loss = []
    gan_loss = []

    # demo_images draws a 2-D shape in grayscale; drop a single-channel axis.
    if len(shape) == 3 and shape[2] == 1:
        shape = (shape[0], shape[1])

    for i in range(epochs):
        adv_loss_ = []
        gan_loss_ = []
        for step in range(steps_per_epoch):
            real_imgs = next(data_stream)
            cur_batch_size = real_imgs.shape[0]

            # make batches
            inp = np.random.uniform(-1.0, 1.0, size=(cur_batch_size, latent_dims)).astype('float32')
            gen_imgs = gen.predict(inp)

            # first let's train the adversary a bit
            try:
                X_batch = np.concatenate((real_imgs, gen_imgs))
            except Exception:
                # Shape mismatch between real and generated images is the
                # usual cause; show both shapes, then re-raise unchanged.
                print(real_imgs.shape)
                print(gen_imgs.shape)
                raise
            # One-hot labels: column 1 marks real rows (first half of the
            # batch), column 0 marks generated rows (second half).
            y_batch = np.zeros([2*cur_batch_size,2])
            y_batch[cur_batch_size:,0] = 1
            y_batch[:cur_batch_size,1] = 1

            adv_loss_.append(adv.train_on_batch(X_batch, y_batch))

            # now we can train the whole GAN: fresh uniform noise, all
            # labelled "real" so the generator learns to fool the adversary
            noise = np.random.uniform(-1.0, 1.0, size=(cur_batch_size, latent_dims)).astype('float32')
            y_g = np.zeros([cur_batch_size,2])
            y_g[:,1] = 1

            gan_loss_.append(gan.train_on_batch(noise, y_g))

        adv_loss.append(sum(adv_loss_) / len(adv_loss_))
        gan_loss.append(sum(gan_loss_) / len(gan_loss_))

        if i % 2 == 0:
            # gen_imgs is the last batch generated in this epoch.
            demo_images(gen_imgs, shape, 'imgs{}.png'.format(i))
            print(adv_loss)
            print(gan_loss)

    return adv_loss, gan_loss

In [None]:
# MNIST GAN configuration: image shape, latent vector length and batch size.
shape = (28,28,1)
latent_dim = 100
batch_size = 128

# Separate RMSprop optimizers for the generator, the adversary and the stacked
# GAN; the adversary uses twice the learning rate and decay of the other two.
g_opt = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=3e-8)
a_opt = keras.optimizers.RMSprop(lr=0.0008, clipvalue=1.0, decay=6e-8)
gan_opt = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=3e-8)
gen, adv, gan = build_GAN(shape, latent_dim, g_opt=g_opt, a_opt=a_opt, gan_opt=gan_opt)

def mnist_data_gen(batch_size):
    """Yield MNIST training images forever in full batches.

    The images are scaled to float32 by dividing by 255 and reshaped to
    ``(N, 28, 28, 1)``. Only complete batches are produced; a trailing
    partial batch is dropped on every pass over the data.

    Args:
        batch_size: Number of images per yielded batch.

    Yields:
        Arrays of shape ``(batch_size, 28, 28, 1)``.
    """
    (digits, _), (_, _) = mnist.load_data()

    digits = digits.astype('float32') / 255
    total = len(digits)
    digits = digits.reshape((total, 28, 28, 1))

    full_batches = total // batch_size
    while True:
        for batch_idx in range(full_batches):
            start = batch_idx * batch_size
            # start + batch_size never exceeds `total` for a full batch.
            yield digits[start:start + batch_size]
            
            
epochs = 100
# NOTE(review): 8189 is the same train_size the flowers cell uses. If it was
# copied from there, an "epoch" here is just steps_per_epoch batches rather
# than a full pass over MNIST - confirm this is intended.
train_size = 8189
steps_per_epoch = train_size // batch_size

In [None]:
# Train the MNIST GAN; fit_gan saves a sample grid to imgs<epoch>.png and
# prints the loss histories every second epoch.
fit_gan(gen, adv, gan, mnist_data_gen(batch_size), epochs, steps_per_epoch, latent_dim, shape)

In [None]:
# Flowers GAN configuration: 64x64 single-channel images, batches of 32.
shape = (64,64,1)
latent_dim = 100
batch_size = 32

# RMSprop for all three models; the adversary uses twice the learning rate
# and decay of the generator / stacked GAN.
g_opt = keras.optimizers.RMSprop(lr=0.0002, clipvalue=1.0, decay=3e-8)
a_opt = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=6e-8)
gan_opt = keras.optimizers.RMSprop(lr=0.0002, clipvalue=1.0, decay=3e-8)

# Alternative Adamax settings, currently disabled.
#g_opt = keras.optimizers.Adamax(lr=0.001, clipvalue=1.0)
#a_opt = keras.optimizers.Adamax(lr=0.002, clipvalue=1.0)
#gan_opt = keras.optimizers.Adamax(lr=0.001, clipvalue=1.0)

f_gen, f_adv, f_gan = build_GAN(shape, latent_dim, g_opt=g_opt, a_opt=a_opt, gan_opt=gan_opt)

In [None]:
# Print the layer summary of the stacked flowers GAN.
f_gan.summary()

In [None]:
# Training schedule and data source for the flowers GAN.
epochs = 25
train_size = 8189
steps_per_epoch = train_size // batch_size
train_data_dir = '../flowers/train'

# Stream grayscale images resized to the GAN's input size, with pixel values
# multiplied by 1/255. class_mode=None is used because fit_gan treats each
# item from the stream as a plain image array (it reads `.shape` on it).
flower_train_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        train_data_dir,
        target_size=(shape[0], shape[1]),
        color_mode='grayscale',
        batch_size = batch_size,
        class_mode=None)

In [None]:
# Train the flowers GAN on the directory stream; fit_gan saves a sample grid
# to imgs<epoch>.png every second epoch.
fit_gan(f_gen, f_adv, f_gan, flower_train_generator, epochs, steps_per_epoch, latent_dim, shape)

In [None]:
# Faces GAN configuration: 128x128 RGB images, 128-dim latent vector.
shape = (128,128,3)
latent_dims = 128
batch_size = 16

# The adversary uses twice the learning rate and decay of the generator.
# (Fixed: a period instead of a comma after lr=0.0008 made this cell a
# SyntaxError.)
g_opt = keras.optimizers.RMSprop(lr=0.0008, decay=2e-8)
a_opt = keras.optimizers.RMSprop(lr=0.0016, decay=4e-8)

# Pass the latent size defined in this cell; the name `latent_dim` (no "s")
# is a leftover from earlier cells and holds a different value. gan_opt is
# not given, so the stacked GAN uses build_GAN's default optimizer.
gen, adv, gan = build_GAN(shape, latent_dims, g_opt=g_opt, a_opt=a_opt)


In [None]:
# GANs are a bit harder to train too, so we won't use built in fitting
epochs = 50
train_size = 100000

steps_per_epoch = train_size // batch_size

train_data_dir = '../faces/celebs/train'

# class_mode must be the None object (as in the flowers cell); the string
# 'none' is not an accepted value.
real_data_gen = ImageDataGenerator(rescale=1/255).flow_from_directory(
        train_data_dir,
        target_size=(shape[0], shape[1]),
        batch_size=batch_size,
        class_mode=None)

# Arguments follow fit_gan(gen, adv, gan, data_stream, epochs,
# steps_per_epoch, latent_dims, shape). The earlier call omitted `gen` and
# passed batch_size in the epochs position.
fit_gan(gen, adv, gan, real_data_gen, epochs,
        steps_per_epoch, latent_dims, shape)