In [1]:
import numpy as np
import matplotlib.pyplot as plt
from functools import partial, reduce
import os.path
import os
import h5py
import math

from PIL import Image
from scipy.ndimage import imread

from keras.layers import Flatten, Reshape, Input, Dense, Lambda, Dropout, Activation, BatchNormalization
from keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, ZeroPadding2D, UpSampling2D
from keras.models import Model, Sequential
from keras.callbacks import TensorBoard, ModelCheckpoint, Callback
from keras.metrics import binary_crossentropy
from keras.engine.topology import Layer
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.advanced_activations import LeakyReLU
import keras.optimizers
from keras import backend as K
from keras.datasets import mnist

from keras.utils.np_utils import to_categorical

K.set_floatx('float32')

%matplotlib inline

Using TensorFlow backend.


We are going to start by playing around with MNIST data, just as a proof of concept.

### Convolutional Variational Auto Encoder

#### Custom layer to calculate our CVAE loss

In [2]:
# because Keras isn't powerful enough to handle the loss for a CVAE
# we have to create an actual loss layer to calculate it

class CVAELossLayer(Layer):
    """Keras layer that computes the combined CVAE training loss.

    The layer is called on a list of three tensors ``[x, z_params, dec_x]``:

    * ``x``        -- the original input,
    * ``z_params`` -- a 2-D tensor; its first ``latent_dim`` columns are read
      as the latent mean, the remaining columns as the latent log-variance
      (that is how the KL formula below uses them),
    * ``dec_x``    -- the decoder's reconstruction of ``x``.

    ``call`` returns the batch mean of (reconstruction loss + KL divergence).

    Parameters
    ----------
    latent_dim : int
        Column index at which ``z_params`` is split into mean / log-variance.
    image_size : number
        Scalar multiplier applied to the mean per-element cross-entropy.
    **kwargs
        Forwarded unchanged to ``keras.engine.topology.Layer``.
    """

    def __init__(self, latent_dim, image_size, **kwargs):
        self.latent_dim = latent_dim
        self.image_size = image_size

        self.is_placeholder = True

        super(CVAELossLayer, self).__init__(**kwargs)

    def calculate_loss(self, inputs):
        """Return the scalar loss tensor for ``inputs = [x, z_params, dec_x]``."""
        x = inputs[0]
        z_mean = inputs[1][:, :self.latent_dim]
        z_log_sigma = inputs[1][:, self.latent_dim:]
        dec_x = inputs[2]

        # 1e-10 is added to both arguments to keep the log computed inside
        # binary_crossentropy away from log(0).
        # NOTE(review): the argument order of K.binary_crossentropy differs
        # between Keras releases -- confirm (dec_x, x) matches the installed one.
        output_loss = self.image_size * K.mean(K.binary_crossentropy(dec_x + 1e-10, x + 1e-10), axis=-1)

        # Kullback-Leibler divergence between the distribution in latent space
        # and the unit-Gaussian prior.
        # The log-variance term must be allowed to go negative (variance < 1):
        # clipping it at a positive lower bound, as before, made the sum
        # 1 + 0 - mu^2 - exp(s) positive for very negative s, i.e. a *negative*
        # "divergence" that rewarded collapsing the variance to zero.
        latent_loss = - 0.5 * K.sum(
            1
            + K.clip(z_log_sigma, -1e10, 1e10)
            - K.clip(K.square(z_mean), 1e-10, 1e10)
            - K.clip(K.exp(z_log_sigma), 1e-10, 1e10),
            axis=-1)

        return K.mean(output_loss + latent_loss)

    def call(self, inputs):
        loss = self.calculate_loss(inputs)
        # this is the hacky way to calculate our loss
        # we use the inputs given to calculate the loss
        # and then return it for direct use for fitting
        return loss

    def compute_output_shape(self, input_shape):
        # NOTE(review): hard-coded shape, independent of input_shape; the 64
        # presumably mirrors the training batch size -- confirm against the
        # model-building code, which is not part of this notebook section.
        return (64, 1)

In [3]:
# helper function to ensure we have the right amount of padding
def build_padding(v_rem, h_rem, mult):
    """Decide whether a one-pixel zero-padding layer is needed.

    For each axis, if the outstanding remainder is at least ``mult`` the
    remainder is reduced by ``mult`` and that axis is padded by 1.

    Returns
    -------
    (layer, v_rem, h_rem)
        ``layer`` is a ``ZeroPadding2D`` instance, or ``None`` when neither
        axis needs padding; the remainders are the updated values.
    """
    pad_rows = 1 if v_rem >= mult else 0
    if pad_rows:
        v_rem -= mult

    pad_cols = 1 if h_rem >= mult else 0
    if pad_cols:
        h_rem -= mult

    if not (pad_cols or pad_rows):
        return None, v_rem, h_rem

    return ZeroPadding2D(padding=(pad_rows, pad_cols)), v_rem, h_rem

In [None]:
def dist_sample(latent_dims, inputs):
    """Draw a latent sample with the reparameterisation trick.

    Parameters
    ----------
    latent_dims : int
        Column index at which ``inputs`` is split.
    inputs : tensor
        2-D tensor; the first ``latent_dims`` columns are read as the mean and
        the remaining columns as the log-variance.

    Returns
    -------
    tensor
        ``z_mean + std * eps`` with ``eps`` drawn from a standard normal.
    """
    z_mean = inputs[:,:latent_dims]
    z_log_sigma = inputs[:,latent_dims:]

    # Draw one noise vector per row of the batch. A shape of (latent_dims,)
    # produced a single vector that was broadcast to (and therefore shared by)
    # every sample in the batch.
    eps = K.random_normal(shape=(K.shape(z_mean)[0], latent_dims),
                          mean=0.0, stddev=1.0, dtype='float32')

    # The KL term in CVAELossLayer (1 + s - mu^2 - exp(s)) treats s as the
    # log-variance, so the matching standard deviation is exp(s / 2).
    return z_mean + (K.exp(0.5 * z_log_sigma) * eps)


def sample_output_shape(input_shape):
    """Return the 2-D output shape with the second dimension halved.

    ``input_shape`` must have exactly two entries (asserted); the second one
    is floor-divided by 2 and the result is returned as a tuple.
    """
    dims = tuple(input_shape)
    assert len(dims) == 2
    batch, width = dims
    return (batch, width // 2)

And finally the code to build the net itself!

Let us define a helper function to demo a model on a given dataset

In [5]:
def demo_model(model, data, shape, show_latent=False):
    """Plot every sample of ``data`` next to its reconstruction.

    Parameters
    ----------
    model : keras model
        ``model.layers[1]`` is used as the encoder and ``model.layers[2]`` as
        the decoder; both are called through ``predict``.
    data : numpy.ndarray
        Batch of inputs; ``data.shape[0]`` figures are produced.
    shape : tuple
        Shape each sample / reconstruction is reshaped to before ``imshow``.
    show_latent : bool
        When true, the latent vector is written as the x-label of the
        'decoded' panel.
    """
    encoder = model.layers[1]
    decoder = model.layers[2]

    batch_size = data.shape[0]

    vecs = encoder.predict(data, batch_size=batch_size, verbose=0)
    pred = decoder.predict(vecs, batch_size=batch_size, verbose=0)

    for i in range(batch_size):
        fig = plt.figure()

        # left panel: original sample, right panel: reconstruction
        panels = (('input', data[i]), ('decoded', pred[i]))
        for col, (title, image) in enumerate(panels, start=1):
            ax = fig.add_subplot(1, 2, col)
            for side in ('top', 'bottom', 'left', 'right'):
                ax.spines[side].set_color('none')
            # tick_params takes booleans; the 'off' string form is deprecated
            ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
            ax.imshow(image.reshape(shape))
            ax.set_title(title)

        if show_latent:
            # `ax` is the 'decoded' panel at this point
            ax.set_xlabel(vecs[i])

        plt.show()
        

In [6]:
class ImageSaver(Callback):
    ''' Keras Callback to save demo images after each epoch

    Every ``period`` epochs the fixed batch ``images`` is pushed through
    ``model.layers[1]`` (encoder) and ``model.layers[2]`` (decoder) and an
    input/decoded comparison figure is written to ``image_path``.

    Parameters
    ----------
    image_path : str
        Output path template; formatted with ``epoch=<epoch number>``.
    images : numpy.ndarray
        Batch of demo inputs; ``images.shape[0]`` rows are plotted.
    shape : tuple
        Shape each image is reshaped to before plotting.
    period : int
        The figure is produced when ``epoch % period == 0``.
    inp_path, out_path : str or None
        Optional path templates (formatted with ``epoch``). When both are
        given, the latent vectors are written to ``inp_path`` and the decoded
        images to ``out_path`` with ``np.save``.
        NOTE(review): the original code only contained an unfinished
        ``vecs.save`` statement here, so this behaviour is an interpretation
        of the intent -- confirm.
    '''
    def __init__(self, image_path, images, shape, period=1, inp_path=None, out_path=None):
        self.images = images
        self.batch_size = images.shape[0]
        self.image_path = image_path
        self.shape = shape
        self.period = period
        self.inp_path = inp_path
        self.out_path = out_path

        self.dpi = 128
        # True division: with integer division a small image (e.g. 28 px wide)
        # gave a width of 0 inches, which matplotlib rejects.
        self.fig_size = ((shape[0] * 4) / self.dpi,
                         (shape[1] * 1.5 * self.batch_size) / self.dpi)

        super(ImageSaver, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        if epoch % self.period != 0:
            return

        vecs = self.model.layers[1].predict(self.images, batch_size=self.batch_size, verbose=0)
        pred = self.model.layers[2].predict(vecs, batch_size=self.batch_size, verbose=0)

        # the attributes live on self; the bare names raised NameError
        if self.inp_path and self.out_path:
            np.save(self.inp_path.format(epoch=epoch), vecs)
            np.save(self.out_path.format(epoch=epoch), pred)

        fig = plt.figure(figsize=self.fig_size)

        for i in range(self.batch_size):
            # one row per sample: input on the left, reconstruction on the right
            panels = (('input', self.images[i]), ('decoded', pred[i]))
            for col, (title, image) in enumerate(panels, start=1):
                ax = fig.add_subplot(self.batch_size, 2, i * 2 + col)
                for side in ('top', 'bottom', 'left', 'right'):
                    ax.spines[side].set_color('none')
                # tick_params takes booleans; the 'off' string form is deprecated
                ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
                # use the shape given to the callback, not a notebook global
                ax.imshow(image.reshape(self.shape))
                ax.set_title(title)

        fig.show()
        fig.savefig(self.image_path.format(epoch=epoch), dpi=self.dpi)
        # release the figure so long training runs do not accumulate them
        plt.close(fig)
            

Let's try using this on MNIST data, just as a proof of concept.

In [None]:
# Load MNIST; the label arrays are discarded.
(x_train, _), (x_test, _) = mnist.load_data()

# Scale pixel values to [0, 1] and append a single channel axis.
x_train = (x_train.astype('float32') / 255).reshape((len(x_train), 28, 28, 1))
x_test = (x_test.astype('float32') / 255).reshape((len(x_test), 28, 28, 1))

In [None]:
def train_generator(train_dir, batch_size):
    """Endlessly yield batches of images read from ``train_dir``.

    The directory is re-listed on every pass. Images are read with ``imread``
    and collected until ``batch_size`` of them are available; images left over
    at the end of a pass are carried into the next one.

    Yields
    ------
    (numpy.ndarray, numpy.ndarray)
        ``np.array`` of the collected images and an array of ``batch_size``
        ones as the matching targets.

    Raises
    ------
    ValueError
        If ``train_dir`` contains no entries (the loop would otherwise spin
        forever without yielding anything).
    """
    x_train = []

    while 1:
        fnames = os.listdir(train_dir)
        if not fnames:
            raise ValueError('no files found in {!r}'.format(train_dir))

        for fname in fnames:
            x_train.append(imread(os.path.join(train_dir, fname)))
            if len(x_train) >= batch_size:
                yield np.array(x_train), np.repeat(1, batch_size)
                x_train = []
            
def valid_generator(valid_dir):
    """Endlessly yield single validation images read from ``valid_dir``.

    The directory is listed once up front; each yielded item is
    ``(image, np.array([1]))`` where ``image`` is what ``imread`` returns.

    Raises
    ------
    ValueError
        If ``valid_dir`` contains no entries (the loop would otherwise spin
        forever without yielding anything).
    """
    files = os.listdir(valid_dir)
    if not files:
        raise ValueError('no files found in {!r}'.format(valid_dir))

    while 1:
        for fname in files:
            # read from valid_dir -- this used to join against `train_dir`,
            # which is not defined in this function
            x_valid = imread(os.path.join(valid_dir, fname))
            yield x_valid, np.array([1])

In [None]:
# Create the batch generator for the scraped abstract-art images (batch size 8).
# train_generator is a generator function, so nothing is read until it is advanced.
t = train_generator('../scrape/flickr/abstract_art', 8)

### Hmm, I wasn't too happy with the mode collapse we were seeing from CVAEs, so let's try making a GAN now

In [None]:
def build_generator(output_shape, latent_dims, num_k=64, k_size=5, int_dim=256):
    """Build the GAN generator: latent vector -> image.

    Parameters
    ----------
    output_shape : tuple
        ``(height, width, channels)`` of the images to generate.
    latent_dims : int
        Length of the input noise vector.
    num_k : int
        Base number of convolution filters.
    k_size : int
        Kernel size of the transposed convolutions.
    int_dim : int
        Width of the first dense layer.

    Returns
    -------
    keras.models.Model
        Uncompiled model mapping ``(latent_dims,)`` to a sigmoid image.

    Notes
    -----
    The network starts from a ``(height // 4, width // 4)`` feature map and
    applies three stride-2 transposed convolutions followed by one 2x2
    max-pool. ``v_rem`` / ``h_rem`` are remainders modulo 4, so with the
    multiples used below (8 and 4) ``build_padding`` never inserts a padding
    layer; heights/widths that are not multiples of 4 are therefore not
    compensated for.
    """
    inp = Input(shape=(latent_dims,))
    x = Dense(int_dim, activation='relu')(inp)

    v_dim = output_shape[0] // 4
    h_dim = output_shape[1] // 4
    v_rem = output_shape[0] - (v_dim * 4)
    h_rem = output_shape[1] - (h_dim * 4)

    x = Dense(num_k // 2 * v_dim * h_dim, activation='relu')(x)
    x = Reshape((v_dim, h_dim, num_k // 2))(x)
    x = Dropout(0.4)(x)

    p_params = {'pool_size': (2, 2), 'strides': (2, 2), 'padding': 'same'}

    x = Conv2DTranspose(num_k, (k_size, k_size), strides=(1, 1), activation='relu', padding='same')(x)

    # first stride-2 stage
    x = Conv2DTranspose(num_k * 4, (k_size, k_size), strides=(2, 2), padding='same')(x)
    x = BatchNormalization(momentum=0.9)(x)
    x = Activation('relu')(x)

    pad, v_rem, h_rem = build_padding(v_rem, h_rem, 8)
    if pad:
        x = pad(x)

    # second stride-2 stage
    x = Conv2DTranspose(num_k * 2, (k_size, k_size), strides=(2, 2), padding='same')(x)
    x = BatchNormalization(momentum=0.9)(x)
    x = Activation('relu')(x)

    pad, v_rem, h_rem = build_padding(v_rem, h_rem, 4)
    if pad:
        x = pad(x)

    # third stride-2 stage, then pool back down by 2
    x = Conv2DTranspose(num_k, (k_size, k_size), strides=(2, 2), padding='same')(x)
    x = BatchNormalization(momentum=0.9)(x)
    x = Activation('relu')(x)

    x = MaxPooling2D(**p_params)(x)
    gen = Conv2D(output_shape[2], (4, 4), padding='same', activation='sigmoid')(x)

    # The statements that used to follow this return were unreachable and
    # have been removed.
    return Model(inp, gen)


def build_adversary(shape, num_k=64, k_size=4, int_dim=256):
    """Build the GAN adversary (discriminator): image -> 2-way score.

    Parameters
    ----------
    shape : tuple
        Input image shape passed as ``input_shape`` to the first convolution.
    num_k : int
        Filters in the first convolution; later blocks use 2x and 4x.
    k_size : int
        Convolution kernel size.
    int_dim : int
        Width of the hidden dense layer.

    Returns
    -------
    keras.models.Sequential
        Uncompiled model ending in ``Dense(2, activation='sigmoid')``.
        NOTE(review): build_GAN compiles this with categorical_crossentropy;
        a softmax output is the usual pairing -- confirm sigmoid is intended.
    """
    p_params = {'pool_size': (2, 2), 'strides': (2, 2), 'padding': 'same'}

    adv = Sequential()
    for block_idx, filter_mult in enumerate((1, 2, 4)):
        conv_kwargs = {'padding': 'same'}
        if block_idx == 0:
            conv_kwargs['input_shape'] = shape

        adv.add(Conv2D(num_k * filter_mult, (k_size, k_size), **conv_kwargs))
        # LeakyReLU is a layer, not an activation function: give every
        # convolution its own instance instead of passing one shared layer
        # object through the `activation` argument.
        adv.add(LeakyReLU(alpha=0.2))
        adv.add(MaxPooling2D(**p_params))
        adv.add(Dropout(0.4))

    adv.add(Flatten())
    adv.add(Dense(int_dim, activation='relu'))
    adv.add(Dense(2, activation='sigmoid'))

    return adv

def build_GAN(shape, latent_dim, num_k=64, k_size=5, int_dim=256, g_opt='adamax', a_opt='adamax', gan_opt='adamax'):
    """Build and compile the generator, the adversary and the stacked GAN.

    Parameters
    ----------
    shape : tuple
        Image shape ``(height, width, channels)``.
    latent_dim : int
        Length of the generator's input noise vector.
    num_k, k_size, int_dim
        Forwarded to ``build_generator`` and ``build_adversary``.
    g_opt, a_opt, gan_opt
        Optimizers for the generator, the adversary and the stacked model.

    Returns
    -------
    (generator, adversary, gan)
        ``gan`` is a ``Sequential`` of generator followed by adversary.
    """
    generator = build_generator(shape, latent_dim, k_size=k_size, num_k=num_k, int_dim=int_dim)
    generator.compile(loss='binary_crossentropy', optimizer=g_opt)

    adversary = build_adversary(shape, num_k=num_k, k_size=k_size, int_dim=int_dim)
    adversary.compile(loss='categorical_crossentropy', optimizer=a_opt)

    # Freeze the adversary inside the stacked model so that a generator step
    # does not also push the adversary towards labelling fakes as real.
    # The stand-alone adversary was compiled above while still trainable, so
    # adversary.train_on_batch keeps updating its weights.
    adversary.trainable = False

    gan = Sequential()
    gan.add(generator)
    gan.add(adversary)

    gan.compile(loss='binary_crossentropy', optimizer=gan_opt)

    return generator, adversary, gan
    
    

In [None]:
def demo_images(imgs, shape, filename=None):
    """Show (or save) a square grid of at most 4x4 images.

    Parameters
    ----------
    imgs : numpy.ndarray
        Batch of images; the first ``k*k`` are drawn, where
        ``k = min(floor(sqrt(len(imgs))), 4)``.
    shape : tuple
        Shape each image is reshaped to; a 2-tuple is drawn with a gray
        colormap.
    filename : str or None
        When given, the figure is saved there and closed; otherwise it is
        shown with ``plt.show()``.
    """
    fig = plt.figure(figsize=(10,10))
    to_show = int(min(math.floor(math.sqrt(imgs.shape[0])), 4))

    for i in range(to_show**2):
        ax = fig.add_subplot(to_show, to_show, i+1)
        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_color('none')
        # tick_params takes booleans; the 'off' string form is deprecated
        ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)

        if len(shape) == 2:
            ax.imshow(imgs[i].reshape(shape), cmap=plt.get_cmap('gray'))
        else:
            ax.imshow(imgs[i].reshape(shape))

    plt.tight_layout()
    if filename:
        fig.savefig(filename)
        # this is called repeatedly during training; release the figure
        plt.close(fig)
    else:
        plt.show()
    
def fit_gan(gen, adv, gan, data_stream, epochs, steps_per_epoch, latent_dims, shape):
    """Manual GAN training loop.

    Each step trains the adversary on one batch of real plus generated images
    and then trains the stacked model on fresh noise labelled as "real".

    Parameters
    ----------
    gen, adv, gan
        Generator, adversary and stacked model (as returned by ``build_GAN``).
    data_stream : iterator
        ``next(data_stream)`` must return one batch of real images.
    epochs : int
        Number of epochs to run.
    steps_per_epoch : int
        Batches drawn from ``data_stream`` per epoch; must be >= 1.
    latent_dims : int
        Length of the noise vectors fed to the generator.
    shape : tuple
        ``(height, width, channels)``; a single channel is dropped before the
        sample images are plotted.

    Returns
    -------
    (adv_loss, gan_loss)
        Lists holding the per-epoch mean loss of the adversary and of the
        stacked model.

    Raises
    ------
    ValueError
        If ``steps_per_epoch`` is smaller than 1, or if real and generated
        image batches have incompatible shapes.
    """
    if steps_per_epoch < 1:
        raise ValueError('steps_per_epoch must be at least 1, got {}'.format(steps_per_epoch))

    # because we are doing our own training effectively
    # we gotta keep track of losses independent of tensorflow
    adv_loss = []
    gan_loss = []

    if shape[2] == 1:
        shape = (shape[0], shape[1])

    for i in range(epochs):
        adv_loss_ = []
        gan_loss_ = []
        for step in range(steps_per_epoch):
            real_imgs = next(data_stream)
            cur_batch_size = real_imgs.shape[0]

            # generate a batch of fakes from uniform noise in [-1, 1)
            inp = np.random.uniform(-1.0, 1.0, size=(cur_batch_size, latent_dims)).astype('float32')
            gen_imgs = gen.predict(inp)

            if real_imgs.shape[1:] != gen_imgs.shape[1:]:
                raise ValueError('real images {} and generated images {} do not match'.format(
                    real_imgs.shape, gen_imgs.shape))

            # first let's train the adversary a bit:
            # column 1 marks real images, column 0 marks generated ones
            X_batch = np.concatenate((real_imgs, gen_imgs))
            y_batch = np.zeros([2*cur_batch_size,2])
            y_batch[cur_batch_size:,0] = 1
            y_batch[:cur_batch_size,1] = 1

            adv_loss_.append(adv.train_on_batch(X_batch, y_batch))

            # now we can train the whole GAN: fresh noise, labelled as real
            noise = np.random.uniform(-1.0, 1.0, size=(cur_batch_size, latent_dims)).astype('float32')
            y_g = np.zeros([cur_batch_size,2])
            y_g[:,1] = 1

            gan_loss_.append(gan.train_on_batch(noise, y_g))

        adv_loss.append(sum(adv_loss_) / len(adv_loss_))
        gan_loss.append(sum(gan_loss_) / len(gan_loss_))

        if i % 2 == 0:
            # sample images come from the last batch generated in this epoch
            demo_images(gen_imgs, shape, 'imgs{}.png'.format(i))
            print(adv_loss)
            print(gan_loss)

    return adv_loss, gan_loss

In [None]:
# Settings for the MNIST GAN run: 28x28 single-channel images,
# a 100-dimensional noise vector and batches of 128.
shape = (28,28,1)
latent_dim = 100
batch_size = 128

# One RMSprop optimizer per compiled model; the adversary uses twice the
# learning rate and decay of the generator and the stacked GAN.
g_opt = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=3e-8)
a_opt = keras.optimizers.RMSprop(lr=0.0008, clipvalue=1.0, decay=6e-8)
gan_opt = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=3e-8)
gen, adv, gan = build_GAN(shape, latent_dim, g_opt=g_opt, a_opt=a_opt, gan_opt=gan_opt)

def mnist_data_gen(batch_size):
    """Endlessly yield full batches of MNIST training images.

    The training images are loaded once, scaled to [0, 1] and reshaped to
    ``(n, 28, 28, 1)``. Batches are consecutive slices of ``batch_size``
    images in dataset order; a trailing partial batch is never yielded.
    """
    (images, _), _ = mnist.load_data()
    images = (images.astype('float32') / 255).reshape((len(images), 28, 28, 1))

    n_batches = len(images) // batch_size

    while True:
        for start in range(0, n_batches * batch_size, batch_size):
            yield images[start:start + batch_size]
            
            
# Length of the MNIST training run.
epochs = 100
# NOTE(review): 8189 is the same train_size used for the flower dataset
# further down; MNIST's training split has 60000 images -- confirm this value
# is intended here (it only determines how many batches make up one "epoch").
train_size = 8189
steps_per_epoch = train_size // batch_size

In [None]:
# Train the MNIST GAN with the manual training loop defined above.
fit_gan(gen, adv, gan, mnist_data_gen(batch_size), epochs, steps_per_epoch, latent_dim, shape)

In [None]:
# Settings for the flower GAN: 64x64 single-channel images,
# a 100-dimensional noise vector and batches of 32.
shape = (64,64,1)
latent_dim = 100
batch_size = 32

# RMSprop again, with the adversary at twice the learning rate and decay
# of the generator and the stacked GAN.
g_opt = keras.optimizers.RMSprop(lr=0.0002, clipvalue=1.0, decay=3e-8)
a_opt = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=6e-8)
gan_opt = keras.optimizers.RMSprop(lr=0.0002, clipvalue=1.0, decay=3e-8)

# Alternative Adamax configuration, currently disabled.
#g_opt = keras.optimizers.Adamax(lr=0.001, clipvalue=1.0)
#a_opt = keras.optimizers.Adamax(lr=0.002, clipvalue=1.0)
#gan_opt = keras.optimizers.Adamax(lr=0.001, clipvalue=1.0)

# The f_ prefix keeps these models separate from the MNIST gen/adv/gan.
f_gen, f_adv, f_gan = build_GAN(shape, latent_dim, g_opt=g_opt, a_opt=a_opt, gan_opt=gan_opt)

In [None]:
# Print the layer summary of the stacked generator + adversary model.
f_gan.summary()

In [None]:
# Length of the flower training run; steps_per_epoch uses the batch_size
# set in the flower configuration cell.
epochs = 25
train_size = 8189
steps_per_epoch = train_size // batch_size
train_data_dir = '../flowers/train'

# Directory iterator over the training images: pixel values are multiplied by
# 1/255, images are resized to the GAN's input size and read as grayscale.
# class_mode=None asks for image batches without labels.
flower_train_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        train_data_dir,
        target_size=(shape[0], shape[1]),
        color_mode='grayscale',
        batch_size = batch_size,
        class_mode=None)

In [None]:
# Train the flower GAN on batches drawn from the directory iterator.
fit_gan(f_gen, f_adv, f_gan, flower_train_generator, epochs, steps_per_epoch, latent_dim, shape)

In [None]:
# Settings for the face GAN: 128x128 RGB images,
# a 128-dimensional noise vector and batches of 16.
shape = (128,128,3)
latent_dims = 128
batch_size = 16

# The adversary uses twice the learning rate and decay of the generator.
# (The first line previously had a '.' where the argument separator ','
# belongs, which is a syntax error.)
g_opt = keras.optimizers.RMSprop(lr=0.0008, decay=2e-8)
a_opt = keras.optimizers.RMSprop(lr=0.0016, decay=4e-8)

# Pass the latent size defined in this cell; the old `latent_dim` name is a
# leftover from the earlier cells and would not match the 128-dimensional
# noise fit_gan is later called with.
gen, adv, gan = build_GAN(shape, latent_dims, g_opt=g_opt, a_opt=a_opt)


In [None]:
# GANs are a bit harder to train too, so we won't use built in fitting
epochs = 50
train_size = 100000

steps_per_epoch = train_size // batch_size

train_data_dir = '../faces/celebs/train'

# class_mode must be the value None (not the string 'none') for the iterator
# to yield image batches without labels, which is what fit_gan reads with
# next(data_stream).
real_data_gen = ImageDataGenerator(rescale=1/255).flow_from_directory(
        train_data_dir,
        target_size=(shape[0], shape[1]),
        batch_size=batch_size,
        class_mode=None)

# Argument order follows fit_gan's signature:
# (gen, adv, gan, data_stream, epochs, steps_per_epoch, latent_dims, shape)
fit_gan(gen, adv, gan, real_data_gen, epochs,
        steps_per_epoch, latent_dims, shape)