In [5]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [6]:
import os
import pandas as pd

from keras.models import Model
from keras.layers import Input, Dense, Flatten, Reshape, Layer, TimeDistributed, Concatenate, Lambda
from keras.datasets import mnist
from keras.utils import to_categorical

import keras.backend as K

from vae_utils import *

Using TensorFlow backend.


In [7]:
os.chdir('./data/iwae/')

# Data

In [8]:
def preprocess(data):
    x, y = data
    x = x.reshape((len(x), 28, 28))
    x = x/255.
    #y = to_categorical(y, 10) #don't need to categorise y
    return x, y

train, test = mnist.load_data()

x_train, y_train = preprocess(train)
x_test, y_test = preprocess(test)

print(x_train.shape)

(60000, 28, 28)


# Custom Layers

In [9]:
# we need a layer that samples a latent variable given a mean and standard deviation

class Sampler(Layer):
    
    def __init__(self, always_sample=False, **kwargs):
        self.stddev = 1
        self.always_sample = always_sample
        super(Sampler, self).__init__(**kwargs)
    
    def call(self, x, training=None):
        assert isinstance(x, list)
        z_mean, log_z_var = x
        z_std = K.exp(log_z_var/2)
        
        # sample epsilon from N(0, stddev)
        shape = K.shape(z_std)
        epsilon = K.random_normal(shape, mean=0, stddev=self.stddev)
        z_sample = z_mean + z_std * epsilon
        
        if self.always_sample:
            return z_sample
        else:
            return K.in_train_phase(z_sample, z_mean, training=training)
    
    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        assert input_shape[0] == input_shape[1]
        return input_shape[0]

# Model 1

In [10]:
def model_1(k, latent_dim=100, hidden_dim=200, always_sample=False):
    # encoder shared layers
    enc_hid_1 = Dense(hidden_dim, activation='tanh', name='enc_1_hidden_1')
    enc_hid_2 = Dense(hidden_dim, activation='tanh', name='enc_1_hidden_2')
    z_mean = Dense(latent_dim, name='enc_1_latent_mean')
    log_z_var = Dense(latent_dim, name='enc_1_log_latent_var')
    sampler = Sampler(always_sample, name='z1_sampler')
    
    # decoder shared layers
    dec_hid_1 = Dense(hidden_dim, activation='tanh', name='dec_1_hidden_1')
    dec_hid_2 = Dense(hidden_dim, activation='tanh', name='dec_1_hidden_2')
    bernoulli_mean = Dense(28*28, activation='sigmoid', name='dec_1_mean')
    reshape = Reshape((28, 28), name='dec_1_output')
    
    # single pass model
    x = Input(shape=(28, 28), name='enc_1_input')
    y = Flatten(name='enc_1_flatten')(x)
    y = enc_hid_1(y)
    y = enc_hid_2(y)
    mu = z_mean(y)
    log_var = log_z_var(y)
    z1 = sampler([z_mean(y), log_z_var(y)])
    y = dec_hid_1(z1)
    y = dec_hid_2(y)
    y = bernoulli_mean(y)
    y = reshape(y)
    
    model = Model(x, y, name='model_1')
    
    # k forward passes - start from first sampling layer
    k_z1 = [sampler([mu, log_var]) for i in range(k)]
    k_y = [dec_hid_1(z1) for z1 in k_z1]
    k_y = [dec_hid_2(y) for y in k_y]
    k_y = [bernoulli_mean(y) for y in k_y]
    k_y = [reshape(y) for y in k_y]
    
    return model, mu, log_var, k_z1, k_y

## Training k forward pass model 1

###  Losses

In [11]:
def iwae_loss(y_true, y_pred):
    # calculate log distributions (log_p_x_y := p(x|y))
    elbo = []
    weights = []
    for i in range(k):
        log_q_h1_x = -0.5 * K.sum(log_z_var + K.exp(-log_z_var)*(z1_samples[i] - z_mean)**2, axis=-1)
        log_p_h1 = -0.5 * K.sum(K.square(z1_samples[i]), axis=-1)
        log_p_x_h1 = -K.sum(K.binary_crossentropy(y_true, out_samples[i]), axis=(-1, -2))

        # calculate weights
        log_weights = log_p_x_h1 + log_p_h1 - log_q_h1_x
        weights.append(log_weights)
        
        elbo.append(log_p_x_h1 + log_p_h1 - log_q_h1_x)

    weights = K.softmax(weights, axis=0)
    elbo = K.sum(weights * elbo, axis=0)

    loss = -elbo

    return loss

def vae_loss(y_true, y_pred):
    loss = 0
    for i in range(k):
        log_q_h1_x = -0.5 * K.sum(log_z_var + K.exp(-log_z_var)*(z1_samples[i] - z_mean)**2, axis=-1)
        log_p_h1 = -0.5 * K.sum(K.square(z1_samples[i]), axis=-1)
        log_p_x_h1 = -K.sum(K.binary_crossentropy(y_true, out_samples[i]), axis=(-1, -2))

        elbo = log_p_x_h1 + log_p_h1 - log_q_h1_x
        loss -= elbo

    return loss/k

### Model 1 trainer

In [12]:
def train_model_1(k, latent_dim=100, epochs=50, batch_size=512, train='both'):
    out = []
    hists = []
    
    if train.lower() == 'iwae' or train.lower() == 'both':
        # IWAE training
        print('Training IWAE model')
        print('-------------------')

        iwae_model, z_mean, log_z_var, z1_samples, out_samples = model_1(k, latent_dim)
        
        def iwae_loss(y_true, y_pred):
            # calculate log distributions (log_p_x_y := p(x|y))
            log_weights = []
            for i in range(k):
                log_q_h1_x = -0.5 * K.sum(log_z_var + K.exp(-log_z_var)*(z1_samples[i] - z_mean)**2, axis=-1)
                log_p_h1 = -0.5 * K.sum(K.square(z1_samples[i]), axis=-1)
                log_p_x_h1 = -K.sum(K.binary_crossentropy(y_true, out_samples[i]), axis=(-1, -2))

                # calculate weights
                log_weight = log_p_x_h1 + log_p_h1 - log_q_h1_x
                log_weights.append(log_weight)
                
            weights = K.exp(log_weights - K.max(log_weights, axis=0, keepdims=True))
            elbo = 1/k * K.sum(weights, axis=0)
            elbo = K.log(elbo) + K.max(log_weights, axis=0, keepdims=True)
            
            # let's try adding the second order term and see what happens
            v_sqr = K.square(log_weights)
            v_sqr = 1/k * K.sum(v_sqr, axis=0)
            second_ord = -1/2 * (v_sqr - elbo**2)
            elbo = elbo + second_ord
            
            loss = -elbo

            return loss

        iwae_model.compile(optimizer='adam', loss=iwae_loss)
        hist = iwae_model.fit(x_train, x_train, batch_size=batch_size, epochs=epochs)
        
        out.append(iwae_model)
        hists.append(hist)

        model_path = './iwae_model_1_k_%d_dim_%d.weights' %(k, latent_dim)
        iwae_model.save_weights(model_path)
        
        
    if train.lower() == 'vae' or train.lower() == 'both':
        # VAE training
        if train.lower() == 'both':
            print('\n')
        print('Training VAE model')
        print('------------------')
        
        vae_model, z_mean, log_z_var, z1_samples, out_samples = model_1(k, latent_dim)
        
        def vae_loss(y_true, y_pred):
            loss = 0
            elbos = []
            for i in range(k):
                log_q_h1_x = -0.5 * K.sum(log_z_var + K.exp(-log_z_var)*(z1_samples[i] - z_mean)**2, axis=-1)
                log_p_h1 = -0.5 * K.sum(K.square(z1_samples[i]), axis=-1)
                log_p_x_h1 = -K.sum(K.binary_crossentropy(y_true, out_samples[i]), axis=(-1, -2))

                elbo = log_p_x_h1 + log_p_h1 - log_q_h1_x
                elbos.append(elbo)
                loss -= elbo
            
            loss = loss / k
            
            # uncomment to see what happens when we add the second order term to the vae loss
            #elbo = 1/k * K.sum(elbos, axis=0)
            #v_sqr = K.square(elbos)
            #v_sqr = 1/k * K.sum(v_sqr, axis=0)
            #second_ord = -1/2 * (v_sqr - elbo**2)
            #elbo = elbo + second_ord
            
            #loss = -elbo
            
            return loss

        vae_model.compile(optimizer='adam', loss=vae_loss)
        hist = vae_model.fit(x_train, x_train, batch_size=batch_size, epochs=epochs)
        
        out.append(vae_model)
        hists.append(hist)

        model_path = './vae_model_1_k_%d_dim_%d.weights' %(k, latent_dim)
        vae_model.save_weights(model_path)
    
    
    if train.lower() not in ['both', 'vae', 'iwae']:
        print('Set parameter train to "both", "vae", or "iwae".')
        return
    
    if train.lower() == 'both':
        return out, hists
    return out[0], hists[0]

In [13]:
# batch trainer

def train_for_k(ks, latent_dim=100, train='both'):
    ks = np.asarray(ks)
    print('TRAINING')
    for k in ks:
        print('\n')
        print('-------------------')
        print('k = %d' %k)
        print('latent_dim = %d' %latent_dim)
        print('-------------------\n')
        train_model_1(k, latent_dim, train=train)
        print('\n\n')

In [14]:
#ks = [1, 5, 10, 20, 30, 40, 50]
ks = [10]

In [15]:
train_for_k(ks, train='vae')

TRAINING


-------------------
k = 10
latent_dim = 100
-------------------

Training VAE model
------------------
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50





## Testing Model 1

### IWAE

In [47]:
k = 100

load_k = 10
latent_dim = 100

iwae_path = 'iwae_model_1_k_%d_dim_%d.weights' %(load_k, latent_dim)
    
# load model
iwae_model, z_mean, log_z_var, z1_samples, out_samples = model_1(k, latent_dim, always_sample=True)
iwae_model.load_weights(iwae_path, by_name=True)

k_iwae_model = Model(iwae_model.input, out_samples)
k_z_samples = Model(iwae_model.input, z1_samples)
enc_model = Model(iwae_model.input, [z_mean, log_z_var])

iwae_model.compile(optimizer='adam', loss=iwae_loss)

x_preds = k_iwae_model.predict(x_test, batch_size=512)
z1_samples = k_z_samples.predict(x_test, batch_size=512)
z_mean, log_z_var = enc_model.predict(x_test, batch_size=512)

In [48]:
def iwae_elbo(y_true, y_pred):
    # calculate log distributions (log_p_x_y := p(x|y))
    elbo = []
    log_weights = []
    for i in range(k):
        log_q_h1_x = -0.5 * np.sum(log_z_var + np.exp(-log_z_var)*(z1_samples[i] - z_mean)**2, axis=-1)
        log_p_h1 = -0.5 * np.sum(z1_samples[i]**2, axis=-1)
        log_p_x_h1 = np.sum(y_true * np.log(y_pred[i]) + (1 - y_true) * np.log(1 - y_pred[i]), axis=(-1, -2))

        # calculate weights
        log_weight = log_p_x_h1 + log_p_h1 - log_q_h1_x
        log_weights.append(log_weight)
    
    #weights = np.exp(log_weights - np.max(log_weights, axis=1, keepdims=True))
    #weights = weights / sum(weights, axis=0)
    var = np.var(log_weights, axis=0)
    #elbo = np.sum(weights * log_weights, axis=0)
    
    weights = np.exp(log_weights - np.max(log_weights, axis=0, keepdims=True))
    elbo = 1/k * np.sum(weights, axis=0)
    elbo = np.log(elbo) + np.max(log_weights, axis=0, keepdims=True)
    loss = -elbo

    return elbo, var, log_weights

In [49]:
elbo, var, log_weights = iwae_elbo(x_test, x_preds)

In [46]:
elbo, mean(elbo), var, mean(var)

(array([[ -72.37829892, -109.90690892,  -40.300041  , ...,  -77.9211534 ,
         -101.36395393, -123.6241121 ]]),
 -93.68109201047788,
 array([132.75004576, 131.25136892, 140.8115718 , ...,  74.33978318,
        162.48521614, 129.71405675]),
 124.346180775778)

In [50]:
elbo, mean(elbo), var, mean(var)

(array([[ -72.10513569, -112.07000651,  -37.50728277, ...,  -80.86118173,
          -96.71502434, -124.02455561]]),
 -93.56910197242789,
 array([ 66.0260446 ,  99.83693518,  80.51568945, ...,  49.71792848,
        115.92595999,  94.69907044]),
 73.40518760855443)

In [26]:
A_u = np.var(z_mean, axis=0)
sum(log(A_u) >= -2)

18

### VAE

In [16]:
k = 100

load_k = 10
latent_dim = 100

iwae_path = 'vae_model_1_k_%d_dim_%d.weights' %(load_k, latent_dim)
    
# load model
iwae_model, z_mean, log_z_var, z1_samples, out_samples = model_1(k, latent_dim, always_sample=True)
iwae_model.load_weights(iwae_path, by_name=True)

k_iwae_model = Model(iwae_model.input, out_samples)
k_z_samples = Model(iwae_model.input, z1_samples)
enc_model = Model(iwae_model.input, [z_mean, log_z_var])

iwae_model.compile(optimizer='adam', loss=iwae_loss)

x_preds = k_iwae_model.predict(x_test, batch_size=512)
z1_samples = k_z_samples.predict(x_test, batch_size=512)
z_mean, log_z_var = enc_model.predict(x_test, batch_size=512)

In [19]:
def vae_elbo(y_true, y_pred):
    # calculate log distributions (log_p_x_y := p(x|y))
    elbos = []
    for i in range(k):
        log_q_h1_x = -0.5 * np.sum(log_z_var + np.exp(-log_z_var)*(z1_samples[i] - z_mean)**2, axis=-1)
        log_p_h1 = -0.5 * np.sum(z1_samples[i]**2, axis=-1)
        log_p_x_h1 = np.sum(y_true * np.log(y_pred[i]) + (1 - y_true) * np.log(1 - y_pred[i]), axis=(-1, -2))
        
        elbos.append(log_p_x_h1 + log_p_h1 - log_q_h1_x)
    
    var = np.var(elbos, axis=0)
    elbo = np.mean(elbos, axis=0)

    return elbo, var

In [20]:
elbo, var = vae_elbo(x_test, x_preds)

In [21]:
elbo, mean(elbo), var, mean(var)

(array([-165.21271034, -252.591892  ,  -88.20299404, ..., -165.88884886,
        -226.65836364, -245.90310142]),
 -196.51514868754143,
 array([4.28513664, 6.51203712, 4.99395905, ..., 4.20919283, 3.47501041,
        9.21949225]),
 5.142778526796924)

In [36]:
elbo, mean(elbo), var, mean(var)

(array([ -69.19617049, -114.037363  ,  -35.44858195, ...,  -77.55053169,
        -104.04649974, -119.47013777]),
 -93.71715004979777,
 array([19.18326246, 18.14852099, 17.93589541, ..., 30.9315636 ,
        19.10827392, 26.10820491]),
 20.589260445661967)

In [37]:
A_u = np.var(z_mean, axis=0)
sum(log(A_u) >= -2)

12