# Autoencoder

#### Dependecies

In [2]:
import random
from skimage import io
import matplotlib.pyplot as plt
from ipywidgets import interact
from keras.models import Model, Sequential 
from keras.models import load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.applications.vgg16 import VGG16
from keras.optimizers import SGD
import numpy as np
from keras.layers import Input, Flatten, Dense, Dropout, Conv2D, MaxPooling2D, Merge, UpSampling2D
from keras.layers import Reshape
from keras.layers.normalization import BatchNormalization
from keras.losses import mse, binary_crossentropy
from keras.optimizers import RMSprop
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.preprocessing.image import img_to_array
from keras.utils import to_categorical
from keras.layers.core import Lambda
import keras
import os
from sklearn.metrics import accuracy_score
import seaborn as sns
from keras.utils.vis_utils import plot_model
import pandas as pd
import sys
sys.path.append("../")
from networks.networks import *




Using TensorFlow backend.


### Load Data

In [3]:
images = np.array(io.imread_collection("../raw_data/body_sept/*.jpg"))

In [4]:
images.shape

(143559, 300, 250, 3)

In [17]:
reshaped_images = images[:10000, 2:-2, 1:-1, :]/255.

In [18]:
reshaped_images.shape

(10000, 296, 248, 3)

## Built Architeture

In [19]:
latent_dim = 512

In [20]:
def sampling(args):
    """Reparameterization trick by sampling fr an isotropic unit Gaussian.
    # Arguments:
        args (tensor): mean and log of variance of Q(z|X)
    # Returns:
        z (tensor): sampled latent vector
    """

    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

### Encoder 

In [21]:
# build encoder model
input_layer = Input(shape=(296, 248, 3), name='encoder_input') 

x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_layer)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)

x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
#print(x.shape)
x = MaxPooling2D((2, 2), padding='same')(x)

# shape info needed to build decoder model
shape = K.int_shape(x)

# generate latent vector Q(z|X)
x = Flatten()(x)
x = Dense(16, activation='relu')(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(input_layer, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 296, 248, 3)  0                                            
__________________________________________________________________________________________________
conv2d_11 (Conv2D)              (None, 296, 248, 16) 448         encoder_input[0][0]              
__________________________________________________________________________________________________
max_pooling2d_7 (MaxPooling2D)  (None, 148, 124, 16) 0           conv2d_11[0][0]                  
__________________________________________________________________________________________________
conv2d_12 (Conv2D)              (None, 148, 124, 8)  1160        max_pooling2d_7[0][0]            
__________________________________________________________________________________________________
max_poolin

### Decoder

In [22]:
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')


x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
outputs = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)
print(outputs.shape)
# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

(?, 296, 248, 3)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 9176)              4707288   
_________________________________________________________________
reshape_2 (Reshape)          (None, 37, 31, 8)         0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 37, 31, 8)         584       
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 74, 62, 8)         0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 74, 62, 8)         584       
_________________________________________________________________
up_sampling2d_5 (UpSampling2 (None, 148, 124, 8)       0   

In [23]:
outputs = decoder(encoder(input_layer)[2])
vae = Model(input_layer, outputs, name='vae')

In [24]:
reconstruction_loss = mse(K.flatten(input_layer), K.flatten(outputs))

In [25]:
reconstruction_loss *= 124 * 104
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')
vae.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 296, 248, 3)       0         
_________________________________________________________________
encoder (Model)              [(None, 512), (None, 512) 166432    
_________________________________________________________________
decoder (Model)              (None, 296, 248, 3)       4710059   
Total params: 4,876,491
Trainable params: 4,876,491
Non-trainable params: 0
_________________________________________________________________


  import sys


In [26]:
vae.fit(reshaped_images,
                epochs=20,
                batch_size=128,
                shuffle=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f2de1a71f60>

In [27]:
decoded_images = vae.predict(reshaped_images)

In [28]:
def f(x):
    f, (ax1, ax2) = plt.subplots(1, 2)
    ax1.imshow(reshaped_images[x])
    ax2.imshow(decoded_images[x])
    plt.show()
    
interact(f, x=(0, 6697, 1))

interactive(children=(IntSlider(value=3348, description='x', max=6697), Output()), _dom_classes=('widget-inter…

<function __main__.f>