In [1]:
from __future__ import absolute_import, division, print_function

import pandas as pd
import numpy as np
import sklearn as sk
import os
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
# %load_ext autoreload
# %autoreload 2

In [2]:
data = np.load("/Users/amir/Downloads/processed_data.npz")

In [3]:
data.files

['x_train', 'y_train', 'x_test', 'y_test']

In [4]:
data['x_train'].shape

(153582, 47)

In [5]:
data['y_train'].shape

(153582, 46)

In [6]:
class Dataset:

    def __init__(self, data):
    #     self._index_in_epoch = 0
    #     self._epochs_completed = 0
        self.x_train = data['x_train'][:,:][:,:-1]
        self.y_train = data['y_train']
        self.x_test = data['x_test'][:,:][:,:-1]
        self.y_test = data['y_test']
        self.num_train, self.input_dim = data['y_train'].shape
        self.num_test = data['x_test'].shape[0]

#     def next_batch(self, batch_size, training=True):

#         '''
#         Return a total of `num` random samples and labels. 
#         '''
#         num_examples = self.num_train if training else self.num_test
#         X = self.x_train if training else self.x_test
#         y = self.y_train if training else self.y_test
#         idx = np.arange(0, num_examples)
#         np.random.shuffle(idx)
#         idx = idx[:batch_size]
#         data_shuffle = X[idx]
#         labels_shuffle = y[idx]
#         #print(labels_shuffle.shape)
#         #labels_shuffle = np.asarray(labels_shuffle.reshape(batch_size, -1))

#         return data_shuffle, labels_shuffle

In [17]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K
from keras.optimizers import adam

import numpy as np
import matplotlib.pyplot as plt
import argparse
import os

# reparameterization trick
# instead of sampling from Q(z|X), sample epsilon = N(0,I)
# z = z_mean + sqrt(var) * epsilon
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.
    # Arguments
        args (tensor): mean and log of variance of Q(z|X)
    # Returns
        z (tensor): sampled latent vector
    """

    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

data = Dataset(np.load("/Users/amir/Downloads/processed_data.npz"))

# network parameters
input_shape = (data.input_dim, )
latent_dim = 10
intermediate_dim = 4 * latent_dim

# VAE model = encoder + decoder
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(intermediate_dim, activation='relu')(inputs)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(intermediate_dim, activation='relu')(latent_inputs)
outputs = Dense(data.input_dim, activation='sigmoid')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 46)           0                                            
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 40)           1880        encoder_input[0][0]              
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 10)           410         dense_4[0][0]                    
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 10)           410         dense_4[0][0]                    
__________________________________________________________________________________________________
z (Lambda)

In [18]:

models = (encoder, decoder)
batch_size = 128
epochs = 100

# VAE loss = mse_loss or xent_loss + kl_loss
# if args.mse:
#     reconstruction_loss = mse(inputs, outputs)
# else:
reconstruction_loss = mse(inputs, outputs)
#reconstruction_loss = binary_crossentropy(inputs,outputs)

reconstruction_loss *= data.input_dim
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer=adam())
vae.summary()


# if args.weights:
#     vae.load_weights(args.weights)
# else:

# train the autoencoder
vae.fit(data.x_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(data.x_test, None))
vae.save_weights('vae_mlp_mnist.h5')


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 46)                0         
_________________________________________________________________
encoder (Model)              [(None, 10), (None, 10),  2700      
_________________________________________________________________
decoder (Model)              (None, 46)                2326      
Total params: 5,026
Trainable params: 5,026
Non-trainable params: 0
_________________________________________________________________
Train on 153582 samples, validate on 39232 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
 35200/153582 [=====>........................] - ETA: 1s - loss: nan

KeyboardInterrupt: 

In [9]:
# input_dim = data['x_train'].shape[1]-1
# encoding_dim = 10

# compression_factor = float(input_dim) / encoding_dim
# print("Compression factor: %s" % compression_factor)

# autoencoder = tf.keras.Sequential([
#     # Encoder Layers
#     tf.keras.layers.Dense(4 * encoding_dim, input_shape=(input_dim,), activation='relu'),
#     tf.keras.layers.Dense(2 * encoding_dim, activation='relu'),
#     tf.keras.layers.Dense(encoding_dim, activation='relu'),
#     # Decoder Layers
#     tf.keras.layers.Dense(2 * encoding_dim, activation='relu'),
#     tf.keras.layers.Dense(4 * encoding_dim, activation='relu'),
#     tf.keras.layers.Dense(input_dim, activation='sigmoid')
#     ]
# )

# autoencoder.summary()

In [10]:
# input_img = tf.keras.layers.Input(shape=(input_dim,))
# encoder_layer1 = autoencoder.layers[0]
# encoder_layer2 = autoencoder.layers[1]
# encoder_layer3 = autoencoder.layers[2]
# encoder = tf.keras.Model(input_img, encoder_layer3(encoder_layer2(encoder_layer1(input_img))))

# encoder.summary()

In [11]:
# optimizer = keras.optimizers.Adam()
# autoencoder.compile(optimizer='adam', loss='mean_squared_error')
# autoencoder.fit(data['x_train'][:,:][:,:-1], data['x_train'][:,:][:,:-1],
#                 epochs=10,
#                 shuffle=True,
#                 validation_data=(data['x_test'][:,:][:,:-1], data['x_test'][:,:][:,:-1]))

In [12]:
def get_embedded(not_embedded_data):
    embedded = encoder.predict(not_embedded_data[:,:][:,:-1])
    return np.append(embedded, not_embedded_data[:][:,-1].reshape(embedded.shape[0], 1), axis=1)

In [13]:
x_train_e = get_embedded(data['x_train'])
y_train_e = encoder.predict(data['y_train'])

TypeError: 'Dataset' object is not subscriptable

In [None]:
x_test_e = get_embedded(data['x_test'])
y_test_e = encoder.predict(data['y_test'])

In [None]:
np.savez("embedded_data.npz", x_train_e=x_train_e, y_train_e=y_train_e, x_test_e=x_test_e, y_test_e=y_test_e)