In [None]:
# Shell out to `nvidia-smi` to check which GPU(s) this machine exposes before training.
!nvidia-smi

In [None]:
# importing data science libraries
import pandas as pd

# Load the full dataset from CSV. Later cells split this frame and read its
# "isFraud" column as the label.
fraud_dataset = pd.read_csv('../data/nonames.csv')
# Optional sanity checks -- uncomment to inspect the loaded frame:
# print("There are ", len(fraud_dataset), " samples")
# print(fraud_dataset.shape)
# print(fraud_dataset.head(10))
# print(fraud_dataset.describe())

In [None]:
from sklearn.model_selection import train_test_split

import tensorflow as tf
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers, metrics
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras import metrics

# Build a TensorFlow session whose GPU options have `allow_growth` enabled
# (memory is claimed incrementally rather than all at once) and register it
# as the session the Keras backend uses.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)
K.set_session(sess)

In [None]:
# Passed as `random_state` to both train_test_split calls so the splits are repeatable.
RANDOM_SEED = 42

In [None]:
# Step 1: hold out 20% of all rows as the test set.
X_train, X_test = train_test_split(
    fraud_dataset,
    random_state=RANDOM_SEED,
    test_size=0.2,
)

# Step 2: the model is trained to reconstruct features only, so the label
# column is removed from the training portion before it is split again.
drop1 = X_train.pop("isFraud")

# Step 3: carve a validation set (20%) out of the remaining training rows.
X_train, X_val = train_test_split(
    X_train,
    random_state=RANDOM_SEED,
    test_size=0.2,
)

# Step 4: detach the test labels; `y_test` is an independent deep copy of
# the popped column.
drop2 = X_test.pop("isFraud")
y_test = drop2.copy(deep=True)

In [None]:
# Units per stage: first encoder layer, second encoder layer, latent size.
# The decoder in the model cell mirrors the first two entries in reverse.
hidden_layer = [10, 8, 4]


def sampling(args):
    """Draw a latent sample via the reparameterisation trick.

    Parameters
    ----------
    args : sequence of two tensors
        ``(z_mean, z_log_var)`` -- mean and log-variance of the latent
        distribution produced by the encoder.

    Returns
    -------
    tensor
        ``z_mean + exp(z_log_var / 2) * eps`` where ``eps`` is standard
        normal noise with one row per batch element and ``hidden_layer[2]``
        columns.
    """
    z_mean, z_log_var = args
    batch_rows = K.shape(z_mean)[0]
    noise = K.random_normal(shape=(batch_rows, hidden_layer[2]))
    std_dev = K.exp(z_log_var / 2)
    return z_mean + std_dev * noise

In [None]:
# Number of feature columns fed to (and reconstructed by) the network.
input_shape = X_train.shape[1]

# Strength of the L1 activity penalty applied to the regularised Dense layers.
regulizer_value = 1e-5


def _l1_dense(units, activation):
    """Return a Dense layer carrying an L1 activity penalty of `regulizer_value`."""
    return Dense(units,
                 activation=activation,
                 activity_regularizer=regularizers.l1(regulizer_value))


# ---- encoder ----
input_layer = Input(shape=(input_shape,))
encoder1 = _l1_dense(hidden_layer[0], "relu")(input_layer)
encoder2 = _l1_dense(hidden_layer[1], "relu")(encoder1)

# Two linear, unregularised heads parameterise the latent distribution.
z_mean = Dense(hidden_layer[2])(encoder2)
z_log_sigma = Dense(hidden_layer[2])(encoder2)

# `output_shape` is optional on the TensorFlow backend, i.e.
# `Lambda(sampling)([z_mean, z_log_sigma])` would work as well.
z = Lambda(sampling, output_shape=(hidden_layer[2],))([z_mean, z_log_sigma])

# ---- decoder (mirror of the encoder, sigmoid on the output) ----
decoder1 = _l1_dense(hidden_layer[1], "relu")(z)
decoder2 = _l1_dense(hidden_layer[0], "relu")(decoder1)
decoder3 = _l1_dense(input_shape, "sigmoid")(decoder2)

vae = Model(inputs=input_layer, outputs=decoder3)

In [None]:
def vae_loss(input_layer, decoder3):
    """VAE objective: reconstruction cross-entropy plus KL divergence.

    Keras calls a loss positionally as ``loss(y_true, y_pred)``, so inside
    this function ``input_layer`` is the target batch and ``decoder3`` is the
    reconstruction (both names shadow the module-level tensors). The KL term
    reads the module-level encoder outputs ``z_mean`` and ``z_log_sigma``;
    the latter is used as a log-variance, matching ``sampling``.

    Returns a scalar: the batch mean of the per-sample loss.
    """
    # binary_crossentropy averages over the feature axis; multiplying by the
    # feature count turns that back into a per-sample sum. The previous
    # `input_shape + ...` merely shifted the loss by a constant, which has no
    # effect on the gradients.
    xent_loss = input_shape * metrics.binary_crossentropy(input_layer, decoder3)
    # NOTE(review): the KL term is averaged over the latent axis (K.mean);
    # the reference Keras VAE example sums it (K.sum). Left unchanged because
    # it only alters the relative KL weight -- confirm which is intended.
    kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return K.mean(xent_loss + kl_loss)


vae.compile(optimizer='rmsprop', loss=vae_loss)
vae.summary()

In [None]:
epoch = 200
batch_size = 1000

# Write the model to disk only when the monitored quantity improves (the
# Keras default monitor is `val_loss`), so the best epoch can be restored.
checkpointer = ModelCheckpoint(filepath="../saved/VAE1.h5",
                               verbose=0,
                               save_best_only=True)
tensorboard = TensorBoard(log_dir='./logs',)

# The network is trained to reproduce its own input, so the same frame is
# passed as both `x` and `y`: the model was compiled with `loss=vae_loss`,
# which needs a target for training and for validation alike.
# The History object is kept because the plotting cell reads
# `history.history`, and the callbacks must be registered here or the
# checkpoint file and TensorBoard logs are never written.
history = vae.fit(X_train, X_train,
                  shuffle=True,
                  epochs=epoch,
                  batch_size=batch_size,
                  validation_data=(X_val, X_val),
                  callbacks=[checkpointer, tensorboard])

In [None]:
# importing visualization tools
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

# Restore the checkpointed weights into the network that is already built.
# `load_model` is not imported in this notebook, and deserialising the whole
# model would also need the custom loss and the Lambda sampling function to
# be supplied; `load_weights` reads the weights straight from the saved
# HDF5 file and sidesteps both problems.
vae.load_weights('../saved/VAE1.h5')

# Training vs. validation loss per epoch, as recorded by `fit`.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# `val_loss` is measured on the validation split, not on the held-out test set.
plt.legend(['train', 'validation'], loc='upper right');

In [None]:
import numpy as np


# Reconstruct the test rows with the trained model. The original call used
# `autoencoder`, a name that is not defined in the cells above; the model
# built and trained in this notebook is `vae`.
predictions = vae.predict(X_test)
print(predictions.shape)

# Per-row mean squared reconstruction error (mean taken across columns).
mse = np.mean(np.power(X_test - predictions, 2), axis=1)
error_df = pd.DataFrame({'reconstruction_error': mse})

# Kept as the last expression so the notebook actually renders the summary.
error_df.describe()

In [None]:
# Eyeball check: first reconstructed row next to the first original test row.
print(predictions[0, :])
X_test.iloc[:1]