In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras import objectives
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import PowerTransformer, MinMaxScaler
import sklearn.cluster as cluster


In [3]:
!ls ../credit_card_kaggle

creditcard.csv     creditcard.csv.zip


In [4]:
!head -n 5 ../credit_card_kaggle/creditcard.csv

"Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Class"
0,-1.3598071336738,-0.0727811733098497,2.53634673796914,1.37815522427443,-0.338320769942518,0.462387777762292,0.239598554061257,0.0986979012610507,0.363786969611213,0.0907941719789316,-0.551599533260813,-0.617800855762348,-0.991389847235408,-0.311169353699879,1.46817697209427,-0.470400525259478,0.207971241929242,0.0257905801985591,0.403992960255733,0.251412098239705,-0.018306777944153,0.277837575558899,-0.110473910188767,0.0669280749146731,0.128539358273528,-0.189114843888824,0.133558376740387,-0.0210530534538215,149.62,"0"
0,1.19185711131486,0.26615071205963,0.16648011335321,0.448154078460911,0.0600176492822243,-0.0823608088155687,-0.0788029833323113,0.0851016549148104,-0.255425128109186,-0.166974414004614,1.61272666105479,1.06523531137287,0.48909501589608,-0.143772296441519,0.635558093258208,0.46391704

In [5]:
data = pd.read_csv('../credit_card_kaggle/creditcard.csv')

In [6]:
data.shape

(284807, 31)

In [7]:
data.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [8]:
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [9]:
data_cols = list(data.columns[ data.columns != 'Class' ])
label_cols = ['Class']

print('Dataset columns: {}'.format(data_cols))
sorted_cols = ['V14', 'V4', 'V10', 'V17', 'V12', 'V26', 'Amount', 'V21', 'V8', 'V11', 'V7', 'V28', 'V19', 'V3', 'V22', 'V6', 'V20', 'V27', 'V16', 'V13', 'V25', 'V24', 'V18', 'V2', 'V1', 'V5', 'V15', 'V9', 'V23', 'Class']
processed_data = data[ sorted_cols ].copy()

Dataset columns: ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount']


In [10]:
def transformations(data):
    #Log transformation to Amount variable
    data_cols = list(data.columns[data.columns != 'Class'])
  
    #data[data_cols] = StandardScaler().fit_transform(data[data_cols])
    data[data_cols] = PowerTransformer(method='yeo-johnson', standardize=True, copy=True).fit_transform(data[data_cols])
    
    return data

# here we've applied a PowerTransformation
train_data = transformations(data)

#For the purpose of this example we will only synthesize the minority class
train_data_1 = train_data.loc[ data['Class']==1 ].copy()
train_data_0 = train_data.loc[ data['Class']==0 ].copy()

In [11]:
train_data = train_data_1
print("Dataset info: Number of records - {} Number of varibles - {}".format(train_data.shape[0], train_data.shape[1]))

algorithm = cluster.KMeans
args, kwds = (), {'n_clusters':2, 'random_state':0}
labels = algorithm(*args, **kwds).fit_predict(train_data[ data_cols ])

print( pd.DataFrame( [ [np.sum(labels==i)] for i in np.unique(labels) ], columns=['count'], index=np.unique(labels) ) )


Dataset info: Number of records - 492 Number of varibles - 31
   count
0    384
1    108


In [12]:
minmaxscaler=MinMaxScaler()
minmaxscaler.fit(data[data_cols])
data[data_cols] = minmaxscaler.transform(data[data_cols])

In [13]:
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,0.731869,0.500272,0.621413,0.442612,0.724608,0.655572,0.266075,0.429073,0.618089,...,0.680905,0.460318,0.573236,0.223032,0.440056,0.5941,0.302563,0.447695,0.55069,0
1,0.0,0.879919,0.505036,0.530978,0.398695,0.727466,0.649272,0.264137,0.428865,0.597244,...,0.677772,0.418247,0.5766,0.182041,0.442324,0.646331,0.300146,0.448504,0.155526,0
2,6.5e-05,0.731925,0.484567,0.588907,0.395295,0.723434,0.668703,0.269433,0.431395,0.550405,...,0.68484,0.483958,0.589824,0.150887,0.414389,0.602889,0.299367,0.446816,0.639354,0
3,6.5e-05,0.747529,0.498761,0.589714,0.327248,0.726959,0.663559,0.266062,0.433484,0.555355,...,0.679553,0.447547,0.571981,0.111742,0.471619,0.588235,0.301357,0.449556,0.531963,0
4,0.000123,0.739718,0.514309,0.57988,0.396455,0.724117,0.651429,0.268225,0.423697,0.632446,...,0.681038,0.485247,0.572811,0.231247,0.421043,0.700378,0.304034,0.452974,0.475782,0


In [14]:
len(data_cols)

30

In [15]:
# Hyperparameters
batch_size = 1
original_dim = 31
latent_dim = 4
intermediate_dim = 10
nb_epoch = 10
epsilon_std = 1.0

#encoder
'''
The encoder's jo is to map inputs to our hidden distribution
parameters. We take the input and send it through a dense fully
connected layer with ReLU (classic non-linearity to squash
dimensionality). Next, we convert the input data into two
parameters in the hidden space. We predefine the size using
dense, fully connected layers — z mean and z log sigma.
'''
x = Input(batch_shape = (batch_size, original_dim))
h = Dense(intermediate_dim, activation = 'relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

print(z_mean)
print(z_log_var)

KerasTensor(type_spec=TensorSpec(shape=(1, 4), dtype=tf.float32, name=None), name='dense_1/BiasAdd:0', description="created by layer 'dense_1'")
KerasTensor(type_spec=TensorSpec(shape=(1, 4), dtype=tf.float32, name=None), name='dense_2/BiasAdd:0', description="created by layer 'dense_2'")


In [16]:
'''
The decoder(in the next block) will take z as its input
and output the parameters to the probability distribution 
of the data.
Epsilon is a random normal tensor
'''

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
    return z_mean + K.exp(z_log_var / 2) * epsilon
    
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

#latent hidden state
print(z)

KerasTensor(type_spec=TensorSpec(shape=(1, 4), dtype=tf.float32, name=None), name='lambda/add:0', description="created by layer 'lambda'")


In [17]:
'''
we first initialize it with two fully connected layers and
their own respective activation functions. Because the data
is extracted from a small dimensionality to a larger one, some
of it will be lost in the reconstruction process.
'''

#decoder
# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
h_decoded2 = Dense(intermediate_dim, activation = 'relu')(h_decoded)
x_decoded_mean = decoder_mean(h_decoded2)

print(x_decoded_mean)

KerasTensor(type_spec=TensorSpec(shape=(1, 31), dtype=tf.float32, name=None), name='dense_4/Sigmoid:0', description="created by layer 'dense_4'")


In [26]:
# from tensorflow.python.framework.ops import disable_eager_execution

# disable_eager_execution()

'''
The first term below measures the reconstruction loss. If the
decoder output is bad at reconstructing data, the cost in terms
of loss will be considerable at this point. The next term is a
regularizer, meaning it keeps the representation of each digit as
diverse as possible. So for instance, if two different people were 
to write out the digit three, the representations might end up
looking very different because, of course, different people write
differently. This can be bad, and the regularizer comes to rescue!
We penalize bad behaviour (like the example here) and ensure similar
representations are close together. Our total loss function is
defined as the sum of our reconstruction term, and the KL divergence
regularization term.
'''

#loss
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

vae = Model(x, x_decoded_mean)
vae.compile(optimizer='rmsprop', loss=vae_loss)

TypeError: unsupported callable

In [None]:
vae.summary()

In [None]:
# train the VAE 
x_train , x_test = train_test_split(data.values)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

vae.fit(x_train, x_train,
        shuffle=True,
        epochs=nb_epoch,
        batch_size=batch_size,
        validation_data=(x_test, x_test),verbose=1)

In [None]:
#plot latent/hidden space

# build a model to project inputs on the latent space
encoder = Model(x, z_mean)

# display a 2D plot of the digit classes in the latent space
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
plt.colorbar()
plt.show()

In [None]:
np.prod(x_train.shape[1:])

In [None]:
x_train.shape[1:]