## CREDIT CARD FRAUD DETECTION

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from datetime import datetime 
from sklearn.metrics import roc_auc_score as auc 
import seaborn as sns

In [2]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

## Data Exploration

In [3]:
df = pd.read_csv("creditcards.csv")

In [4]:
df.shape

(284807, 31)

In [5]:
print("Total time spanning: {:.1f} days".format(df['Time'].max() / (3600 * 24.0)))
print("{:.3f} % of all transactions are fraud. ".format(np.sum(df['Class']) / df.shape[0] * 100))

Total time spanning: 2.0 days
0.173 % of all transactions are fraud. 


In [6]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [7]:
df.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [8]:
df.dtypes

Time      float64
V1        float64
V2        float64
V3        float64
V4        float64
V5        float64
V6        float64
V7        float64
V8        float64
V9        float64
V10       float64
V11       float64
V12       float64
V13       float64
V14       float64
V15       float64
V16       float64
V17       float64
V18       float64
V19       float64
V20       float64
V21       float64
V22       float64
V23       float64
V24       float64
V25       float64
V26       float64
V27       float64
V28       float64
Amount    float64
Class       int64
dtype: object

### Time and Amount columns have been scaled

In [9]:
# Since most of our data has already been scaled we should scale the columns that are left to scale (Amount and Time)
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()

df['scaled_amount'] = std_scaler.fit_transform(df['Amount'].reshape(-1,1))
df['scaled_time'] = std_scaler.fit_transform(df['Time'].reshape(-1,1))

df.drop(['Time','Amount'], axis=1, inplace=True)

  
  import sys


In [10]:
scaled_amount = df['scaled_amount']
scaled_time = df['scaled_time']

df.drop(['scaled_amount', 'scaled_time'], axis=1, inplace=True)
df.insert(0, 'scaled_amount', scaled_amount)
df.insert(1, 'scaled_time', scaled_time)

df.head()

Unnamed: 0,scaled_amount,scaled_time,V1,V2,V3,V4,V5,V6,V7,V8,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Class
0,0.244964,-1.996583,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,...,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0
1,-0.342475,-1.996583,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,...,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,0
2,1.160686,-1.996562,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,...,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,0
3,0.140534,-1.996562,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,...,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0
4,-0.073403,-1.996541,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,...,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,0


## Train and test split on time series, using first 75% as training/val, and last 25% as test

In [11]:
TEST_RATIO = 0.25
df.sort_values('scaled_time', inplace = True)
TRA_INDEX = int((1-TEST_RATIO) * df.shape[0])
train_x = df.iloc[:TRA_INDEX, 1:-2].values
train_y = df.iloc[:TRA_INDEX, -1].values

test_x = df.iloc[TRA_INDEX:, 1:-2].values
test_y = df.iloc[TRA_INDEX:, -1].values

In [12]:
print("Total train examples: {}, total fraud cases: {}, equal to {:.5f} of total cases. ".format(train_x.shape[0], np.sum(train_y), np.sum(train_y)/train_x.shape[0]))

Total train examples: 213605, total fraud cases: 398, equal to 0.00186 of total cases. 


In [13]:
print("Total test examples: {}, total fraud cases: {}, equal to {:.5f} of total cases. ".format(test_x.shape[0], np.sum(test_y), np.sum(test_y)/test_y.shape[0]))

Total test examples: 71202, total fraud cases: 94, equal to 0.00132 of total cases. 


In [14]:
cols_mean = []
cols_std = []
for c in range(train_x.shape[1]):
    cols_mean.append(train_x[:,c].mean())
    cols_std.append(train_x[:,c].std())
    train_x[:, c] = (train_x[:, c] - cols_mean[-1]) / cols_std[-1]
    test_x[:, c] =  (test_x[:, c] - cols_mean[-1]) / cols_std[-1]

## Modelling and results


##  Auto-encoder as unsupervised learning

In [15]:
# Parameters
learning_rate = 0.001
training_epochs = 10
batch_size = 256
display_step = 1

# Network Parameters
n_hidden_1 = 15 # 1st layer num features
#n_hidden_2 = 15 # 2nd layer num features
n_input = train_x.shape[1] # MNIST data input (img shape: 28*28)
data_dir = '.'

### Train and val the model with 1 hidden layer - Tanh and RMSProp Optimizer- Gaussian Initialization-random


In [16]:
X = tf.placeholder("float", [None, n_input])

weights = {
   'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
  
   'decoder_h1': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
  
}
biases = {
   'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
  
   'decoder_b1': tf.Variable(tf.random_normal([n_input])),
  
}


# Building the encoder
def encoder(x):
   # Encoder Hidden layer with tanh activation #1
   layer_1 = tf.nn.tanh(tf.add(tf.matmul(x, weights['encoder_h1']),
                                  biases['encoder_b1']))
 
   return layer_1


# Building the decoder
def decoder(x):
   # Encoder Hidden layer with sigmoid activation #1
   layer_1 = tf.nn.tanh(tf.add(tf.matmul(x, weights['decoder_h1']),
                                  biases['decoder_b1']))
 
   return layer_1

# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X

# Define batch mse
batch_mse = tf.reduce_mean(tf.pow(y_true - y_pred, 2), 1)

# Define loss and optimizer, minimize the squared error
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

# TRAIN StARTS
save_model = os.path.join(data_dir, 'temp_saved_model_1layer.ckpt')
saver = tf.train.Saver()

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
   now = datetime.now()
   sess.run(init)
   total_batch = int(train_x.shape[0]/batch_size)
   # Training cycle
   for epoch in range(training_epochs):
       # Loop over all batches
       for i in range(total_batch):
           batch_idx = np.random.choice(train_x.shape[0], batch_size)
           batch_xs = train_x[batch_idx]
           # Run optimization op (backprop) and cost op (to get loss value)
           _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
           
       # Display logs per epoch step
       if epoch % display_step == 0:
           train_batch_mse = sess.run(batch_mse, feed_dict={X: train_x})
           print("Epoch:", '%04d' % (epoch+1),
                 "cost=", "{:.9f}".format(c),
                 "Train auc=", "{:.6f}".format(auc(train_y, train_batch_mse)),
                 "Time elapsed=", "{}".format(datetime.now() - now))

   print("Optimization Finished!")
   
   save_path = saver.save(sess, save_model)
   print("Model saved in file: %s" % save_path)

Epoch: 0001 cost= 1.203795433 Train auc= 0.955355 Time elapsed= 0:00:02.355264
Epoch: 0002 cost= 0.736128449 Train auc= 0.954717 Time elapsed= 0:00:04.398700
Epoch: 0003 cost= 0.606346786 Train auc= 0.958361 Time elapsed= 0:00:06.579539
Epoch: 0004 cost= 0.534644902 Train auc= 0.959066 Time elapsed= 0:00:08.772334
Epoch: 0005 cost= 0.515544415 Train auc= 0.959412 Time elapsed= 0:00:10.850908
Epoch: 0006 cost= 0.376697451 Train auc= 0.960006 Time elapsed= 0:00:13.043700
Epoch: 0007 cost= 0.476166815 Train auc= 0.960606 Time elapsed= 0:00:15.238531
Epoch: 0008 cost= 0.474456370 Train auc= 0.960141 Time elapsed= 0:00:17.355162
Epoch: 0009 cost= 0.457053006 Train auc= 0.959373 Time elapsed= 0:00:19.373531
Epoch: 0010 cost= 0.264274508 Train auc= 0.957305 Time elapsed= 0:00:21.524251
Optimization Finished!
Model saved in file: .\temp_saved_model_1layer.ckpt


### Get auto-encoder embedding (the `encoder_op` tensor) for both train and test data

In [17]:
save_model = os.path.join(data_dir, 'temp_saved_model_1layer.ckpt')
saver = tf.train.Saver()

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    saver.restore(sess, save_model)
    
    test_encoding = sess.run(encoder_op, feed_dict={X: test_x})
    train_encoding = sess.run(encoder_op, feed_dict={X: train_x})
    
    print("Dim for test_encoding and train_encoding are: \n", test_encoding.shape, '\n', train_encoding.shape)

INFO:tensorflow:Restoring parameters from .\temp_saved_model_1layer.ckpt
Dim for test_encoding and train_encoding are: 
 (71202, 15) 
 (213605, 15)


### CrossEntropy-truncated_normal- ADAMOptimizer

In [18]:
#n_input = test_encoding.shape[1]
n_input = test_encoding.shape[1]

hidden_size = 4
output_size = 2

X = tf.placeholder(tf.float32, [None, n_input], name='input_x')
y_ = tf.placeholder(tf.int32, shape=[None, output_size], name='target_y')

weights = {
    'W1': tf.Variable(tf.truncated_normal([n_input, hidden_size])),
    'W2': tf.Variable(tf.truncated_normal([hidden_size, output_size])),
}
biases = {
    'b1': tf.Variable(tf.zeros([hidden_size])),
    'b2': tf.Variable(tf.zeros([output_size])),
}

hidden_layer =  tf.nn.relu(tf.add(tf.matmul(X, weights['W1']), biases['b1']))
pred_logits = tf.add(tf.matmul(hidden_layer, weights['W2']), biases['b2'])
pred_probs = tf.nn.softmax(pred_logits)

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=pred_logits))

optimizer = tf.train.AdamOptimizer(2e-4).minimize(cross_entropy)


### Prepare the data set
### Now we need to re-split the train into train/val, due to supervised training.
### We will therefore use 80% of out previous training data as our new training, and the remaining 20% as new validation.  
### AUC 95

In [19]:
n_epochs = 80
batch_size = 256

# PREPARE DATA
VAL_PERC = 0.2
all_y_bin = np.zeros((df.shape[0], 2))
all_y_bin[range(df.shape[0]), df['Class'].values] = 1

train_enc_x = train_encoding[:int(train_encoding.shape[0] * (1-VAL_PERC))]
train_enc_y = all_y_bin[:int(train_encoding.shape[0] * (1-VAL_PERC))]

val_enc_x = train_encoding[int(train_encoding.shape[0] * (1-VAL_PERC)):]
val_enc_y = all_y_bin[int(train_encoding.shape[0] * (1-VAL_PERC)):train_encoding.shape[0]]

test_enc_y = all_y_bin[train_encoding.shape[0]:]
print("Num of data for train, val and test are: \n{}, \n{}, \n{}".format(train_enc_x.shape[0], val_enc_x.shape[0], \
                                                                        test_encoding.shape[0]))

# TRAIN STARTS
save_model = os.path.join(data_dir, 'temp_saved_model_FCLayers.ckpt')
saver = tf.train.Saver()

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    sess.run(init)
    total_batch = int(train_enc_x.shape[0]/batch_size)
    # Training cycle
    for epoch in range(n_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_idx = np.random.choice(train_enc_x.shape[0], batch_size)
            batch_xs = train_enc_x[batch_idx]
            batch_ys = train_enc_y[batch_idx]

            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cross_entropy], feed_dict={X: batch_xs, y_: batch_ys})
            
        # Display logs per epoch step
        if epoch % display_step == 0:
            val_probs = sess.run(pred_probs, feed_dict={X: val_enc_x})
            print("Epoch:", '%04d' % (epoch+1),
                  "cost=", "{:.9f}".format(c), 
                  "Val auc=", "{:.6f}".format(auc(val_enc_y[:, 1], val_probs[:, 1])), 
                  "Time elapsed=", "{}".format(datetime.now() - now))

    print("Optimization Finished!")
    
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)
    

Num of data for train, val and test are: 
170884, 
42721, 
71202
Epoch: 0001 cost= 1.155563951 Val auc= 0.447061 Time elapsed= 0:00:01.179452
Epoch: 0002 cost= 0.389483750 Val auc= 0.564113 Time elapsed= 0:00:01.821158
Epoch: 0003 cost= 0.160097718 Val auc= 0.682188 Time elapsed= 0:00:02.451835
Epoch: 0004 cost= 0.065608770 Val auc= 0.754667 Time elapsed= 0:00:03.313175
Epoch: 0005 cost= 0.061806075 Val auc= 0.781916 Time elapsed= 0:00:04.001958
Epoch: 0006 cost= 0.029504806 Val auc= 0.790426 Time elapsed= 0:00:04.637650
Epoch: 0007 cost= 0.011212895 Val auc= 0.793379 Time elapsed= 0:00:05.306610
Epoch: 0008 cost= 0.027531363 Val auc= 0.795063 Time elapsed= 0:00:06.103549
Epoch: 0009 cost= 0.013827392 Val auc= 0.797254 Time elapsed= 0:00:06.969855
Epoch: 0010 cost= 0.006277842 Val auc= 0.799983 Time elapsed= 0:00:07.801095
Epoch: 0011 cost= 0.039207526 Val auc= 0.801882 Time elapsed= 0:00:08.671378
Epoch: 0012 cost= 0.004125549 Val auc= 0.805078 Time elapsed= 0:00:09.427389
Epoch: 0013

### Test the model on the same test data as before - AUC remains constant


In [20]:
save_model = os.path.join(data_dir, 'temp_saved_model_FCLayers.ckpt')
saver = tf.train.Saver()
# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    
    saver.restore(sess, save_model)
    
    test_probs = sess.run(pred_probs, feed_dict={X: test_encoding})
    
    print("\nTest auc score: {}".format(auc(test_enc_y[:, 1], test_probs[:, 1])))

INFO:tensorflow:Restoring parameters from .\temp_saved_model_FCLayers.ckpt

Test auc score: 0.9320594444889942


## Let's test a simple supervisied neural network with 2 layers - without using auto-encoder

### Softmax and Relu with Gradient estimation ADAMOptimizer -changing the layer size 

In [21]:
n_epochs = 50
batch_size = 256

#n_input = test_encoding.shape[1]
n_input = train_x.shape[1]

hidden1_size = 8
hidden2_size = 4
output_size = 2

X = tf.placeholder(tf.float32, [None, n_input], name='input_x')
y_ = tf.placeholder(tf.int32, shape=[None, output_size], name='target_y')

weights = {
    'W1': tf.Variable(tf.truncated_normal([n_input, hidden1_size])),
    'W2': tf.Variable(tf.truncated_normal([hidden1_size, hidden2_size])),
    'W3': tf.Variable(tf.truncated_normal([hidden2_size, output_size])),
}
biases = {
    'b1': tf.Variable(tf.zeros([hidden1_size])),
    'b2': tf.Variable(tf.zeros([hidden2_size])),
    'b3': tf.Variable(tf.zeros([output_size])),
}

hidden1_layer =  tf.nn.relu(tf.add(tf.matmul(X, weights['W1']), biases['b1']))
hidden2_layer =  tf.nn.relu(tf.add(tf.matmul(hidden1_layer, weights['W2']), biases['b2']))
pred_logits = tf.add(tf.matmul(hidden2_layer, weights['W3']), biases['b3'])
pred_probs = tf.nn.softmax(pred_logits)

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=pred_logits))

optimizer = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)


### Prepare the data set: Now we need to re-split the train into train/val. 
### Again, we will use 80% of out previous training data as our new training, and the remaining 20% as new validation. 


In [22]:
# PREPARE DATA
VAL_PERC = 0.2
all_y_bin = np.zeros((df.shape[0], 2))
all_y_bin[range(df.shape[0]), df['Class'].values] = 1

train_enc_x = train_x[:int(train_x.shape[0] * (1-VAL_PERC))]
train_enc_y = all_y_bin[:int(train_x.shape[0] * (1-VAL_PERC))]

val_enc_x = train_x[int(train_encoding.shape[0] *  (1-VAL_PERC)):]
val_enc_y = all_y_bin[int(train_encoding.shape[0] * (1-VAL_PERC)):train_x.shape[0]]

test_enc_y = all_y_bin[train_x.shape[0]:]

print("Num of data for train, val and test are: \n{}, \n{}, \n{}".format(train_enc_x.shape[0], val_enc_x.shape[0], \
                                                                        test_encoding.shape[0]))
# TRAIN STARTS
save_model = os.path.join(data_dir, 'temp_saved_model_FCNNets_raw.ckpt')
saver = tf.train.Saver()

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    sess.run(init)
    total_batch = int(train_enc_x.shape[0]/batch_size)
    # Training cycle
    for epoch in range(n_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_idx = np.random.choice(train_enc_x.shape[0], batch_size)
            batch_xs = train_enc_x[batch_idx]
            batch_ys = train_enc_y[batch_idx]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cross_entropy], feed_dict={X: batch_xs, y_: batch_ys})
            
        # Display logs per epoch step
        if epoch % display_step == 0:
            val_probs = sess.run(pred_probs, feed_dict={X: val_enc_x})
            print("Epoch:", '%04d' % (epoch+1),
                  "cost=", "{:.9f}".format(c), 
                  "Val auc=", "{:.6f}".format(auc(val_enc_y[:, 1], val_probs[:, 1])), 
                  "Time elapsed=", "{}".format(datetime.now() - now))

    print("Optimization Finished!")
    
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)
    

Num of data for train, val and test are: 
170884, 
42721, 
71202
Epoch: 0001 cost= 11.543346405 Val auc= 0.814476 Time elapsed= 0:00:01.916098
Epoch: 0002 cost= 7.711556435 Val auc= 0.866564 Time elapsed= 0:00:03.053157
Epoch: 0003 cost= 5.879520893 Val auc= 0.903337 Time elapsed= 0:00:04.197203
Epoch: 0004 cost= 3.347528219 Val auc= 0.921169 Time elapsed= 0:00:05.329176
Epoch: 0005 cost= 1.997199774 Val auc= 0.925576 Time elapsed= 0:00:06.442135
Epoch: 0006 cost= 1.826838970 Val auc= 0.920316 Time elapsed= 0:00:07.547078
Epoch: 0007 cost= 1.040879726 Val auc= 0.904171 Time elapsed= 0:00:08.527683
Epoch: 0008 cost= 0.758096695 Val auc= 0.890673 Time elapsed= 0:00:09.594524
Epoch: 0009 cost= 0.522125483 Val auc= 0.864090 Time elapsed= 0:00:10.706482
Epoch: 0010 cost= 0.328337401 Val auc= 0.814523 Time elapsed= 0:00:11.799383
Epoch: 0011 cost= 0.228455901 Val auc= 0.780235 Time elapsed= 0:00:12.893294
Epoch: 0012 cost= 0.146458179 Val auc= 0.771305 Time elapsed= 0:00:13.960131
Epoch: 001

### AUC with Cross Softmax Entropy (Cost Function)= 97%

### Predict on test data

In [23]:
save_model = os.path.join(data_dir, 'temp_saved_model_FCNNets_raw.ckpt')
saver = tf.train.Saver()
# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    
    saver.restore(sess, save_model)
    
    test_probs = sess.run(pred_probs, feed_dict={X: test_x})
    
    print("Test auc score: {}".format(auc(test_enc_y[:, 1], test_probs[:, 1])))

INFO:tensorflow:Restoring parameters from .\temp_saved_model_FCNNets_raw.ckpt
Test auc score: 0.9237228596836218


### Hinge loss entropy (Cost Function) with 70 epochs

In [24]:
 print("Dim for test_encoding and train_encoding are: \n", test_encoding.shape, '\n', train_encoding.shape)
#n_input = test_encoding.shape[1]
n_input1 = test_encoding.shape[1]

hidden_size = 4
output_size = 2

X = tf.placeholder(tf.float32, [None, n_input1], name='input_x')
y_ = tf.placeholder(tf.int32, shape=[None, output_size], name='target_y')

weights = {
   'W1': tf.Variable(tf.truncated_normal([n_input1, hidden_size])),
   'W2': tf.Variable(tf.truncated_normal([hidden_size, output_size])),
}
biases = {
   'b1': tf.Variable(tf.zeros([hidden_size])),
   'b2': tf.Variable(tf.zeros([output_size])),
}

hidden_layer =  tf.nn.relu(tf.add(tf.matmul(X, weights['W1']), biases['b1']))
pred_logits = tf.add(tf.matmul(hidden_layer, weights['W2']), biases['b2'])
pred_probs = tf.nn.softmax(pred_logits)

cross_entropy1 = tf.losses.hinge_loss(labels=y_, logits=pred_logits)

optimizer = tf.train.AdamOptimizer(2e-4).minimize(cross_entropy1)

Dim for test_encoding and train_encoding are: 
 (71202, 15) 
 (213605, 15)


In [25]:
n_epochs = 70
batch_size = 256

# PREPARE DATA
VAL_PERC = 0.2
all_y_bin = np.zeros((df.shape[0], 2))
all_y_bin[range(df.shape[0]), df['Class'].values] = 1

train_enc_x = train_encoding[:int(train_encoding.shape[0] * (1-VAL_PERC))]
train_enc_y = all_y_bin[:int(train_encoding.shape[0] * (1-VAL_PERC))]

val_enc_x = train_encoding[int(train_encoding.shape[0] * (1-VAL_PERC)):]
val_enc_y = all_y_bin[int(train_encoding.shape[0] * (1-VAL_PERC)):train_encoding.shape[0]]

test_enc_y = all_y_bin[train_encoding.shape[0]:]
print("Num of data for train, val and test are: \n{}, \n{}, \n{}".format(train_enc_x.shape[0], val_enc_x.shape[0], \
                                                                       test_encoding.shape[0]))

# TRAIN STARTS
save_model = os.path.join(data_dir, 'temp_saved_model_FCLayers.ckpt')
saver = tf.train.Saver()

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
   now = datetime.now()
   sess.run(init)
   total_batch = int(train_enc_x.shape[0]/batch_size)
   # Training cycle
   for epoch in range(n_epochs):
       # Loop over all batches
       for i in range(total_batch):
           batch_idx = np.random.choice(train_enc_x.shape[0], batch_size)
           batch_xs = train_enc_x[batch_idx]
           batch_ys = train_enc_y[batch_idx]

           # Run optimization op (backprop) and cost op (to get loss value)
           _, c = sess.run([optimizer, cross_entropy1], feed_dict={X: batch_xs, y_: batch_ys})
           
       # Display logs per epoch step
       if epoch % display_step == 0:
           val_probs = sess.run(pred_probs, feed_dict={X: val_enc_x})
           print("Epoch:", '%04d' % (epoch+1),
                 "cost=", "{:.9f}".format(c),
                 "Val auc=", "{:.6f}".format(auc(val_enc_y[:, 1], val_probs[:, 1])),
                 "Time elapsed=", "{}".format(datetime.now() - now))

   print("Optimization Finished!")
   
   save_path = saver.save(sess, save_model)
   print("Model saved in file: %s" % save_path)

Num of data for train, val and test are: 
170884, 
42721, 
71202
Epoch: 0001 cost= 0.705070674 Val auc= 0.355648 Time elapsed= 0:00:01.649808
Epoch: 0002 cost= 0.231795177 Val auc= 0.496578 Time elapsed= 0:00:02.404327
Epoch: 0003 cost= 0.089236572 Val auc= 0.642158 Time elapsed= 0:00:03.278571
Epoch: 0004 cost= 0.063563339 Val auc= 0.743194 Time elapsed= 0:00:03.968548
Epoch: 0005 cost= 0.017952789 Val auc= 0.804824 Time elapsed= 0:00:04.861027
Epoch: 0006 cost= 0.014160321 Val auc= 0.819732 Time elapsed= 0:00:05.556774
Epoch: 0007 cost= 0.003721347 Val auc= 0.825534 Time elapsed= 0:00:06.437204
Epoch: 0008 cost= 0.008375487 Val auc= 0.828642 Time elapsed= 0:00:07.307527
Epoch: 0009 cost= 0.002765207 Val auc= 0.829902 Time elapsed= 0:00:08.177747
Epoch: 0010 cost= 0.019551896 Val auc= 0.830511 Time elapsed= 0:00:09.103207
Epoch: 0011 cost= 0.006081137 Val auc= 0.831850 Time elapsed= 0:00:09.995580
Epoch: 0012 cost= 0.000141117 Val auc= 0.833488 Time elapsed= 0:00:10.888957
Epoch: 0013

## hinge loss entropy
AUC:88


### Train and test split on time series, using first 75% as training/val, and last 25% as test
### Train and val the model with 1 hidden layer - Tanh and RMSProp Optimizer- Gaussian Initialization-random --Scaled(96) Unscaled(93)
### AUC with Cross Softmax Entropy- Unscaled data(92%), Scaled data(97%)
### AUC with Hinge loss entropy- Unscaled data(87%), Scaled data(88%)
## Best accuracy achieved from-  Softmax and Relu with Gradient estimation ADAM Optimizer on scaled data

## References

The code in the document by Venelin Valkov curiousily-https://github.com/curiousily/Credit-Card-Fraud-Detection-using-Autoencoders-in-Keras/blob/master/LICENSE is licensed under the MIT License https://opensource.org/licenses/MIT

The code in the document by cloudacademy- https://github.com/cloudacademy/fraud-detection/blob/master/LICENSE
is licensed under the Apache License  http://www.apache.org/licenses/

The code in the document by Aymeric Damien- https://github.com/aymericdamien is licensed under the MIT License https://opensource.org/licenses/MIT
