In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from datetime import datetime 
from sklearn.metrics import roc_auc_score as auc 
import seaborn as sns

In [2]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

## Data Exploration

In [3]:
# Load the transactions table from a CSV file given by a path relative to the working directory.
df = pd.read_csv("creditcards.csv")

In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(284807, 31)

In [5]:
# Summarise the observation window (dividing by 3600 * 24 treats 'Time' as seconds)
# and the percentage of rows whose 'Class' value is 1.
seconds_per_day = 3600 * 24.0
span_days = df['Time'].max() / seconds_per_day
fraud_pct = np.sum(df['Class']) / df.shape[0] * 100
print("Total time spanning: {:.1f} days".format(span_days))
print("{:.3f} % of all transactions are fraud. ".format(fraud_pct))

Total time spanning: 2.0 days
0.173 % of all transactions are fraud. 


In [6]:
# Preview the first rows to inspect the column layout and value ranges.
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [7]:
# List the column labels of the frame.
df.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [8]:
# Show the dtype of every column.
df.dtypes

Time      float64
V1        float64
V2        float64
V3        float64
V4        float64
V5        float64
V6        float64
V7        float64
V8        float64
V9        float64
V10       float64
V11       float64
V12       float64
V13       float64
V14       float64
V15       float64
V16       float64
V17       float64
V18       float64
V19       float64
V20       float64
V21       float64
V22       float64
V23       float64
V24       float64
V25       float64
V26       float64
V27       float64
V28       float64
Amount    float64
Class       int64
dtype: object

## Train and test split on time series, using first 75% as training/val, and last 25% as test

In [10]:
# Chronological hold-out: after ordering by 'Time', the first (1 - TEST_RATIO)
# share of the rows becomes train/validation and the remaining rows the test set.
TEST_RATIO = 0.25
# Rebind instead of sorting in place so re-running this cell is idempotent;
# `df` stays time-ordered, which later cells rely on when slicing labels by position.
df = df.sort_values('Time')
TRA_INDEX = int((1-TEST_RATIO) * df.shape[0])

# Column slice 1:-2 drops the first column and the last two columns
# ('Time', 'Amount' and 'Class' according to df.columns); the last column is the label.
# `.values` may return a view of the frame's data (and a read-only array under
# pandas copy-on-write). The feature matrices are standardised in place later,
# so take explicit copies to keep `df` untouched and the arrays writable.
train_x = df.iloc[:TRA_INDEX, 1:-2].values.copy()
train_y = df.iloc[:TRA_INDEX, -1].values

test_x = df.iloc[TRA_INDEX:, 1:-2].values.copy()
test_y = df.iloc[TRA_INDEX:, -1].values

In [11]:
# Report the size of the training split and how many of its labels are positive.
n_train_rows = train_x.shape[0]
n_train_fraud = np.sum(train_y)
print("Total train examples: {}, total fraud cases: {}, equal to {:.5f} of total cases. ".format(
    n_train_rows, n_train_fraud, n_train_fraud / n_train_rows))

Total train examples: 213605, total fraud cases: 398, equal to 0.00186 of total cases. 


In [12]:
# Report the size of the test split and how many of its labels are positive.
n_test_fraud = np.sum(test_y)
test_fraud_share = n_test_fraud / test_y.shape[0]
print("Total test examples: {}, total fraud cases: {}, equal to {:.5f} of total cases. ".format(
    test_x.shape[0], n_test_fraud, test_fraud_share))

Total test examples: 71202, total fraud cases: 94, equal to 0.00132 of total cases. 


In [13]:
# Z-score every feature column in place. The statistics come from the training
# split only and are reused for the test split, so no test information leaks
# into the scaling.
cols_mean = []
cols_std = []
for col in range(train_x.shape[1]):
    col_mean = train_x[:, col].mean()
    col_std = train_x[:, col].std()
    # A constant training column has zero spread; dividing by it would fill the
    # column with NaN/inf. Scale by 1 instead, which leaves it centred at zero.
    if col_std == 0:
        col_std = 1.0
    cols_mean.append(col_mean)
    cols_std.append(col_std)
    train_x[:, col] = (train_x[:, col] - col_mean) / col_std
    test_x[:, col] = (test_x[:, col] - col_mean) / col_std

## Modelling and results


## 1. Auto-encoder as unsupervised learning

In [14]:
# Training parameters for the auto-encoder
learning_rate = 0.001
training_epochs = 10
batch_size = 256
display_step = 1  # log every `display_step` epochs

# Network Parameters
n_hidden_1 = 15 # width of the single hidden (encoding) layer
#n_hidden_2 = 15 # 2nd layer num features
n_input = train_x.shape[1] # number of input features (columns of train_x)
data_dir = '.'  # directory where model checkpoints are written

### Train and validate the model with 1 hidden layer: tanh activation, RMSProp optimizer, random Gaussian initialization


In [15]:
# Input placeholder: batches of rows with n_input columns each.
X = tf.placeholder("float", [None, n_input])

# Projection matrices of the encoder and decoder, initialised from tf.random_normal.
weights = dict(
    encoder_h1=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    decoder_h1=tf.Variable(tf.random_normal([n_hidden_1, n_input])),
)

# Matching bias vectors, initialised the same way.
biases = dict(
    encoder_b1=tf.Variable(tf.random_normal([n_hidden_1])),
    decoder_b1=tf.Variable(tf.random_normal([n_input])),
)


# Building the encoder
def encoder(x):
    """Map an input batch to its hidden representation.

    Computes tanh(x @ weights['encoder_h1'] + biases['encoder_b1']).

    Args:
        x: tensor that can be matrix-multiplied with weights['encoder_h1'].

    Returns:
        The tanh-activated hidden layer tensor.
    """
    # Encoder hidden layer with tanh activation
    layer_1 = tf.nn.tanh(tf.add(tf.matmul(x, weights['encoder_h1']),
                                biases['encoder_b1']))
    return layer_1


# Building the decoder
def decoder(x):
    """Reconstruct the input from a hidden representation.

    Computes tanh(x @ weights['decoder_h1'] + biases['decoder_b1']); because of
    the tanh, every reconstructed value lies strictly between -1 and 1.

    Args:
        x: tensor that can be matrix-multiplied with weights['decoder_h1'].

    Returns:
        The tanh-activated reconstruction tensor.
    """
    # Decoder output layer with tanh activation
    affine = tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1'])
    return tf.nn.tanh(affine)

# Construct model
# Wire the auto-encoder: input -> hidden code -> reconstruction.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# The network is trained to reproduce its own input,
# so the reconstruction is the prediction and the input is the target.
y_pred = decoder_op
y_true = X

# Element-wise squared reconstruction error, shared by both reductions below.
squared_error = tf.pow(y_true - y_pred, 2)

# Mean squared error per row (reduced over axis 1); used as the anomaly score.
batch_mse = tf.reduce_mean(squared_error, 1)

# Scalar loss over the whole batch, minimised with RMSProp.
cost = tf.reduce_mean(squared_error)
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

# TRAIN STARTS
save_model = os.path.join(data_dir, 'temp_saved_model_1layer.ckpt')
saver = tf.train.Saver()

# Op that initialises every variable created so far.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    sess.run(init)
    # Number of optimisation steps per epoch.
    total_batch = train_x.shape[0] // batch_size
    for epoch in range(training_epochs):
        for step in range(total_batch):
            # Mini-batch drawn at random (np.random.choice samples with replacement).
            batch_idx = np.random.choice(train_x.shape[0], batch_size)
            # One backprop step; `c` holds the loss of this mini-batch.
            _, c = sess.run([optimizer, cost], feed_dict={X: train_x[batch_idx]})

        if epoch % display_step != 0:
            continue

        # Score the full training set by per-row reconstruction error and check
        # how well that score ranks the labelled positives.
        train_batch_mse = sess.run(batch_mse, feed_dict={X: train_x})
        train_auc = auc(train_y, train_batch_mse)
        print("Epoch:", '%04d' % (epoch+1),
              "cost=", "{:.9f}".format(c),
              "Train auc=", "{:.6f}".format(train_auc),
              "Time elapsed=", "{}".format(datetime.now() - now))

    print("Optimization Finished!")

    # Persist the trained variables so later cells can restore them.
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)

Epoch: 0001 cost= 1.478872895 Train auc= 0.957891 Time elapsed= 0:00:02.320953
Epoch: 0002 cost= 0.985197067 Train auc= 0.958765 Time elapsed= 0:00:05.270322
Epoch: 0003 cost= 1.790869951 Train auc= 0.958700 Time elapsed= 0:00:07.611269
Epoch: 0004 cost= 0.577246010 Train auc= 0.959454 Time elapsed= 0:00:09.791744
Epoch: 0005 cost= 0.461763293 Train auc= 0.960296 Time elapsed= 0:00:11.992906
Epoch: 0006 cost= 0.473216265 Train auc= 0.961003 Time elapsed= 0:00:14.230938
Epoch: 0007 cost= 0.842373073 Train auc= 0.960281 Time elapsed= 0:00:16.427414
Epoch: 0008 cost= 0.325677723 Train auc= 0.957533 Time elapsed= 0:00:18.637305
Epoch: 0009 cost= 0.386690944 Train auc= 0.956865 Time elapsed= 0:00:20.898712
Epoch: 0010 cost= 0.366356373 Train auc= 0.957167 Time elapsed= 0:00:23.277154
Optimization Finished!
Model saved in file: .\temp_saved_model_1layer.ckpt


### 1) Get auto-encoder embedding (the `encoder_op` tensor) for both train and test data

In [16]:
# Checkpoint written by the auto-encoder training cell.
save_model = os.path.join(data_dir, 'temp_saved_model_1layer.ckpt')
saver = tf.train.Saver()

# The initialiser op is created but not run in this cell: the variables are
# filled from the checkpoint instead.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    saver.restore(sess, save_model)

    # Push both splits through the encoder (test first, then train).
    test_encoding, train_encoding = (
        sess.run(encoder_op, feed_dict={X: split}) for split in (test_x, train_x)
    )

    print("Dim for test_encoding and train_encoding are: \n", test_encoding.shape, '\n', train_encoding.shape)

INFO:tensorflow:Restoring parameters from .\temp_saved_model_1layer.ckpt
Dim for test_encoding and train_encoding are: 
 (71202, 15) 
 (213605, 15)


### 2) Classifier on the encoding: cross-entropy loss, truncated-normal initialization, Adam optimizer

In [17]:
# The classifier consumes the auto-encoder embedding, so its input width is the
# number of columns of the encoding.
n_input = test_encoding.shape[1]

hidden_size = 4
output_size = 2

# Inputs: embedding rows and two-column one-hot targets.
X = tf.placeholder(tf.float32, [None, n_input], name='input_x')
y_ = tf.placeholder(tf.int32, shape=[None, output_size], name='target_y')

# Truncated-normal weights, zero biases.
weights = dict(
    W1=tf.Variable(tf.truncated_normal([n_input, hidden_size])),
    W2=tf.Variable(tf.truncated_normal([hidden_size, output_size])),
)
biases = dict(
    b1=tf.Variable(tf.zeros([hidden_size])),
    b2=tf.Variable(tf.zeros([output_size])),
)

# One ReLU hidden layer followed by a linear layer producing class logits.
hidden_pre_activation = tf.add(tf.matmul(X, weights['W1']), biases['b1'])
hidden_layer = tf.nn.relu(hidden_pre_activation)
pred_logits = tf.add(tf.matmul(hidden_layer, weights['W2']), biases['b2'])
pred_probs = tf.nn.softmax(pred_logits)

# Mean softmax cross-entropy over the batch, minimised with Adam.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=pred_logits)
cross_entropy = tf.reduce_mean(per_example_loss)

optimizer = tf.train.AdamOptimizer(2e-4).minimize(cross_entropy)


### 3) Prepare the data set. 
### Now we need to re-split the train into train/val, due to supervised training.
### We will therefore use 80% of our previous training data as our new training set, and the remaining 20% as the new validation set.
### AUC 95

In [18]:
n_epochs = 80
batch_size = 256

# PREPARE DATA
VAL_PERC = 0.2

# One-hot labels: column 0 is set for Class == 0, column 1 for Class == 1.
# Rows are sliced by position below, so this assumes `df` is still in the row
# order that produced train_encoding / test_encoding.
n_rows = df.shape[0]
all_y_bin = np.zeros((n_rows, 2))
all_y_bin[np.arange(n_rows), df['Class'].values] = 1

# The first (1 - VAL_PERC) of the encoded training rows are used for fitting,
# the rest for validation; everything after the training rows is test.
n_encoded_train = train_encoding.shape[0]
val_start = int(n_encoded_train * (1-VAL_PERC))

train_enc_x = train_encoding[:val_start]
train_enc_y = all_y_bin[:val_start]

val_enc_x = train_encoding[val_start:]
val_enc_y = all_y_bin[val_start:n_encoded_train]

test_enc_y = all_y_bin[n_encoded_train:]
print("Num of data for train, val and test are: \n{}, \n{}, \n{}".format(
    train_enc_x.shape[0], val_enc_x.shape[0], test_encoding.shape[0]))

# TRAIN STARTS
save_model = os.path.join(data_dir, 'temp_saved_model_FCLayers.ckpt')
saver = tf.train.Saver()

# Op that initialises every variable in the graph.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    sess.run(init)
    # Number of optimisation steps per epoch.
    total_batch = train_enc_x.shape[0] // batch_size
    for epoch in range(n_epochs):
        for step in range(total_batch):
            # Mini-batch drawn at random (np.random.choice samples with replacement).
            batch_idx = np.random.choice(train_enc_x.shape[0], batch_size)
            feed = {X: train_enc_x[batch_idx], y_: train_enc_y[batch_idx]}

            # One backprop step; `c` holds the loss of this mini-batch.
            _, c = sess.run([optimizer, cross_entropy], feed_dict=feed)

        if epoch % display_step != 0:
            continue

        # Validation AUC on the predicted probability of the positive class.
        val_probs = sess.run(pred_probs, feed_dict={X: val_enc_x})
        val_auc = auc(val_enc_y[:, 1], val_probs[:, 1])
        print("Epoch:", '%04d' % (epoch+1),
              "cost=", "{:.9f}".format(c),
              "Val auc=", "{:.6f}".format(val_auc),
              "Time elapsed=", "{}".format(datetime.now() - now))

    print("Optimization Finished!")

    # Persist the trained variables so the test cell can restore them.
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)
    

Num of data for train, val and test are: 
170884, 
42721, 
71202
Epoch: 0001 cost= 0.238808006 Val auc= 0.815804 Time elapsed= 0:00:01.172733
Epoch: 0002 cost= 0.102786362 Val auc= 0.827126 Time elapsed= 0:00:02.013724
Epoch: 0003 cost= 0.068595916 Val auc= 0.843399 Time elapsed= 0:00:02.876446
Epoch: 0004 cost= 0.044349782 Val auc= 0.860015 Time elapsed= 0:00:03.749218
Epoch: 0005 cost= 0.018551897 Val auc= 0.872920 Time elapsed= 0:00:04.619408
Epoch: 0006 cost= 0.011627660 Val auc= 0.883144 Time elapsed= 0:00:05.473859
Epoch: 0007 cost= 0.009519242 Val auc= 0.890153 Time elapsed= 0:00:06.348519
Epoch: 0008 cost= 0.005414076 Val auc= 0.897618 Time elapsed= 0:00:07.200052
Epoch: 0009 cost= 0.003358477 Val auc= 0.905037 Time elapsed= 0:00:08.067150
Epoch: 0010 cost= 0.001853112 Val auc= 0.911442 Time elapsed= 0:00:08.934362
Epoch: 0011 cost= 0.001572615 Val auc= 0.920207 Time elapsed= 0:00:09.815685
Epoch: 0012 cost= 0.001371119 Val auc= 0.926485 Time elapsed= 0:00:10.668089
Epoch: 0013

### 4) Test the model on the same test data as before - AUC decreased - 0.93


In [19]:
# Checkpoint written by the classifier training cell.
save_model = os.path.join(data_dir, 'temp_saved_model_FCLayers.ckpt')
saver = tf.train.Saver()
# The initialiser op is created but not run here; variables come from the checkpoint.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()

    saver.restore(sess, save_model)

    # Score the held-out encodings and rank them by positive-class probability.
    test_probs = sess.run(pred_probs, feed_dict={X: test_encoding})
    test_auc = auc(test_enc_y[:, 1], test_probs[:, 1])

    print("\nTest auc score: {}".format(test_auc))

INFO:tensorflow:Restoring parameters from .\temp_saved_model_FCLayers.ckpt

Test auc score: 0.9396701331747094


## Let's test a simple supervised neural network with 2 hidden layers, without using the auto-encoder

### Softmax output and ReLU hidden layers, trained with the Adam optimizer (different layer sizes)

In [20]:
n_epochs = 50
batch_size = 256

# This network reads the standardised raw features directly, so its input
# width is the number of columns of train_x.
n_input = train_x.shape[1]

hidden1_size = 8
hidden2_size = 4
output_size = 2

# Inputs: feature rows and two-column one-hot targets.
X = tf.placeholder(tf.float32, [None, n_input], name='input_x')
y_ = tf.placeholder(tf.int32, shape=[None, output_size], name='target_y')

# Truncated-normal weights, zero biases.
weights = dict(
    W1=tf.Variable(tf.truncated_normal([n_input, hidden1_size])),
    W2=tf.Variable(tf.truncated_normal([hidden1_size, hidden2_size])),
    W3=tf.Variable(tf.truncated_normal([hidden2_size, output_size])),
)
biases = dict(
    b1=tf.Variable(tf.zeros([hidden1_size])),
    b2=tf.Variable(tf.zeros([hidden2_size])),
    b3=tf.Variable(tf.zeros([output_size])),
)

# Two ReLU hidden layers followed by a linear layer producing class logits.
hidden1_pre_activation = tf.add(tf.matmul(X, weights['W1']), biases['b1'])
hidden1_layer = tf.nn.relu(hidden1_pre_activation)
hidden2_pre_activation = tf.add(tf.matmul(hidden1_layer, weights['W2']), biases['b2'])
hidden2_layer = tf.nn.relu(hidden2_pre_activation)
pred_logits = tf.add(tf.matmul(hidden2_layer, weights['W3']), biases['b3'])
pred_probs = tf.nn.softmax(pred_logits)

# Mean softmax cross-entropy over the batch, minimised with Adam.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=pred_logits)
cross_entropy = tf.reduce_mean(per_example_loss)

optimizer = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)


### Prepare the data set. Now we need to re-split the train into train/val. 
### Again, we will use 80% of our previous training data as our new training set, and the remaining 20% as the new validation set.


In [21]:
# PREPARE DATA
VAL_PERC = 0.2

# One-hot labels: column 0 is set for Class == 0, column 1 for Class == 1.
# Rows are sliced by position below, so this assumes `df` is still in the row
# order that produced train_x / test_x.
all_y_bin = np.zeros((df.shape[0], 2))
all_y_bin[range(df.shape[0]), df['Class'].values] = 1

# Derive every boundary from the raw feature matrices. This model does not use
# the auto-encoder, so the cell must not depend on train_encoding/test_encoding
# having been computed.
n_train_total = train_x.shape[0]
val_start = int(n_train_total * (1-VAL_PERC))

train_enc_x = train_x[:val_start]
train_enc_y = all_y_bin[:val_start]

val_enc_x = train_x[val_start:]
val_enc_y = all_y_bin[val_start:n_train_total]

test_enc_y = all_y_bin[n_train_total:]

print("Num of data for train, val and test are: \n{}, \n{}, \n{}".format(
    train_enc_x.shape[0], val_enc_x.shape[0], test_x.shape[0]))
# TRAIN STARTS
save_model = os.path.join(data_dir, 'temp_saved_model_FCNNets_raw.ckpt')
saver = tf.train.Saver()

# Op that initialises every variable in the graph.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    sess.run(init)
    # Number of optimisation steps per epoch.
    total_batch = train_enc_x.shape[0] // batch_size
    for epoch in range(n_epochs):
        for step in range(total_batch):
            # Mini-batch drawn at random (np.random.choice samples with replacement).
            batch_idx = np.random.choice(train_enc_x.shape[0], batch_size)
            feed = {X: train_enc_x[batch_idx], y_: train_enc_y[batch_idx]}
            # One backprop step; `c` holds the loss of this mini-batch.
            _, c = sess.run([optimizer, cross_entropy], feed_dict=feed)

        if epoch % display_step != 0:
            continue

        # Validation AUC on the predicted probability of the positive class.
        val_probs = sess.run(pred_probs, feed_dict={X: val_enc_x})
        val_auc = auc(val_enc_y[:, 1], val_probs[:, 1])
        print("Epoch:", '%04d' % (epoch+1),
              "cost=", "{:.9f}".format(c),
              "Val auc=", "{:.6f}".format(val_auc),
              "Time elapsed=", "{}".format(datetime.now() - now))

    print("Optimization Finished!")

    # Persist the trained variables so the test cell can restore them.
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)
    

Num of data for train, val and test are: 
170884, 
42721, 
71202
Epoch: 0001 cost= 0.273930579 Val auc= 0.134462 Time elapsed= 0:00:01.694410
Epoch: 0002 cost= 0.053971462 Val auc= 0.122343 Time elapsed= 0:00:02.674651
Epoch: 0003 cost= 0.573769450 Val auc= 0.122879 Time elapsed= 0:00:03.703272
Epoch: 0004 cost= 0.376451492 Val auc= 0.135745 Time elapsed= 0:00:04.860840
Epoch: 0005 cost= 0.024410944 Val auc= 0.159853 Time elapsed= 0:00:05.976339
Epoch: 0006 cost= 0.175251752 Val auc= 0.193141 Time elapsed= 0:00:07.086186
Epoch: 0007 cost= 0.015292783 Val auc= 0.241998 Time elapsed= 0:00:08.231790
Epoch: 0008 cost= 0.014697665 Val auc= 0.286177 Time elapsed= 0:00:09.338120
Epoch: 0009 cost= 0.010906655 Val auc= 0.331807 Time elapsed= 0:00:10.459901
Epoch: 0010 cost= 0.034842875 Val auc= 0.384543 Time elapsed= 0:00:11.596600
Epoch: 0011 cost= 0.007506951 Val auc= 0.431591 Time elapsed= 0:00:12.667259
Epoch: 0012 cost= 0.059454374 Val auc= 0.481831 Time elapsed= 0:00:13.752805
Epoch: 0013

### AUC with Cross Softmax Entropy= 92%

### Predict on test data

In [22]:
# Checkpoint written by the raw-feature classifier training cell.
save_model = os.path.join(data_dir, 'temp_saved_model_FCNNets_raw.ckpt')
saver = tf.train.Saver()
# The initialiser op is created but not run here; variables come from the checkpoint.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()

    saver.restore(sess, save_model)

    # Score the held-out rows and rank them by positive-class probability.
    test_probs = sess.run(pred_probs, feed_dict={X: test_x})
    test_auc = auc(test_enc_y[:, 1], test_probs[:, 1])

    print("Test auc score: {}".format(test_auc))

INFO:tensorflow:Restoring parameters from .\temp_saved_model_FCNNets_raw.ckpt
Test auc score: 0.944725673503535


### Hinge loss with 40 epochs

In [23]:
 print("Dim for test_encoding and train_encoding are: \n", test_encoding.shape, '\n', train_encoding.shape)
#n_input = test_encoding.shape[1]
n_input1 = test_encoding.shape[1]

hidden_size = 4
output_size = 2

X = tf.placeholder(tf.float32, [None, n_input1], name='input_x')
y_ = tf.placeholder(tf.int32, shape=[None, output_size], name='target_y')

weights = {
   'W1': tf.Variable(tf.truncated_normal([n_input1, hidden_size])),
   'W2': tf.Variable(tf.truncated_normal([hidden_size, output_size])),
}
biases = {
   'b1': tf.Variable(tf.zeros([hidden_size])),
   'b2': tf.Variable(tf.zeros([output_size])),
}

hidden_layer =  tf.nn.relu(tf.add(tf.matmul(X, weights['W1']), biases['b1']))
pred_logits = tf.add(tf.matmul(hidden_layer, weights['W2']), biases['b2'])
pred_probs = tf.nn.softmax(pred_logits)

cross_entropy1 = tf.losses.hinge_loss(labels=y_, logits=pred_logits)

optimizer = tf.train.AdamOptimizer(2e-4).minimize(cross_entropy1)

Dim for test_encoding and train_encoding are: 
 (71202, 15) 
 (213605, 15)


In [24]:
n_epochs = 40
batch_size = 256

# PREPARE DATA
VAL_PERC = 0.2

# One-hot labels: column 0 is set for Class == 0, column 1 for Class == 1.
# Rows are sliced by position below, so this assumes `df` is still in the row
# order that produced train_encoding / test_encoding.
n_rows = df.shape[0]
all_y_bin = np.zeros((n_rows, 2))
all_y_bin[np.arange(n_rows), df['Class'].values] = 1

# The first (1 - VAL_PERC) of the encoded training rows are used for fitting,
# the rest for validation; everything after the training rows is test.
n_encoded_train = train_encoding.shape[0]
val_start = int(n_encoded_train * (1-VAL_PERC))

train_enc_x = train_encoding[:val_start]
train_enc_y = all_y_bin[:val_start]

val_enc_x = train_encoding[val_start:]
val_enc_y = all_y_bin[val_start:n_encoded_train]

test_enc_y = all_y_bin[n_encoded_train:]
print("Num of data for train, val and test are: \n{}, \n{}, \n{}".format(
    train_enc_x.shape[0], val_enc_x.shape[0], test_encoding.shape[0]))

# TRAIN STARTS
# NOTE(review): this is the same checkpoint path the cross-entropy classifier
# saves to, so running this cell overwrites that checkpoint. Confirm whether
# that is intended before pointing this model at its own file.
save_model = os.path.join(data_dir, 'temp_saved_model_FCLayers.ckpt')
saver = tf.train.Saver()

# Op that initialises every variable in the graph.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    now = datetime.now()
    sess.run(init)
    # Number of optimisation steps per epoch.
    total_batch = train_enc_x.shape[0] // batch_size
    for epoch in range(n_epochs):
        for step in range(total_batch):
            # Mini-batch drawn at random (np.random.choice samples with replacement).
            batch_idx = np.random.choice(train_enc_x.shape[0], batch_size)
            feed = {X: train_enc_x[batch_idx], y_: train_enc_y[batch_idx]}

            # One backprop step; `c` holds the hinge loss of this mini-batch.
            _, c = sess.run([optimizer, cross_entropy1], feed_dict=feed)

        if epoch % display_step != 0:
            continue

        # Validation AUC on the softmax score of the positive class.
        val_probs = sess.run(pred_probs, feed_dict={X: val_enc_x})
        val_auc = auc(val_enc_y[:, 1], val_probs[:, 1])
        print("Epoch:", '%04d' % (epoch+1),
              "cost=", "{:.9f}".format(c),
              "Val auc=", "{:.6f}".format(val_auc),
              "Time elapsed=", "{}".format(datetime.now() - now))

    print("Optimization Finished!")

    # Persist the trained variables.
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)

Num of data for train, val and test are: 
170884, 
42721, 
71202
Epoch: 0001 cost= 0.388080537 Val auc= 0.872042 Time elapsed= 0:00:02.361014
Epoch: 0002 cost= 0.184989512 Val auc= 0.841299 Time elapsed= 0:00:03.174040
Epoch: 0003 cost= 0.104796737 Val auc= 0.815421 Time elapsed= 0:00:03.982850
Epoch: 0004 cost= 0.032521646 Val auc= 0.809020 Time elapsed= 0:00:04.768262
Epoch: 0005 cost= 0.025235254 Val auc= 0.814739 Time elapsed= 0:00:05.613985
Epoch: 0006 cost= 0.015862674 Val auc= 0.832547 Time elapsed= 0:00:06.454091
Epoch: 0007 cost= 0.017648680 Val auc= 0.851951 Time elapsed= 0:00:07.349193
Epoch: 0008 cost= 0.002816314 Val auc= 0.860884 Time elapsed= 0:00:08.216149
Epoch: 0009 cost= 0.000000000 Val auc= 0.859376 Time elapsed= 0:00:09.121139
Epoch: 0010 cost= 0.001977457 Val auc= 0.859405 Time elapsed= 0:00:10.045152
Epoch: 0011 cost= 0.000000000 Val auc= 0.859200 Time elapsed= 0:00:11.018658
Epoch: 0012 cost= 0.006664959 Val auc= 0.860927 Time elapsed= 0:00:11.945272
Epoch: 0013

## Hinge loss
AUC: 93%
