In [1]:
import sys
import os.path
import pandas as pd
import numpy as np 
import tensorflow as tf
import sklearn as sk

from datetime import datetime
from sklearn.model_selection import train_test_split


# Choose the device from the command line: "gpu" requests the first GPU,
# anything else selects the CPU.
# Note: TensorFlow GPU support is CUDA based, so it only works with Nvidia GPUs.
# Fall back to the CPU when no argument is given instead of raising an
# IndexError on sys.argv[1].
device_name = sys.argv[1] if len(sys.argv) > 1 else 'cpu'

if device_name == 'gpu':
    device_name = '/gpu:0'
else:
    device_name = '/cpu:0'

# NOTE(review): tf.device() returns a context manager. Called like this,
# outside a `with` statement, it does not pin any op to `device_name`; the
# graph-building code would have to run inside `with tf.device(device_name):`
# for the choice to take effect. The call is kept so behaviour is unchanged.
tf.device(device_name)
print('Device used:' + str(device_name))

# Input CSV location. The command-line override below is kept disabled:
#datasetCSVPath = sys.argv[2]
datasetCSVPath = "creditcard.csv"
# Absolute path of the directory that contains the CSV; outputs are written there.
parentPath = os.path.dirname(os.path.abspath(datasetCSVPath))
print(datasetCSVPath, parentPath, sep='\n')

# Load the dataset and add a complementary label column so the two label
# columns (Class, classInverse) can be used as a two-column target.
print('Reading dataset...')
df = pd.read_csv(datasetCSVPath, index_col=False)

print('Adding Class inverse column...')
# Vectorised complement of the label column: one pandas operation instead of
# a Python-level loop over every row with iterrows().
df = df.assign(classInverse=1 - df['Class'])
print('')


print('Raw Data: \n------------------------------ \n')
print(df.head())
print('')
print('Whole dataset size: ' + str(len(df)))
print('')
print('Data Discription\n------------------------------ \n')
print(df.describe())
print('')

# Feature matrix: Time, the V1..V28 columns, and Amount (30 columns in total).
feature_columns = ['Time'] + ['V%d' % i for i in range(1, 29)] + ['Amount']
X = pd.DataFrame(df, columns=feature_columns)
print('Features Vector:\n------------------------------ \n')
print(X.head())
print('')

# Target matrix: the label and its complement, one column per class.
label_columns = ['Class', 'classInverse']
Y = pd.DataFrame(df, columns=label_columns)
print('Class Vector:\n------------------------------ \n')
print(Y.head())
print('')


# Hold out a fraction of the rows for evaluation. shuffle=False keeps the
# original row order, so the test set is the tail of the data.
test_fraction = 0.1
# The ratio in the message is derived from test_fraction so the log cannot
# drift away from the split that is actually performed.
print('Splitting Features and Class vectors into training and validation sets by %d/%d ratio...'
      % (round(100 * (1 - test_fraction)), round(100 * test_fraction)))
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_fraction, shuffle=False)
print('Train features set size: ' + str(len(X_train)))
print('Train Class size: ' + str(len(Y_train)))
print('Train fraud frequency: ')
print(Y_train['Class'].value_counts())
print('')
print('Test features size: ' + str(len(X_test)))
print('Test Class size: ' + str(len(Y_test)))
# These counts describe the test split, so label them as such.
print('Test fraud frequency: ')
print(Y_test['Class'].value_counts())


# Training hyper-parameters
learning_rate = 0.0005  # optimizer step size
# epochs to run, rows per mini-batch, and how often (in epochs) to log the cost
training_epochs, batch_size, display_step = 1400, 2048, 1

# Network dimensions
n_hidden_1 = n_hidden_2 = 45  # width of the 1st and 2nd hidden layers
n_input = 30    # number of input features
n_classes = 2   # number of classes to predict


# Graph inputs. The first dimension is None so that any number of rows can be
# fed at once (a training mini-batch or the whole test set).
x = tf.placeholder(dtype=tf.float32, shape=[None, n_input])    # feature rows
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])  # one column per class

# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a two-hidden-layer perceptron and return its output tensor.

    x is the input tensor. weights must provide the keys 'h1', 'h2' and
    'out'; biases must provide 'b1', 'b2' and 'out'. Each hidden layer is
    matmul + bias followed by ReLU; the output layer is linear (no
    activation), so the returned tensor holds unnormalised scores.
    """
    activations = x
    # Hidden layers with ReLU activation, applied in order.
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        pre_activation = tf.add(tf.matmul(activations, weights[weight_key]),
                                biases[bias_key])
        activations = tf.nn.relu(pre_activation)
    # Output layer with linear activation.
    return tf.matmul(activations, weights['out']) + biases['out']

# Trainable parameters of the network, each initialised from tf.random_normal.
# The variables are created in the order h1, h2, out, then b1, b2, out.
weight_shapes = [
    ('h1', [n_input, n_hidden_1]),
    ('h2', [n_hidden_1, n_hidden_2]),
    ('out', [n_hidden_2, n_classes]),
]
weights = {
    layer: tf.Variable(tf.random_normal(shape))
    for layer, shape in weight_shapes
}

bias_shapes = [
    ('b1', [n_hidden_1]),
    ('b2', [n_hidden_2]),
    ('out', [n_classes]),
]
biases = {
    layer: tf.Variable(tf.random_normal(shape))
    for layer, shape in bias_shapes
}

# Forward pass: `pred` holds the network's linear outputs (logits).
pred = multilayer_perceptron(x, weights, biases)

# Loss: softmax cross-entropy per row, averaged over the fed rows.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)

# Training op: one Adam step that minimises the mean loss.
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

# Op that initialises every variable created so far.
init = tf.global_variables_initializer()

# Session factory. To run without the GPU options, return tf.Session() instead.
def get_session():
    """Return a tf.Session configured with gpu_options.allow_growth = True."""
    gpu_options = tf.GPUOptions(allow_growth=True)
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Train the network (resuming from a saved checkpoint when one exists), save
# it, then evaluate on the held-out rows and write the labels/predictions to
# text files next to the dataset.
print('')
startTime = datetime.now()
print("Started at: ", startTime)
print('')

saver = tf.train.Saver()
# Checkpoint *prefix*: with the default checkpoint format the Saver writes
# model.ckpt.index / model.ckpt.data-* / model.ckpt.meta, not a file that is
# literally called model.ckpt.
checkpoint_path = os.path.join(parentPath, "model.ckpt")

# Launch the graph
with get_session() as sess:

    # os.path.isfile() on the prefix never matches the files the Saver writes,
    # so ask TensorFlow whether a checkpoint exists under this prefix.
    if tf.train.checkpoint_exists(checkpoint_path):
        saver.restore(sess, checkpoint_path)
    else:
        sess.run(init)

    # The batches are identical in every epoch (no reshuffling), so split the
    # training data once. max(1, ...) keeps at least one batch when there are
    # fewer rows than batch_size, which would otherwise make array_split fail.
    total_batch = max(1, len(X_train) // batch_size)
    X_batches = np.array_split(X_train.values, total_batch)
    Y_batches = np.array_split(Y_train.values, total_batch)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Loop over all batches
        for batch_x, batch_y in zip(X_batches, Y_batches):
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Accumulate the mean loss over the epoch
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    save_path = saver.save(sess, checkpoint_path)

    print('')
    print('Optimization Finished:\n------------------------------ \n')
    print("Model saved in path: %s" % save_path)

    # Test model: index of the largest output per row vs. index of the
    # largest label column. Index 0 is the 'Class' column (Class == 1) and
    # index 1 is the 'classInverse' column.
    y_p = tf.argmax(pred, 1)
    correct_prediction = tf.equal(y_p, tf.argmax(y, 1))

    # Calculate accuracy and predictions in a single pass over the test set
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float" ))
    val_accuracy, y_pred = sess.run([accuracy, y_p], feed_dict={x:X_test, y:Y_test})
    print("Accuracy:", val_accuracy)

    # classInverse uses the same encoding as the argmax index above
    # (0 when Class == 1, 1 otherwise). The builtin int replaces np.int,
    # which was removed from NumPy.
    y_true = np.array(Y_test['classInverse'], dtype=int)
    np.savetxt(os.path.join(parentPath, 'y_true.txt'), y_true, fmt='%d')
    np.savetxt(os.path.join(parentPath, 'y_pred.txt'), y_pred, fmt='%d')

    print('y_predicted length:',y_pred.size)
    df_pred = pd.DataFrame(data=y_pred)
    print(df_pred[0].value_counts())

    print('')

    print('y_true labels length:',y_true.size)
    df_true = pd.DataFrame(data=y_true)
    print(df_true[0].value_counts())

    # In this encoding label 0 is the Class == 1 (fraud) case, which the
    # confusion-matrix legend below treats as the positive class. The default
    # pos_label=1 would report recall for the other class instead.
    print("Recall", sk.metrics.recall_score(y_true, y_pred, pos_label=0))
    print("\nconfusion_matrix legend:\n")
    print("[True Positives    False Negatives]\n")
    print("[False Positives   True Negatives]\n")
    print("\n\nconfusion_matrix:\n")

    print(sk.metrics.confusion_matrix(y_true, y_pred))
    print('')
    print("Time taken:", datetime.now() - startTime)
    # Module-level name holding the test-set predictions; these are the
    # predictions already computed above, so no extra evaluation is run.
    result = y_pred
    


Device used:/cpu:0
creditcard.csv
C:\Users\Kevin\Documents\Kevin\DeepLearning\Final Project
Reading dataset...
Adding Class inverse column...

Raw Data: 
------------------------------ 

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9      ...            V22       V23       V24       V25  \
0  0.098698  0.363787      ...       0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425      ...      -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654      ...       0.771679  0.909412 -0.689281 -0.327642   
3  0.37743

Epoch: 0049 cost= 1425.281103382
Epoch: 0050 cost= 1267.533685825
Epoch: 0051 cost= 1055.326203873
Epoch: 0052 cost= 829.908508575
Epoch: 0053 cost= 590.370218090
Epoch: 0054 cost= 335.710276735
Epoch: 0055 cost= 72.626038253
Epoch: 0056 cost= 18.692447204
Epoch: 0057 cost= 40.987685624
Epoch: 0058 cost= 55.761810259
Epoch: 0059 cost= 1338.216129503
Epoch: 0060 cost= 1163.931846092
Epoch: 0061 cost= 951.088379801
Epoch: 0062 cost= 724.965604353
Epoch: 0063 cost= 484.633243327
Epoch: 0064 cost= 229.164655312
Epoch: 0065 cost= 18.132250915
Epoch: 0066 cost= 12.406195953
Epoch: 0067 cost= 8.306750536
Epoch: 0068 cost= 8.907852437
Epoch: 0069 cost= 22.360649150
Epoch: 0070 cost= 1394.870030636
Epoch: 0071 cost= 1258.228160728
Epoch: 0072 cost= 1059.003192564
Epoch: 0073 cost= 847.393489263
Epoch: 0074 cost= 622.542086080
Epoch: 0075 cost= 383.528291254
Epoch: 0076 cost= 129.429261189
Epoch: 0077 cost= 5.091578945
Epoch: 0078 cost= 8.754577708
Epoch: 0079 cost= 10.907391371
Epoch: 0080 cost

Epoch: 0311 cost= 10.110601777
Epoch: 0312 cost= 10.149740007
Epoch: 0313 cost= 12.972780008
Epoch: 0314 cost= 933.458893146
Epoch: 0315 cost= 346.821048243
Epoch: 0316 cost= 279.932159567
Epoch: 0317 cost= 625.596599659
Epoch: 0318 cost= 82.365296553
Epoch: 0319 cost= 11.043098042
Epoch: 0320 cost= 11.245541067
Epoch: 0321 cost= 11.122586381
Epoch: 0322 cost= 11.435514654
Epoch: 0323 cost= 10.887542333
Epoch: 0324 cost= 50.639627103
Epoch: 0325 cost= 1470.837631750
Epoch: 0326 cost= 1290.765029755
Epoch: 0327 cost= 1062.663271357
Epoch: 0328 cost= 820.953792633
Epoch: 0329 cost= 564.741551363
Epoch: 0330 cost= 294.768301348
Epoch: 0331 cost= 47.299019695
Epoch: 0332 cost= 11.102656729
Epoch: 0333 cost= 12.025754233
Epoch: 0334 cost= 11.187210680
Epoch: 0335 cost= 9.866692896
Epoch: 0336 cost= 9.664683657
Epoch: 0337 cost= 9.122193866
Epoch: 0338 cost= 9.062557687
Epoch: 0339 cost= 9.292659407
Epoch: 0340 cost= 8.515372171
Epoch: 0341 cost= 8.511669938
Epoch: 0342 cost= 8.378461127
Epo

Epoch: 0575 cost= 10.905864529
Epoch: 0576 cost= 11.060606360
Epoch: 0577 cost= 11.563143510
Epoch: 0578 cost= 10.441165953
Epoch: 0579 cost= 9.253992768
Epoch: 0580 cost= 7.899158373
Epoch: 0581 cost= 5.115158929
Epoch: 0582 cost= 130.124158874
Epoch: 0583 cost= 13.121581630
Epoch: 0584 cost= 11.937367551
Epoch: 0585 cost= 5.825766778
Epoch: 0586 cost= 4.892004677
Epoch: 0587 cost= 36.704844402
Epoch: 0588 cost= 1416.655164576
Epoch: 0589 cost= 1262.991803698
Epoch: 0590 cost= 1053.984731591
Epoch: 0591 cost= 833.232313377
Epoch: 0592 cost= 599.899578654
Epoch: 0593 cost= 353.381550824
Epoch: 0594 cost= 108.521620714
Epoch: 0595 cost= 10.757779716
Epoch: 0596 cost= 12.465372566
Epoch: 0597 cost= 11.847483314
Epoch: 0598 cost= 11.949577723
Epoch: 0599 cost= 11.717933788
Epoch: 0600 cost= 11.663521019
Epoch: 0601 cost= 11.466656039
Epoch: 0602 cost= 11.235855452
Epoch: 0603 cost= 11.133699261
Epoch: 0604 cost= 10.476662395
Epoch: 0605 cost= 6.015260271
Epoch: 0606 cost= 5.190081159
Epoc

Epoch: 0838 cost= 1043.232225260
Epoch: 0839 cost= 814.017390256
Epoch: 0840 cost= 571.655674742
Epoch: 0841 cost= 316.061300997
Epoch: 0842 cost= 62.754255994
Epoch: 0843 cost= 8.226088543
Epoch: 0844 cost= 8.649322467
Epoch: 0845 cost= 8.187241585
Epoch: 0846 cost= 8.402116945
Epoch: 0847 cost= 8.423817218
Epoch: 0848 cost= 8.614806436
Epoch: 0849 cost= 8.322020880
Epoch: 0850 cost= 10.314180011
Epoch: 0851 cost= 8.320039645
Epoch: 0852 cost= 8.302128175
Epoch: 0853 cost= 10.200193016
Epoch: 0854 cost= 818.881591521
Epoch: 0855 cost= 397.329504084
Epoch: 0856 cost= 26.650259699
Epoch: 0857 cost= 8.393140419
Epoch: 0858 cost= 8.502278244
Epoch: 0859 cost= 8.888707696
Epoch: 0860 cost= 10.746942625
Epoch: 0861 cost= 207.938526369
Epoch: 0862 cost= 437.031186138
Epoch: 0863 cost= 15.487651904
Epoch: 0864 cost= 8.518816145
Epoch: 0865 cost= 285.621154945
Epoch: 0866 cost= 11.137220927
Epoch: 0867 cost= 824.924825208
Epoch: 0868 cost= 304.323693771
Epoch: 0869 cost= 7.384888932
Epoch: 087

Epoch: 1101 cost= 72.502223401
Epoch: 1102 cost= 16.618017741
Epoch: 1103 cost= 15.936983610
Epoch: 1104 cost= 15.854197701
Epoch: 1105 cost= 14.507602569
Epoch: 1106 cost= 12.495097786
Epoch: 1107 cost= 11.691432468
Epoch: 1108 cost= 12.533061536
Epoch: 1109 cost= 12.067099822
Epoch: 1110 cost= 12.323933668
Epoch: 1111 cost= 15.935526694
Epoch: 1112 cost= 11.918300205
Epoch: 1113 cost= 9.288574001
Epoch: 1114 cost= 9.590662287
Epoch: 1115 cost= 9.214546914
Epoch: 1116 cost= 27.590955662
Epoch: 1117 cost= 1211.707615326
Epoch: 1118 cost= 1001.011263086
Epoch: 1119 cost= 745.405635122
Epoch: 1120 cost= 476.260798913
Epoch: 1121 cost= 207.191297014
Epoch: 1122 cost= 24.032761482
Epoch: 1123 cost= 16.612222891
Epoch: 1124 cost= 17.255898311
Epoch: 1125 cost= 16.245039401
Epoch: 1126 cost= 15.572729087
Epoch: 1127 cost= 12.599789940
Epoch: 1128 cost= 11.573968537
Epoch: 1129 cost= 13.863069513
Epoch: 1130 cost= 15.885051233
Epoch: 1131 cost= 19.952993249
Epoch: 1132 cost= 17.567989626
Epoc

Epoch: 1363 cost= 41.320134507
Epoch: 1364 cost= 97.389302279
Epoch: 1365 cost= 24.962842792
Epoch: 1366 cost= 27.296702830
Epoch: 1367 cost= 189.086507344
Epoch: 1368 cost= 384.806906254
Epoch: 1369 cost= 723.692535703
Epoch: 1370 cost= 162.841984985
Epoch: 1371 cost= 397.706149281
Epoch: 1372 cost= 23.265237190
Epoch: 1373 cost= 15.130542472
Epoch: 1374 cost= 15.622419947
Epoch: 1375 cost= 15.595861481
Epoch: 1376 cost= 15.153794261
Epoch: 1377 cost= 15.722029632
Epoch: 1378 cost= 15.717569291
Epoch: 1379 cost= 15.839189033
Epoch: 1380 cost= 15.801175198
Epoch: 1381 cost= 15.606057827
Epoch: 1382 cost= 15.467424868
Epoch: 1383 cost= 36.724955681
Epoch: 1384 cost= 562.862275815
Epoch: 1385 cost= 1055.103763237
Epoch: 1386 cost= 679.358124647
Epoch: 1387 cost= 291.912707739
Epoch: 1388 cost= 23.733540576
Epoch: 1389 cost= 15.116531552
Epoch: 1390 cost= 15.564820344
Epoch: 1391 cost= 14.740219616
Epoch: 1392 cost= 14.769350265
Epoch: 1393 cost= 14.487486976
Epoch: 1394 cost= 15.38590924