## Credit Card Fraud Detection

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

df = pd.read_csv('data/creditcard.csv')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Preprocessing

In [2]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
fraud = df[df.Class == 1]
normal = df[df.Class == 0]

In [4]:
X_train = pd.concat([fraud.sample(frac=0.8, random_state=0),
                    normal.sample(frac=0.8, random_state=0)],axis=0)

In [5]:
X_test = df.loc[~df.index.isin(X_train.index)]

In [6]:
from sklearn.utils import shuffle

X_train = shuffle(X_train, random_state=0)
X_test = shuffle(X_test, random_state=0)

In [7]:
y_train = np.zeros((X_train.shape[0], 2), dtype=float)
y_train[:, 1] = X_train.Class
y_train[:, 0] = 1 - y_train[:, 1]

y_test = np.zeros((X_test.shape[0], 2), dtype=float)
y_test[:, 1] = X_test.Class
y_test[:, 0] = 1 - y_test[:, 1]

In [8]:
ratio = len(X_train) / len(X_train[X_train.Class == 1])
# ratio = 2
y_train[:,1] *= ratio

In [9]:
fields = ['Class']
X_train = X_train.drop(fields, axis=1)
X_test = X_test.drop(fields, axis=1)

In [10]:
split = (X_train.shape[0] * 7) // 8

inputX = X_train.as_matrix()[:split]
inputX_valid = X_train.as_matrix()[split:]
inputX_test = X_test.as_matrix()

inputY = y_train[:split]
inputY_valid = y_train[split:]
inputY_test = y_test

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """


In [11]:
features = X_train.columns.values
for feature in features:
    mean = X_train[feature].mean()
    std = X_train[feature].std()
    X_train.loc[:, feature] = \
        (X_train[feature] - mean) / std
    X_test.loc[:, feature] = \
        (X_test[feature] - mean) / std

## Model Implementation

In [12]:
input_nodes = X_train.shape[1]  # input_nodes = 30
X = tf.placeholder(tf.float32, [None, input_nodes])
y_ = tf.placeholder(tf.float32, [None, 2])
pkeep = tf.placeholder(tf.float32)

In [13]:
hidden_nodes1 = 60
hidden_nodes2 = 30
hidden_nodes3 = 15
W1 = tf.Variable(tf.zeros([input_nodes, hidden_nodes1]))
b1 = tf.Variable(tf.zeros([hidden_nodes1]))
y1 = tf.nn.sigmoid(tf.matmul(X, W1) + b1)

Instructions for updating:
Colocations handled automatically by placer.


In [14]:
W2 = tf.Variable(tf.zeros([hidden_nodes1, hidden_nodes2]))
b2 = tf.Variable(tf.zeros([hidden_nodes2]))
y2 = tf.nn.sigmoid(tf.matmul(y1, W2) + b2)

In [15]:
W3 = tf.Variable(tf.zeros([hidden_nodes2, hidden_nodes3]))
b3 = tf.Variable(tf.zeros([hidden_nodes3]))
y3 = tf.nn.sigmoid(tf.matmul(y2, W3) + b3)
y3 = tf.nn.dropout(y3, pkeep)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [16]:
W4 = tf.Variable(tf.zeros([hidden_nodes3, 2]))
b4 = tf.Variable(tf.zeros([2]))
y4 = tf.nn.softmax(tf.matmul(y3, W4) + b4)
y = y4

## Training

In [17]:
learning_rate = 0.005
cost = -tf.reduce_sum(y_ * tf.log(y))
optimizer = tf.train.AdamOptimizer(learning_rate)
optimizer = optimizer.minimize(cost)

In [18]:
labels = tf.argmax(y_,1)
predictions = tf.argmax(y, 1)
correct = tf.equal(labels, predictions)
accuracy = tf.reduce_mean(tf.cast(correct,tf.float32))

In [19]:
_, pre_op = tf.metrics.precision(labels, predictions)
_, rec_op = tf.metrics.recall(labels, predictions)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [20]:
n_epochs = 100
n_samples = inputY.shape[0]
batch_size = 2048
n_batches = n_samples // batch_size
dropout = 0.9

In [21]:
def print_epoch(epoch):
    trn_res = sess.run([accuracy, pre_op, rec_op, cost],
                      feed_dict = {X: inputX,
                                  y_: inputY,
                                  pkeep: dropout})
    vld_res = sess.run([accuracy, pre_op, rec_op, cost],
                      feed_dict = {X: inputX_valid,
                                  y_: inputY_valid,
                                  pkeep: 1})
    print('Epoch: {}'.format(epoch))
    print('Training ({:.4f},{:.4f},{:.4f},{:.1f})'.format(*trn_res))
    print('Training ({:.4f},{:.4f},{:.4f},{:.1f})'.format(*vld_res))

In [22]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for epoch in range(n_epochs): 
        for batch in range(n_batches):
            idx1 = batch * batch_size
            idx2 = (1 + batch) * batch_size
            batch_x = inputX[idx1:idx2]
            batch_y = inputY[idx1:idx2]

            sess.run([optimizer], feed_dict={X: batch_x, y_: batch_y, pkeep: dropout})

        if epoch % 5 == 0:
            print_epoch(epoch)

Epoch: 0
Training (0.9849,0.0923,0.8834,199343.5)
Training (0.9843,0.0926,0.8858,28824.0)
Epoch: 5
Training (0.9840,0.0917,0.9009,63652.9)
Training (0.9833,0.0915,0.9010,10115.9)
Epoch: 10
Training (0.9804,0.0861,0.9116,59823.2)
Training (0.9791,0.0854,0.9112,10379.8)
Epoch: 15
Training (0.9821,0.0848,0.9167,59182.6)
Training (0.9811,0.0846,0.9162,10412.6)
Epoch: 20
Training (0.9808,0.0833,0.9197,58371.7)
Training (0.9795,0.0830,0.9193,10655.4)
Epoch: 25
Training (0.9819,0.0828,0.9213,58310.9)
Training (0.9808,0.0827,0.9209,10713.6)
Epoch: 30
Training (0.9798,0.0815,0.9228,58342.4)
Training (0.9788,0.0813,0.9224,10834.5)
Epoch: 35
Training (0.9821,0.0815,0.9236,58222.7)
Training (0.9808,0.0814,0.9232,10805.2)
Epoch: 40
Training (0.9796,0.0806,0.9245,58499.4)
Training (0.9783,0.0804,0.9241,10890.6)
Epoch: 45
Training (0.9811,0.0802,0.9249,58567.0)
Training (0.9797,0.0802,0.9246,10865.7)
Epoch: 50
Training (0.9817,0.0803,0.9255,58791.3)
Training (0.9807,0.0802,0.9252,10652.8)
Epoch: 55
T

In [29]:
print(ratio)

578.2893401015228
