In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf # Neural network

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

# from subprocess import check_output
# print(check_output(["ls", "../input"]).decode("utf8"))

# Any results you write to the current directory are saved as output.
# Load the transactions table from the Kaggle input directory into a DataFrame.
df = pd.read_csv('../input/creditcard.csv')
# Last expression of the cell: renders the first five rows as a preview.
df.head()

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# List the style sheet names this matplotlib installation offers.
print(plt.style.available)

In [None]:
# Apply the 'ggplot' style sheet to every figure drawn after this cell.
plt.style.use('ggplot')

In [None]:
# Amount against Time for the rows whose Class is 0.
plt.scatter(
    x=df.loc[df.Class == 0, 'Time'],
    y=df.loc[df.Class == 0, 'Amount'],
)

In [None]:
# Amount against Time for the rows whose Class is 1; the (constant) Class
# value of those rows is passed as the colour array.
plt.scatter(
    x=df.loc[df.Class == 1, 'Time'],
    y=df.loc[df.Class == 1, 'Amount'],
    c=df.loc[df.Class == 1, 'Class'],
)

In [None]:
# Whole table as a NumPy array.
data = df.values

# Per-class feature matrices: rows are selected by Class, then only the
# columns at positions 1..29 are kept (29 columns, matching the network input).
normals = df.loc[df.Class == 0].values[:, 1:30]
frauds = df.loc[df.Class == 1].values[:, 1:30]

# The last 16384 class-0 rows are held out for validation; all earlier
# class-0 rows form the training set.
validation, training = normals[-16384:], normals[:-16384]

In [None]:
# Builds a TensorFlow 1.x graph for a one-hidden-layer network that maps each
# input row back onto itself: input_size -> hidden_size (ReLU) -> input_size.
# The per-row distance between input and output is exposed as `batch_losses`
# and its mean (`loss`) is minimised with Adam.
input_size = 29     # width of one input row (the data cell keeps 29 columns)
hidden_size = 128   # number of units in the hidden layer

# Graph definition
graph = tf.Graph()
with graph.as_default():
    # Input rows, fed at run time; the batch dimension is left open.
    features = tf.placeholder(tf.float32, shape=(None, input_size), name='features')
    # Boolean fed by the training/evaluation cell. No active op in this cell
    # consumes it; it is only referenced by the commented-out batch norm lines.
    is_training = tf.placeholder(tf.bool, name='is_training')

    with tf.name_scope('hidden_layer'):
        # Weights start from a truncated normal (stddev 0.1), biases from 1.0.
        fcw = tf.Variable(tf.truncated_normal([input_size, hidden_size],
                                              dtype=tf.float32,
                                              stddev=1e-1), name='weights')
        fcb = tf.Variable(tf.constant(1.0, shape=[hidden_size], dtype=tf.float32), name='biases')
        # Affine transform followed by ReLU.
        logits = tf.nn.bias_add(tf.matmul(features, fcw), fcb)
        # logits = tf.layers.batch_normalization(logits, training=is_training)
        logits = tf.nn.relu(logits)

    # The triple-quoted block below is a bare string literal, not live code:
    # it keeps a disabled second hidden layer around and creates no graph ops.
    '''
    with tf.name_scope('secret_layer'):
        fcw = tf.Variable(tf.truncated_normal([hidden_size, hidden_size],
                                              dtype=tf.float32,
                                              stddev=1e-1), name='weights')
        fcb = tf.Variable(tf.constant(1.0, shape=[hidden_size], dtype=tf.float32), name='biases')
        logits = tf.nn.bias_add(tf.matmul(logits, fcw), fcb)
        # logits = tf.layers.batch_normalization(logits, training=is_training)
        logits = tf.nn.relu(logits)
    '''

    with tf.name_scope('output_layer'):
        # Projects the hidden activations back to input_size; no activation is
        # applied, so `logits` ends up with the same width as `features`.
        fcw = tf.Variable(tf.truncated_normal([hidden_size, input_size],
                                              dtype=tf.float32,
                                              stddev=1e-1), name='weights')
        fcb = tf.Variable(tf.constant(1.0, shape=[input_size], dtype=tf.float32), name='biases')
        logits = tf.nn.bias_add(tf.matmul(logits, fcw), fcb)

    # Define loss and optimizer
    # Per-row Euclidean distance between the input and the network output:
    # sqrt of the squared differences summed over axis 1.
    batch_losses = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(features, logits), 2), 1))
    # Scalar objective: mean of the per-row distances in the fed batch.
    loss = tf.reduce_mean(batch_losses)
    # Adam with a learning rate of 1e-4.
    optimize = tf.train.AdamOptimizer(0.0001).minimize(loss=loss)

In [None]:
 batch_size = 1024

# Run graph
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    losses = []
    
    # Training
    for epoch in range(24):
        epoch_loss = 0.
        for i in range(0, len(training), batch_size):
            start_idx = i
            end_idx = min(i + batch_size, len(normals))

            batch_loss, _ = sess.run([loss, optimize], feed_dict={
                features: training[start_idx: end_idx],
                is_training: True
            })
            # print('Loss at', start_idx, batch_loss)
            losses.append(batch_loss)
            epoch_loss += batch_loss
        # print('Mean Epoch Loss', (epoch_loss / (len(normals) / batch_size)))
        
    # print('Mean Loss', np.mean(losses))
    
    # Evaluation
    batch_loss, valid_predictions = sess.run([loss, batch_losses], feed_dict={
        features: validation,
        is_training: False
    })
    print('Non-fraudulent transactions loss', batch_loss)
    batch_loss, fraud_predictions = sess.run([loss, batch_losses], feed_dict={
        features: frauds,
        is_training: False
    })
    print('Fraudulent transactions Loss', batch_loss)
    
    print('Valid transactions:', np.shape(valid_predictions)[0])
    print('Fraudulent transactions:', np.shape(fraud_predictions)[0])
    
    # Calculation
    true_positives = 0
    false_positives = 0
    true_negatives = 0
    false_negatives = 0
    
    # threshold = 2. # 24 epochs, 2 hidden layers, no batch normalization - 0.87, 0.82, 0.83
    threshold = 2 # 24 epochs, 1x128 hidden layers - 0.89, 0.81, 0.84
    for x in valid_predictions:
        if x > threshold:
            false_positives += 1
        else:
            true_negatives += 1


    for x in fraud_predictions:
        if x > threshold:
            true_positives += 1
        else:
            false_negatives += 1

    print('True fraudulent transactions: {}'.format(true_positives))
    print('False fraudulent transactions: {}'.format(false_positives))
    print('True valid transactions: {}'.format(true_negatives))
    print('False valid transactions: {}'.format(false_negatives))

    precision = true_positives * 1.0 / (true_positives + false_positives)
    recall = true_positives * 1.0 / (true_positives + false_negatives)
    f1_score = 2.0 * (precision * recall) / (precision + recall)
    print('Precision: {:.2}'.format(precision))
    print('Recall: {:.2}'.format(recall))
    print('F1-Score: {:.2}'.format(f1_score))

    plt.figure(figsize=(12, 7))
    plt.plot(losses)
    plt.show()
    
    bins = np.linspace(0, 1000, 100)
    plt.figure(figsize=(12, 7))
    plt.hist(valid_predictions[:492], bins=bins, alpha=0.5, label='valids', color='green')
    plt.hist(fraud_predictions, bins=bins, alpha=0.5, label='frauds', color='red')
    plt.show()