<a href="https://colab.research.google.com/github/coryjcombs/CAPS-Fall-2019/blob/master/TF_2_0_Credit_Fraud_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Neural Network Model: Detecting Credit Fraud**
Base data from https://www.kaggle.com/mlg-ulb/creditcardfraud <br />
Created for use in the Fall 2019 CAPS seminar

###Python Imports

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow 2.x (currently 2.0)
%tensorflow_version 2.x
import tensorflow as tf

# Keras
from tensorflow import keras

# Sklearn
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler

# Helpers
import numpy as np
import pandas as pd
import io

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import pyplot
%matplotlib inline

###Data Import

In [0]:
# Upload data file to server (loads a widget)
from google.colab import files
uploaded = files.upload()

Saving credit_fraud_data.csv to credit_fraud_data (1).csv


KeyboardInterrupt: ignored

In [0]:
# Load data from server into DataFrame
df = pd.read_csv(io.StringIO(uploaded['credit_fraud_data.csv'].decode('utf-8')))

# Remove unnecessary 'Time' column (if not yet already removed)
if 'Time' in df.columns:
  df.drop('Time', axis=1, inplace=True)

KeyError: ignored

In [0]:
df.head(10)

NameError: ignored

###Visualization

In [0]:
# Pairplot - intended for demo on subset of features but not yet set up
# (DO NOT RUN on full set!)

# sns.pairplot(df, hue='Class')

In [0]:
# Correlation plot

pyplot.figure(figsize=(10, 10))
sns.heatmap(df.corr(), annot=False, square=False, annot_kws={"size": 7}, cmap='RdBu')

# Confirms features are uncorrelated (excepting Amount);
# note how we might exclude V9-Amount based on correlations

NameError: ignored

<Figure size 720x720 with 0 Axes>

###Data Preparation

In [0]:
# Separate feature columns and response variable column
features = np.array(df.iloc[:,:-1])  # All except last column, Class
labels = np.array(df.iloc[:,-1:])  # Only last column, Class

In [0]:
# Scale feature data to range(0,1)
stdscale = StandardScaler()
features = stdscale.fit_transform(features)
#print(features)

In [0]:
# Stratify and shuffle for even distribution of classes in training and test data
sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=44)

In [0]:
# Get splits
sss.get_n_splits(features, labels)

In [0]:
# Creates training and test sets based on splits
for train_index, test_index in sss.split(features, labels):
  print("Train:", train_index, "Test:", test_index)
  features_train, features_test = features[train_index], features[test_index]
  labels_train, labels_test = labels[train_index], labels[test_index]

In [0]:
# Confirm distribution of classes (1 and 0)
print(labels_train.sum())  # 246 positives in training data
print(labels_test.sum())  # 246 positives in test data

###Modeling

In [0]:
# Instantiate model
# We can specify the number of neurons in each layer and the activation function.
model = keras.Sequential([
  keras.layers.Dense(56, activation='relu'), 
  keras.layers.Dense(28, activation='softmax')
])

In [0]:
# Compile with appropriate optimizer, loss function, and metric(s)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])

In [0]:
#Fit model to training data
model.fit(features_train, labels_train, epochs=5)

In [0]:
# Evaluate model performance using test data and selected metric(s)
test_loss, test_acc = model.evaluate(features_test, labels_test, verbose=2)
print('Test Loss:', test_loss, ',', 'Test Binary Accuracy:', test_acc)

###Confusion matrix

In [0]:
# Predict values using test features
labels_predicted = model.predict(features_test)

In [0]:
# Copy predictions
labels_predicted_clean = labels_predicted.copy()

# Turn predictions into binary classifications using 0.5 cutoff
labels_predicted_clean[labels_predicted_clean <= 0.5] = 0
labels_predicted_clean[labels_predicted_clean > 0.5] = 1

In [0]:
# Create confusion matrix
conf_mat = tf.math.confusion_matrix(labels=labels_test, predictions=labels_predicted_clean).numpy()

In [0]:
print(conf_mat)

NameError: ignored