In [67]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing
import matplotlib.pyplot as plt


In [125]:
ccard = pd.read_csv('creditcard.csv')

In [126]:
ccard = sklearn.utils.shuffle(ccard)
X = ccard.drop("V1", axis = 1).values
y = ccard["Class"].values

In [128]:
y

array([0, 0, 0, ..., 0, 0, 0])

In [129]:
ccard = ccard.drop(['V28','V27','V26','V25','V24','V23','V22','V20','V15','V13','V8'], axis =1)

In [130]:
ccard['V1_'] = ccard.V1.map(lambda x: 1 if x < -3 else 0)
ccard['V2_'] = ccard.V2.map(lambda x: 1 if x > 2.5 else 0)
ccard['V3_'] = ccard.V3.map(lambda x: 1 if x < -4 else 0)
ccard['V4_'] = ccard.V4.map(lambda x: 1 if x > 2.5 else 0)
ccard['V5_'] = ccard.V5.map(lambda x: 1 if x < -4.5 else 0)
ccard['V6_'] = ccard.V6.map(lambda x: 1 if x < -2.5 else 0)
ccard['V7_'] = ccard.V7.map(lambda x: 1 if x < -3 else 0)
ccard['V9_'] = ccard.V9.map(lambda x: 1 if x < -2 else 0)
ccard['V10_'] = ccard.V10.map(lambda x: 1 if x < -2.5 else 0)
ccard['V11_'] = ccard.V11.map(lambda x: 1 if x > 2 else 0)
ccard['V12_'] = ccard.V12.map(lambda x: 1 if x < -2 else 0)
ccard['V14_'] = ccard.V14.map(lambda x: 1 if x < -2.5 else 0)
ccard['V16_'] = ccard.V16.map(lambda x: 1 if x < -2 else 0)
ccard['V17_'] = ccard.V17.map(lambda x: 1 if x < -2 else 0)
ccard['V18_'] = ccard.V18.map(lambda x: 1 if x < -2 else 0)
ccard['V19_'] = ccard.V19.map(lambda x: 1 if x > 1.5 else 0)
ccard['V21_'] = ccard.V21.map(lambda x: 1 if x > 0.6 else 0)

In [131]:
df = ccard

In [132]:
#Create a new feature for normal (non-fraudulent) transactions.
df.loc[df.Class == 0, 'Normal'] = 1
df.loc[df.Class == 1, 'Normal'] = 0

In [133]:
df = df.rename(columns={'Class': 'Fraud'})

In [134]:
print(df.Normal.value_counts())
print()
print(df.Fraud.value_counts())

1.0    284315
0.0       492
Name: Normal, dtype: int64

0    284315
1       492
Name: Fraud, dtype: int64


In [135]:
Fraud = df[df.Fraud == 1]
Normal = df[df.Normal == 1]

# Set X_train equal to 80% of the fraudulent transactions.
X_train = Fraud.sample(frac=0.8)
count_Frauds = len(X_train)

# Add 80% of the normal transactions to X_train.
X_train = pd.concat([X_train, Normal.sample(frac = 0.8)], axis = 0)

# X_test contains all the transaction not in X_train.
X_test = df.loc[~df.index.isin(X_train.index)]

In [138]:
#Shuffle the dataframes so that the training is done in a random order.
X_train = sklearn.utils.shuffle(X_train)
X_test = sklearn.utils.shuffle(X_test)

In [139]:
#Add our target features to y_train and y_test.
y_train = X_train.Fraud
y_train = pd.concat([y_train, X_train.Normal], axis=1)

y_test = X_test.Fraud
y_test = pd.concat([y_test, X_test.Normal], axis=1)

In [141]:
y_train.drop("Normal", axis = 1)

Unnamed: 0,Fraud
50566,0
200783,0
151481,0
173202,0
206850,0
...,...
263706,0
109484,0
1577,0
259437,0


In [79]:
#Drop target features from X_train and X_test.
X_train = X_train.drop(['Fraud','Normal'], axis = 1)
X_test = X_test.drop(['Fraud','Normal'], axis = 1)

In [80]:
'''
Due to the imbalance in the data, ratio will act as an equal weighting system for our model. 
By dividing the number of transactions by those that are fraudulent, ratio will equal the value that when multiplied
by the number of fraudulent transactions will equal the number of normal transaction. 
Simply put: # of fraud * ratio = # of normal
'''
ratio = len(X_train)/count_Frauds 

y_train.Fraud *= ratio
y_test.Fraud *= ratio

In [81]:
#Names of all of the features in X_train.
features = X_train.columns.values

#Transform each feature in features so that it has a mean of 0 and standard deviation of 1; 
#this helps with training the neural network.
for feature in features:
    mean, std = df[feature].mean(), df[feature].std()
    X_train.loc[:, feature] = (X_train[feature] - mean) / std
    X_test.loc[:, feature] = (X_test[feature] - mean) / std

In [89]:
# Split the testing data into validation and testing sets
split = int(len(y_test)/2)

inputX = X_train.values
inputY = y_train.values
inputX_valid = X_test.values[:split]
inputY_valid = y_test.values[:split]
inputX_test = X_test.values[split:]
inputY_test = y_test.values[split:]

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V9,V10,...,V10_,V11_,V12_,V14_,V16_,V17_,V18_,V19_,V21_,Normal
118085,74945.0,-0.122937,0.641327,0.794347,0.532442,-0.551600,-0.380113,-0.114485,0.128259,-0.709565,...,0,0,0,0,0,0,0,0,0,1.0
140417,83711.0,1.056418,-1.263388,1.016768,-0.291577,-1.558687,0.178412,-1.038405,0.136915,0.283270,...,0,0,0,0,0,0,0,0,0,1.0
158010,110779.0,0.138295,0.959476,-0.569235,-0.608796,1.275987,-0.008457,0.674017,1.230703,-1.007295,...,0,0,1,0,0,0,0,0,0,1.0
209293,137494.0,1.985309,0.082402,-0.893119,1.484855,0.011855,-0.743823,0.191915,0.552959,0.130177,...,0,0,0,0,0,0,0,0,0,1.0
66862,52251.0,-0.926906,0.852249,1.599737,-0.066270,0.561277,-0.577565,0.723153,-0.319589,-0.613366,...,0,0,0,0,0,0,0,0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136658,81809.0,1.556271,-1.250222,-0.874530,-2.515726,0.817404,3.345611,-1.652124,-1.882996,1.471966,...,0,0,0,0,0,0,0,0,0,1.0
282477,170923.0,-1.174865,-0.033591,2.057488,-1.072444,0.555071,-0.098427,-0.043867,0.112554,-0.935477,...,0,0,0,0,0,0,0,0,0,1.0
162808,115388.0,0.527926,-2.252736,-2.644710,1.895875,-0.283903,-0.871300,1.667337,-0.185590,0.160312,...,0,0,0,0,0,0,0,0,1,1.0
153733,99939.0,-4.437150,-2.657455,1.104148,3.671829,4.106912,-2.044546,0.598080,-0.650385,0.289232,...,0,0,1,0,0,0,0,0,0,1.0


In [142]:
# model = keras.Sequential([
#     keras.layers.Dense(36, activation = "relu"),
#     keras.layers.Dense(2, activation = "relu")
# ])

model = keras.Sequential([
    keras.layers.Dense(units=16, input_dim=36, activation="relu"),
    keras.layers.Dense(units=24, activation="relu"),  
    keras.layers.Dropout(0.5),  
    keras.layers.Dense(20, activation="relu"),  
    keras.layers.Dense(24, activation="relu"),  
    keras.layers.Dense(1, activation="softmax"),  
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [143]:
inputX

array([[-0.38697783,  0.64653427, -0.19331524, ..., -0.11891121,
        -0.18459378, -0.20927418],
       [-0.94924446, -0.97254395, -0.54281547, ..., -0.11891121,
        -0.18459378, -0.20927418],
       [-0.36949978, -0.88826513, -0.43737616, ..., -0.11891121,
        -0.18459378, -0.20927418],
       ...,
       [-0.79874375, -0.66598406,  0.94698135, ..., -0.11891121,
         5.41728162, -0.20927418],
       [ 0.90041714, -0.07448983,  0.02806211, ..., -0.11891121,
        -0.18459378, -0.20927418],
       [-1.60538716,  0.53686136, -0.06964843, ...,  8.40960666,
        -0.18459378, -0.20927418]])

In [144]:
model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_37 (Dense)             (None, 16)                592       
_________________________________________________________________
dense_38 (Dense)             (None, 24)                408       
_________________________________________________________________
dropout_3 (Dropout)          (None, 24)                0         
_________________________________________________________________
dense_39 (Dense)             (None, 20)                500       
_________________________________________________________________
dense_40 (Dense)             (None, 24)                504       
_________________________________________________________________
dense_41 (Dense)             (None, 1)                 25        
Total params: 2,029
Trainable params: 2,029
Non-trainable params: 0
___________________________________________________

In [146]:
model.fit(inputX, y_train.drop("Fraud", axis = 1), epochs = 5)

Train on 227846 samples
Epoch 1/5


InvalidArgumentError:  Received a label value of 1 which is outside the valid range of [0, 1).  Label values: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
	 [[node loss_14/dense_41_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-145-4b31ac1092e7>:1) ]] [Op:__inference_keras_scratch_graph_9051]

Function call stack:
keras_scratch_graph
