In [67]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing
import matplotlib.pyplot as plt


In [68]:
ccard = pd.read_csv('creditcard.csv')

In [69]:
ccard = sklearn.utils.shuffle(ccard)
X = ccard.drop("V1", axis = 1).values
y = ccard["Class"].values

In [70]:
ccard = ccard.drop(['V28','V27','V26','V25','V24','V23','V22','V20','V15','V13','V8'], axis =1)

In [71]:
ccard['V1_'] = ccard.V1.map(lambda x: 1 if x < -3 else 0)
ccard['V2_'] = ccard.V2.map(lambda x: 1 if x > 2.5 else 0)
ccard['V3_'] = ccard.V3.map(lambda x: 1 if x < -4 else 0)
ccard['V4_'] = ccard.V4.map(lambda x: 1 if x > 2.5 else 0)
ccard['V5_'] = ccard.V5.map(lambda x: 1 if x < -4.5 else 0)
ccard['V6_'] = ccard.V6.map(lambda x: 1 if x < -2.5 else 0)
ccard['V7_'] = ccard.V7.map(lambda x: 1 if x < -3 else 0)
ccard['V9_'] = ccard.V9.map(lambda x: 1 if x < -2 else 0)
ccard['V10_'] = ccard.V10.map(lambda x: 1 if x < -2.5 else 0)
ccard['V11_'] = ccard.V11.map(lambda x: 1 if x > 2 else 0)
ccard['V12_'] = ccard.V12.map(lambda x: 1 if x < -2 else 0)
ccard['V14_'] = ccard.V14.map(lambda x: 1 if x < -2.5 else 0)
ccard['V16_'] = ccard.V16.map(lambda x: 1 if x < -2 else 0)
ccard['V17_'] = ccard.V17.map(lambda x: 1 if x < -2 else 0)
ccard['V18_'] = ccard.V18.map(lambda x: 1 if x < -2 else 0)
ccard['V19_'] = ccard.V19.map(lambda x: 1 if x > 1.5 else 0)
ccard['V21_'] = ccard.V21.map(lambda x: 1 if x > 0.6 else 0)

In [72]:
df = ccard

In [73]:
#Create a new feature for normal (non-fraudulent) transactions.
df.loc[df.Class == 0, 'Normal'] = 1
df.loc[df.Class == 1, 'Normal'] = 0

In [74]:
df = df.rename(columns={'Class': 'Fraud'})

In [75]:
print(df.Normal.value_counts())
print()
print(df.Fraud.value_counts())

1.0    284315
0.0       492
Name: Normal, dtype: int64

0    284315
1       492
Name: Fraud, dtype: int64


In [76]:
Fraud = df[df.Fraud == 1]
Normal = df[df.Normal == 1]

# Set X_train equal to 80% of the fraudulent transactions.
X_train = Fraud.sample(frac=0.8)
count_Frauds = len(X_train)

# Add 80% of the normal transactions to X_train.
X_train = pd.concat([X_train, Normal.sample(frac = 0.8)], axis = 0)

# X_test contains all the transaction not in X_train.
X_test = df.loc[~df.index.isin(X_train.index)]

In [77]:
#Shuffle the dataframes so that the training is done in a random order.
X_train = sklearn.utils.shuffle(X_train)
X_test = sklearn.utils.shuffle(X_test)

In [78]:
#Add our target features to y_train and y_test.
y_train = X_train.Fraud
y_train = pd.concat([y_train, X_train.Normal], axis=1)

y_test = X_test.Fraud
y_test = pd.concat([y_test, X_test.Normal], axis=1)

In [79]:
#Drop target features from X_train and X_test.
X_train = X_train.drop(['Fraud','Normal'], axis = 1)
X_test = X_test.drop(['Fraud','Normal'], axis = 1)

In [80]:
'''
Due to the imbalance in the data, ratio will act as an equal weighting system for our model. 
By dividing the number of transactions by those that are fraudulent, ratio will equal the value that when multiplied
by the number of fraudulent transactions will equal the number of normal transaction. 
Simply put: # of fraud * ratio = # of normal
'''
ratio = len(X_train)/count_Frauds 

y_train.Fraud *= ratio
y_test.Fraud *= ratio

In [81]:
#Names of all of the features in X_train.
features = X_train.columns.values

#Transform each feature in features so that it has a mean of 0 and standard deviation of 1; 
#this helps with training the neural network.
for feature in features:
    mean, std = df[feature].mean(), df[feature].std()
    X_train.loc[:, feature] = (X_train[feature] - mean) / std
    X_test.loc[:, feature] = (X_test[feature] - mean) / std

In [89]:
# Split the testing data into validation and testing sets
split = int(len(y_test)/2)

inputX = X_train.values
inputY = y_train.values
inputX_valid = X_test.values[:split]
inputY_valid = y_test.values[:split]
inputX_test = X_test.values[split:]
inputY_test = y_test.values[split:]

In [107]:
# model = keras.Sequential([
#     keras.layers.Dense(36, activation = "relu"),
#     keras.layers.Dense(2, activation = "relu")
# ])

model = keras.Sequential([
    keras.layers.Dense(units=16, input_dim=36, activation="relu"),
    keras.layers.Dense(units=24, activation="relu"),  
    keras.layers.Dropout(0.5),  
    keras.layers.Dense(20, activation="relu"),  
    keras.layers.Dense(24, activation="relu"),  
    keras.layers.Dense(1, activation="sigmoid"),  
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

NameError: name 'Sequential' is not defined

In [108]:
inputX

array([[-0.38697783,  0.64653427, -0.19331524, ..., -0.11891121,
        -0.18459378, -0.20927418],
       [-0.94924446, -0.97254395, -0.54281547, ..., -0.11891121,
        -0.18459378, -0.20927418],
       [-0.36949978, -0.88826513, -0.43737616, ..., -0.11891121,
        -0.18459378, -0.20927418],
       ...,
       [-0.79874375, -0.66598406,  0.94698135, ..., -0.11891121,
         5.41728162, -0.20927418],
       [ 0.90041714, -0.07448983,  0.02806211, ..., -0.11891121,
        -0.18459378, -0.20927418],
       [-1.60538716,  0.53686136, -0.06964843, ...,  8.40960666,
        -0.18459378, -0.20927418]])

In [106]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             multiple                  1332      
_________________________________________________________________
dense_21 (Dense)             multiple                  74        
Total params: 1,406
Trainable params: 1,406
Non-trainable params: 0
_________________________________________________________________


In [105]:
model.fit(inputX, inputY, epochs = 5)

Train on 227846 samples
Epoch 1/5


InvalidArgumentError:  Incompatible shapes: [32] vs. [32,2]
	 [[node metrics_20/accuracy/Equal (defined at <ipython-input-105-7430de176290>:1) ]] [Op:__inference_keras_scratch_graph_4599]

Function call stack:
keras_scratch_graph
