# Notebook to support tests for Keras engine based on Credit Card Fraud dataset

In [1]:
import tensorflow as tf
tf.__version__

'2.2.0'

In [2]:
#pip install tensorflow==2.2.0

In [3]:
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [4]:
ARTIFACTS_PATH = '../../artifacts/keras/'
os.makedirs(ARTIFACTS_PATH, exist_ok=True) # Create path if not exists

In [5]:
df = pd.read_csv('../../data/creditcard/training.csv')
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [6]:
target_value = 'Class'
X_data = df.loc[:, df.columns != target_value].to_numpy()
y_data = df[[target_value]].to_numpy()

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.3, random_state=42)

In [8]:
# Normalize dataset
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

In [9]:
# Analyse imbalanced targets
counts = np.bincount(y_train[:, 0])
print('Number of positive samples in training data: %d (%.2f%% of total)' % (
    counts[1], 100 * float(counts[1]) / len(y_train)))

weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]

Number of positive samples in training data: 356 (0.18% of total)


In [10]:
# Metrics to train the model
metrics = [
    keras.metrics.FalseNegatives(name="fn"),
    keras.metrics.FalsePositives(name="fp"),
    keras.metrics.TrueNegatives(name="tn"),
    keras.metrics.TruePositives(name="tp"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

# Model optimizer
optimizer = keras.optimizers.Adam(1e-2)

In [11]:
X_train.shape

(199364, 30)

## Sequential model

In [12]:
model = keras.Sequential(
    [
        keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[-1],)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=metrics)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               15872     
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                16448     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 6

In [13]:
model.fit(
    X_train,
    y_train,
    batch_size=2048,
    epochs=30,
    verbose=2,
    validation_data=(X_test, y_test),
    class_weight={0: weight_for_0, 1: weight_for_1},
)

Epoch 1/30
98/98 - 1s - loss: 2.7746e-06 - fn: 41.0000 - fp: 25486.0000 - tn: 173522.0000 - tp: 315.0000 - precision: 0.0122 - recall: 0.8848 - val_loss: 0.1415 - val_fn: 12.0000 - val_fp: 2420.0000 - val_tn: 82887.0000 - val_tp: 124.0000 - val_precision: 0.0487 - val_recall: 0.9118
Epoch 2/30
98/98 - 1s - loss: 1.7050e-06 - fn: 29.0000 - fp: 8560.0000 - tn: 190448.0000 - tp: 327.0000 - precision: 0.0368 - recall: 0.9185 - val_loss: 0.1069 - val_fn: 11.0000 - val_fp: 3100.0000 - val_tn: 82207.0000 - val_tp: 125.0000 - val_precision: 0.0388 - val_recall: 0.9191
Epoch 3/30
98/98 - 1s - loss: 1.4856e-06 - fn: 27.0000 - fp: 9347.0000 - tn: 189661.0000 - tp: 329.0000 - precision: 0.0340 - recall: 0.9242 - val_loss: 0.1026 - val_fn: 9.0000 - val_fp: 2459.0000 - val_tn: 82848.0000 - val_tp: 127.0000 - val_precision: 0.0491 - val_recall: 0.9338
Epoch 4/30
98/98 - 1s - loss: 1.1997e-06 - fn: 23.0000 - fp: 7908.0000 - tn: 191100.0000 - tp: 333.0000 - precision: 0.0404 - recall: 0.9354 - val_loss

Epoch 30/30
98/98 - 1s - loss: 6.8207e-07 - fn: 4.0000 - fp: 6024.0000 - tn: 192984.0000 - tp: 352.0000 - precision: 0.0552 - recall: 0.9888 - val_loss: 0.0612 - val_fn: 9.0000 - val_fp: 1578.0000 - val_tn: 83729.0000 - val_tp: 127.0000 - val_precision: 0.0745 - val_recall: 0.9338


<tensorflow.python.keras.callbacks.History at 0x7f06c7d49700>

In [14]:
model.save(ARTIFACTS_PATH + 'keras_credit_card_fraud_sequential.h5')

In [15]:
# Load model and make sample prediction
model = tf.keras.models.load_model(ARTIFACTS_PATH + 'keras_credit_card_fraud_sequential.h5')

In [16]:
a = [[0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364,
      0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364,
      0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364]]
model.predict(a)

array([[2.5477624e-14]], dtype=float32)

## Non-sequential model

In [None]:
x_input = keras.layers.Input(shape=(X_train.shape[-1],))

x = x_input
x = keras.layers.Dense(512, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(256, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(1, activation='sigmoid')(x)
x_output = x

model = keras.Model(inputs=x_input, outputs=x_output)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=metrics)

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 30)]              0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               15872     
_________________________________________________________________
dropout_3 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)                16448     
_________________________________________________________________
dropout_5 (Dropout)          (None, 64)                0     

In [None]:
model.fit(
    X_train,
    y_train,
    batch_size=2048,
    epochs=30,
    verbose=2,
    validation_data=(X_test, y_test),
    class_weight={0: weight_for_0, 1: weight_for_1},
)

Epoch 1/30
98/98 - 1s - loss: 2.4854e-06 - fn: 49.0000 - fp: 15845.0000 - tn: 268470.0000 - tp: 443.0000 - precision: 0.0272 - recall: 0.9004 - val_loss: 0.1718 - val_fn: 13.0000 - val_fp: 1178.0000 - val_tn: 84129.0000 - val_tp: 123.0000 - val_precision: 0.0945 - val_recall: 0.9044
Epoch 2/30
98/98 - 1s - loss: 1.9279e-06 - fn: 34.0000 - fp: 7957.0000 - tn: 191051.0000 - tp: 322.0000 - precision: 0.0389 - recall: 0.9045 - val_loss: 0.2467 - val_fn: 16.0000 - val_fp: 3202.0000 - val_tn: 82105.0000 - val_tp: 120.0000 - val_precision: 0.0361 - val_recall: 0.8824
Epoch 3/30
98/98 - 1s - loss: 1.5695e-06 - fn: 32.0000 - fp: 6047.0000 - tn: 192961.0000 - tp: 324.0000 - precision: 0.0509 - recall: 0.9101 - val_loss: 0.1276 - val_fn: 10.0000 - val_fp: 2873.0000 - val_tn: 82434.0000 - val_tp: 126.0000 - val_precision: 0.0420 - val_recall: 0.9265
Epoch 4/30
98/98 - 1s - loss: 1.1139e-06 - fn: 21.0000 - fp: 6433.0000 - tn: 192575.0000 - tp: 335.0000 - precision: 0.0495 - recall: 0.9410 - val_los

Epoch 30/30
98/98 - 1s - loss: 4.6947e-07 - fn: 4.0000 - fp: 4719.0000 - tn: 194289.0000 - tp: 352.0000 - precision: 0.0694 - recall: 0.9888 - val_loss: 0.0292 - val_fn: 13.0000 - val_fp: 871.0000 - val_tn: 84436.0000 - val_tp: 123.0000 - val_precision: 0.1237 - val_recall: 0.9044


<tensorflow.python.keras.callbacks.History at 0x7f06b8499430>

In [None]:
model.save(ARTIFACTS_PATH + 'keras_credit_card_fraud_nonsequential.h5')

## Non-sequential model with branches

In [None]:
x_input = keras.layers.Input(shape=(X_train.shape[-1],))

x = x_input
x = keras.layers.Dense(512, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)

x_1 = x
x_1 = keras.layers.Dense(256, activation='relu')(x_1)
x_1 = keras.layers.Dropout(0.2)(x_1)
x_1 = keras.layers.Dense(128, activation='relu')(x_1)
x_1 = keras.layers.Dropout(0.2)(x_1)

x_2 = x
x_2 = keras.layers.Dense(128, activation='relu')(x_2)
x_2 = keras.layers.Dropout(0.2)(x_2)

x = keras.layers.Concatenate(axis=-1)([x_1, x_2])
x = keras.layers.Dense(1, activation='sigmoid')(x)
x_output = x

model = keras.Model(inputs=x_input, outputs=x_output)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=metrics)

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 30)]         0                                            
__________________________________________________________________________________________________
dense_8 (Dense)                 (None, 512)          15872       input_2[0][0]                    
__________________________________________________________________________________________________
dropout_6 (Dropout)             (None, 512)          0           dense_8[0][0]                    
__________________________________________________________________________________________________
dense_9 (Dense)                 (None, 256)          131328      dropout_6[0][0]                  
____________________________________________________________________________________________

In [None]:
model.fit(
    X_train,
    y_train,
    batch_size=2048,
    epochs=30,
    verbose=2,
    validation_data=(X_test, y_test),
    class_weight={0: weight_for_0, 1: weight_for_1},
)

Epoch 1/30
98/98 - 2s - loss: 2.4585e-06 - fn: 55.0000 - fp: 16211.0000 - tn: 268104.0000 - tp: 437.0000 - precision: 0.0262 - recall: 0.8882 - val_loss: 0.1093 - val_fn: 11.0000 - val_fp: 2618.0000 - val_tn: 82689.0000 - val_tp: 125.0000 - val_precision: 0.0456 - val_recall: 0.9191
Epoch 2/30
98/98 - 1s - loss: 2.6098e-06 - fn: 32.0000 - fp: 9213.0000 - tn: 189795.0000 - tp: 324.0000 - precision: 0.0340 - recall: 0.9101 - val_loss: 0.7875 - val_fn: 4.0000 - val_fp: 18608.0000 - val_tn: 66699.0000 - val_tp: 132.0000 - val_precision: 0.0070 - val_recall: 0.9706
Epoch 3/30
98/98 - 1s - loss: 2.6002e-06 - fn: 29.0000 - fp: 10468.0000 - tn: 188540.0000 - tp: 327.0000 - precision: 0.0303 - recall: 0.9185 - val_loss: 0.3203 - val_fn: 9.0000 - val_fp: 5013.0000 - val_tn: 80294.0000 - val_tp: 127.0000 - val_precision: 0.0247 - val_recall: 0.9338
Epoch 4/30
98/98 - 1s - loss: 1.5044e-06 - fn: 25.0000 - fp: 5725.0000 - tn: 193283.0000 - tp: 331.0000 - precision: 0.0547 - recall: 0.9298 - val_los

In [None]:
model.save(ARTIFACTS_PATH + 'keras_credit_card_fraud_nonsequential_branch.h5')

### Sample prediction

In [None]:
a = [[0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364,
      0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364,
      0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364, 0.64286, 0.45833, 0.52941, 0.36364]]
model.predict(a)