In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('credit_batch_1.csv')
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,isFraud
0,88672.0,-3.859881,2.632881,-5.264265,3.446113,-0.675231,-1.904959,-3.291041,-0.985766,-1.168114,...,1.664119,0.785075,0.068412,0.778961,-0.863166,-0.00681,-1.065734,1.773326,1.18,1
1,62059.0,-1.644403,3.129852,-2.576977,3.415573,-0.448525,-1.241893,-1.991652,1.002665,-2.809071,...,0.417762,-0.648576,-0.318617,-0.680413,0.389869,0.05575,0.394682,0.298821,6.62,1
2,13323.0,-5.454362,8.287421,-12.752811,8.594342,-3.106002,-3.179949,-9.252794,4.245062,-6.329801,...,1.846165,-0.267172,-0.310804,-1.201685,1.352176,0.608425,1.574715,0.808725,1.0,1
3,93965.0,-11.397727,7.763953,-18.572307,6.711855,-10.174216,-4.395918,-15.893788,2.083013,-4.988837,...,0.339007,1.342923,0.239217,0.534644,-0.174965,-0.50024,-1.72206,-0.574339,11.4,1
4,52934.0,1.036639,0.407227,0.757706,3.161821,-0.568122,0.202181,-0.689804,0.41138,0.336769,...,-0.050108,0.123761,-0.132568,0.350231,0.507701,0.189621,0.061016,0.063141,0.76,1


In [3]:
data['isFraud'].value_counts()

isFraud
0    8000
1     490
Name: count, dtype: int64

## Split Dataset

In [4]:
X = data.drop(['isFraud'], axis=1)
y = data['isFraud']

#Standardization
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

In [5]:
from imblearn.over_sampling import SMOTE
X_train, y_train = SMOTE(random_state=75).fit_resample(X_train, y_train)

pd.Series(y_train).value_counts()

isFraud
0    6396
1    6396
Name: count, dtype: int64

## Define TF Model Layers

In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score

2023-05-02 21:19:51.730704: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# DNN
model = Sequential([
Dense(input_dim = 30, units = 128, activation = "relu"),
Dense(units= 64, activation = "relu"),
Dropout(0.2),
Dense(units= 32, activation = "relu"),
Dropout(0.2),
Dense(units= 16, activation = "relu"),
Dropout(0.2),
Dense(units=1, activation = "sigmoid")])

In [8]:
# Add training callbacks
from keras.callbacks import LambdaCallback, Callback

current_epoch = 0
current_weights = None
current_loss = None
current_accuracy = None

def update_current_epoch():
    global current_epoch
    current_epoch += 1
    
def update_current_weights():
    global current_weights
    current_weights = model.layers[0].get_weights()
    
def update_current_performance(loss, accuracy):
    global current_loss, current_accuracy
    current_loss = loss
    current_accuracy = accuracy

class CustomCallback(Callback):

    def on_epoch_end(self, batch, logs=None):
        update_current_epoch()
        update_current_weights()
        update_current_performance(logs['loss'], logs['accuracy'])


model.compile(loss='binary_crossentropy', optimizer='adam', metrics='accuracy')

## Model Training

In [9]:
X_train = np.asarray(X_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)

X_test = np.asarray(X_test).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)

model.fit(X_train, y_train, epochs=50, batch_size=100, callbacks = [CustomCallback()])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f9d3a2a7220>

## Model Evaluation

In [10]:
y_hat = model.predict(X_test)
y_hat = [0 if val < 0.5 else 1 for val in y_hat]



In [11]:
accuracy_score(y_test, y_hat)

0.9840989399293286

In [12]:
model.evaluate(X_test, y_test)



[0.27391913533210754, 0.9840989112854004]

## Save Model

In [13]:
# model.save('pretrained_model')

In [14]:
# # Load model
# del model
# model = load_model('pretrained_model')

## Check Updated Local States

In [15]:
def check_local_state():
    print("Current epoch:", current_epoch)
    print("Current weights:", current_weights)
    print("Current loss: {}, accuracy: {}".format(current_loss, current_accuracy))

In [16]:
check_local_state()

Current epoch: 50
Current weights: [array([[ 0.02839822,  0.03048912,  0.10042934, ...,  0.15129304,
        -0.06943314,  0.05577709],
       [ 0.18394242,  0.256213  ,  0.08248425, ...,  0.09234717,
         0.14910537, -0.06446946],
       [-0.14790821, -0.05008426,  0.07158373, ..., -0.2188114 ,
         0.07215978, -0.15730463],
       ...,
       [-0.06779347, -0.24300516, -0.00193673, ..., -0.05084925,
        -0.09523406, -0.12274578],
       [-0.02935541, -0.08869563, -0.05736782, ..., -0.08259995,
        -0.15351874, -0.24491331],
       [-0.10190838,  0.07170135,  0.06277834, ..., -0.1906175 ,
         0.03684642,  0.19233014]], dtype=float32), array([-5.78013770e-02, -1.44956917e-01, -8.41112360e-02, -1.48280144e-01,
       -1.44087806e-01,  1.91065352e-02, -2.95146052e-02, -6.56439215e-02,
       -2.93578655e-02, -7.48589486e-02, -9.41459537e-02, -1.37702078e-01,
        4.71644178e-02, -5.84430285e-02, -1.13337934e-02, -4.66555357e-02,
       -7.30674937e-02, -1.75285161