In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# Data: https://www.kaggle.com/mlg-ulb/creditcardfraud      # bayesian_optimization Package: https://github.com/fmfn/BayesianOptimization

In [None]:
# 1. Observe the dataset
# 1. Size of the CSV: load it, discard the 'Time' column and split off the 'Class' target
df = pd.read_csv('creditcard.csv').drop(columns='Time')
y = df.pop('Class')
X = df.copy()
# Preview the features and the target, then report the feature-matrix dimensions.
for caption, preview in (('X:', X.head()), ('y: ', y.head()), ('X shape:', X.shape)):
    print(caption)
    print(preview)

In [None]:
# 1. Summary statistics of the feature columns ('Time' was dropped and 'Class' popped from df above)
df.describe()

In [None]:
# 1. Class balance: absolute counts first, then relative frequencies
for as_fraction in (False, True):
    print(y.value_counts(normalize=as_fraction))
# 1. Mean, variance and scale: 80/20 split, then standardise using statistics
#    learned from the training portion only
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=47)
scale = StandardScaler().fit(X_train)
X_train_scaled = scale.transform(X_train)
X_test_scaled = scale.transform(X_test)
# 1. PCA: cumulative explained variance as components are added
from sklearn.decomposition import PCA
pca = PCA().fit(X_train_scaled)
cumulative_variance = pca.explained_variance_ratio_.cumsum()
plt.plot(cumulative_variance)
plt.title('explained variance w.r.t. number of features')
# conclusion: no scope of reducing number of features

In [None]:
# 2. Define metrics
# 2. Confusion-matrix counts (TN, TP, FN, FP), AUC, precision and recall
from tensorflow.keras.metrics import (
    AUC, FalseNegatives, FalsePositives, Precision, Recall, TrueNegatives, TruePositives,
)
# Instantiated in this fixed order; the same list is passed to every model.compile() call below.
metrics = [
    metric_cls()
    for metric_cls in (TrueNegatives, TruePositives, AUC, FalseNegatives,
                       FalsePositives, Precision, Recall)
]

In [None]:
#3 create model
#3. Neural Network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
def make_nn_model(num_layers=2, num_nodes=10, activation='relu', batch_norm=False, dropout=0.1, num_feature=29, num_target=1):
    """Build an (uncompiled) fully connected Keras ``Sequential`` classifier.

    The network is one input-facing hidden block, followed by
    ``num_layers - 2`` further hidden blocks, followed by a sigmoid output
    layer. A hidden block is ``Dense(num_nodes)`` -> optional
    ``BatchNormalization`` -> ``Dropout(dropout)``. Values of ``num_layers``
    below 2 therefore still produce one hidden block plus the output layer.

    Args:
        num_layers: number of Dense layers, counting the output layer.
        num_nodes: units in every hidden Dense layer.
        activation: activation used by the hidden Dense layers.
        batch_norm: batch normalisation is inserted only when this compares
            equal to ``True``.
        dropout: rate passed to every Dropout layer.
        num_feature: number of input features.
        num_target: units in the sigmoid output layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    # Reset Keras' global state before creating any new layers.
    tf.keras.backend.clear_session()
    use_batch_norm = (batch_norm == True)

    def hidden_block(**dense_kwargs):
        # Dense -> (BatchNormalization) -> Dropout
        block = [Dense(num_nodes, activation=activation, **dense_kwargs)]
        if use_batch_norm:
            block.append(BatchNormalization())
        block.append(Dropout(dropout))
        return block

    # Only the first Dense layer declares the input shape.
    stack = hidden_block(input_shape=(num_feature,))
    for _ in range(num_layers - 2):
        stack.extend(hidden_block())
    stack.append(Dense(num_target, activation='sigmoid'))
    return Sequential(stack)
# Build a 3-layer network with batch normalisation and print its architecture.
model = make_nn_model(num_layers=3, batch_norm=True)
model.summary()

In [None]:
# 4. Train model: binary cross-entropy loss, Adam optimiser, short 2-epoch run
loss = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer='adam', loss=loss, metrics=metrics)
model.fit(
    x=X_train_scaled,
    y=y_train.values,
    batch_size=1000,
    epochs=2,
)

In [None]:
# 4.1 Find the batch size that gives the shortest training time
import time
# NOTE(review): prints the current Unix timestamp; no visible code reads this output —
# looks like leftover debugging, confirm before removing.
print(time.time())
def evaluate_training_time(batch_sizes):
    """Measure how long a 5-epoch fit takes for each candidate batch size.

    Uses the notebook-level ``model``, ``X_train_scaled`` and ``y_train``.
    The same model instance is fitted repeatedly, so its weights keep
    training from one batch size to the next; only wall-clock time is
    recorded. A line chart of time against batch size is shown.

    Args:
        batch_sizes: iterable of batch sizes to benchmark.

    Returns:
        List of elapsed seconds, one entry per batch size, in input order.
    """
    # Materialise once so the sizes can be both iterated and plotted.
    batch_sizes = list(batch_sizes)
    recorded_time = []
    for batch_size in batch_sizes:
        # perf_counter is monotonic, so the interval is immune to clock adjustments.
        t1 = time.perf_counter()
        # The batch size under test must be the loop variable; a fixed value
        # would time the same configuration on every iteration.
        # int() converts numpy integer scalars to a plain Python int.
        model.fit(X_train_scaled, y_train.values, epochs=5, batch_size=int(batch_size))
        recorded_time.append(time.perf_counter() - t1)
    fig = px.line(x=batch_sizes, y=recorded_time).update_traces(mode='lines+markers')
    fig.show()
    return recorded_time

# Candidate batch sizes: the powers of two 2**2 ... 2**17
batch_sizes = list(np.power(2, np.arange(2, 18)))
evaluate_training_time(batch_sizes)
# optimum time: 1024batches

In [None]:
# 5. Tune model
# 5. Hyperparameter tuning (runs are logged through the TensorBoard HParams plugin)
from tensorboard.plugins.hparams import api as hp
from sklearn.metrics import precision_recall_fscore_support
# Fix TensorFlow's global random seed before the tuning runs.
tf.random.set_seed(47)

def model_run(num_layers, num_nodes, activation, batch_norm, dropout, class_weight):
    """Objective function for the Bayesian optimiser: train one network, return its F-score.

    Every argument is expected to be a number sampled from the continuous
    bounds in ``pbounds`` and is decoded here into an actual hyperparameter.

    Args:
        num_layers: truncated with ``int()``; e.g. bounds (2, 6) yield 2..5.
        num_nodes: truncated with ``int()``; e.g. bounds (5, 21) yield 5..20.
        activation: truncated with ``int()``; 0 -> 'relu', 1 -> 'tanh'.
        batch_norm: truncated with ``int()``; 0 -> True, 1 -> False.
        dropout: dropout rate, used as given.
        class_weight: shift factor ``s``. With ``f0``/``f1`` the fractions of
            class-0/class-1 samples in ``y_train``, the weights passed to Keras
            are ``{0: f1 + f1*s, 1: f0 - f1*s}``.

    Returns:
        F-score of the positive class on ``(X_test_scaled, y_test)`` at a 0.5
        decision threshold, after 10 epochs of training with batch size 1024.

    Side effects:
        Writes TensorBoard and HParams logs to ``logs/t_<unix-seconds>``.
        Uses the notebook-level ``loss`` and ``metrics`` objects.
    """
    # Local imports keep the function self-contained.
    import os
    import time

    num_layers = int(num_layers)
    num_nodes = int(num_nodes)

    activation_dict = {0: 'relu', 1: 'tanh'}
    activation = activation_dict[int(activation)]

    batch_norm_dict = {0: True, 1: False}
    batch_norm = batch_norm_dict[int(batch_norm)]

    # Class frequencies are computed once; base weights are the opposite class's share.
    label_counts = y_train.value_counts()
    n_negative, n_positive = label_counts[0], label_counts[1]
    n_total = n_negative + n_positive
    class_weight_0 = n_positive / n_total
    class_weight_1 = n_negative / n_total
    class_weight = {0: class_weight_0 + class_weight_0*class_weight,
                    1: class_weight_1 - class_weight_0*class_weight}

    hparams = {'num_layers': num_layers,
               'num_nodes': num_nodes,
               'activation': activation,
               'batch_norm': batch_norm,
               'dropout': dropout,
               'class_weight': class_weight[0]
               }
    log_seq = int(time.time())
    # os.path.join keeps each run inside the 'logs' directory on every OS,
    # so `tensorboard --logdir logs` can find it.
    logdir = os.path.join('logs', 't_{}'.format(log_seq))
    model = make_nn_model(num_layers=num_layers, num_nodes=num_nodes,
                          activation=activation, batch_norm=batch_norm,
                          dropout=dropout)

    model.compile(loss=loss, metrics=metrics, optimizer='adam')

    model.fit(X_train_scaled, y_train.values, epochs=10, batch_size=1024,
              callbacks=[tf.keras.callbacks.TensorBoard(logdir),
                         hp.KerasCallback(logdir, hparams, trial_id=str(log_seq))],
              class_weight=class_weight, verbose=0)

    # NOTE(review): the score being maximised is computed on the test split, so the
    # selected hyperparameters are tuned on test data — consider a separate validation split.
    # Flatten the (n, 1) probability column to 1-D labels before scoring.
    predicted_labels = (model.predict(X_test_scaled) > 0.5).ravel()
    fscore = precision_recall_fscore_support(y_test, predicted_labels, average='binary')[2]
    return fscore  # maximizing f score


#tensorboard --logdir logs

In [None]:
from bayes_opt import BayesianOptimization
import warnings
# Suppress NaN warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


# Bounded region of parameter space; model_run decodes each sampled value
pbounds = dict(
    num_layers=(2, 6),
    num_nodes=(5, 21),
    activation=(0.01, 1.99),
    batch_norm=(0.01, 1.99),
    dropout=(0, 0.5),
    class_weight=(0, 50),
)

# verbose = 2 prints every step, verbose = 1 prints only when a maximum
# is observed, verbose = 0 is silent
optimizer = BayesianOptimization(f=model_run, pbounds=pbounds, verbose=2, random_state=1)

# 10 random probes followed by 15 guided iterations
optimizer.maximize(init_points=10, n_iter=15)
print(optimizer.max)

In [None]:
# Re-display the optimiser's best result without re-running the search
print(optimizer.max)
#optimizer.res

In [None]:
# Best parameters: retrain the selected configuration for 100 epochs
log_seq = int(time.time())
logdir = r'logs_best_para'
best_params = {'num_layers': 4,
               'num_nodes': 10,
               'activation': 'relu',
               'batch_norm': True,
               'dropout': 0.2666}
model = make_nn_model(**best_params)

model.compile(loss=loss, metrics=metrics, optimizer='adam')

# Shift factor applied to the frequency-based class weights (same formula as model_run).
class_weight_shift = 48
label_counts = y_train.value_counts()
class_weight_0 = label_counts[1]/(label_counts[0] + label_counts[1])
class_weight_1 = label_counts[0]/(label_counts[0] + label_counts[1])
class_weight = {0: class_weight_0 + class_weight_0*class_weight_shift,
                1: class_weight_1 - class_weight_0*class_weight_shift}

# The hparams dict must exist at notebook level for the HParams callback below;
# the one built inside model_run is local to that function.
hparams = dict(best_params, class_weight=class_weight[0])

model.fit(X_train_scaled, y_train.values, epochs=100, batch_size=1024,
          callbacks=[tf.keras.callbacks.TensorBoard(logdir),
                     hp.KerasCallback(logdir, hparams, trial_id=str(log_seq))],
          class_weight=class_weight, validation_split=0.2)

#tensorboard --logdir logs_best_para

In [None]:
# Confusion matrix on the test split at a 0.5 decision threshold
from sklearn.metrics import confusion_matrix
import seaborn as sns
test_pred_labels = model.predict(X_test_scaled) > 0.5
cm = confusion_matrix(y_test, test_pred_labels)
heatmap_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_ylabel('actual')
heatmap_ax.set_xlabel('prediction')
# Report the sizes of the train and test feature matrices.
for split in (X_train, X_test):
    print(split.shape)

In [None]:
# ROC curve

# NOTE(review): plot_roc below assigns its own local `thresholds`, so it never reads
# this array — confirm whether anything else uses it.
thresholds = np.linspace(0.1, 0.9, 20)
from sklearn.metrics import roc_curve

def plot_roc(labels, predictions):
    """Show the ROC curve and the matching decision thresholds on one figure.

    Both curves share the x axis (false-positive rate, in percent). The red
    trace is the true-positive rate in percent; the yellow trace is the score
    threshold multiplied by 100, so it uses the same 0-100 scale.

    Args:
        labels: true binary labels.
        predictions: predicted scores for the positive class.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fp, tp, thresholds = roc_curve(labels, predictions)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=100*fp, y=100*tp, mode='lines',
                             name='true positive rate (%)', line=dict(color='red')))
    # roc_curve's first threshold is an artificial sentinel for the (0, 0) point,
    # not a real score, so it is left out of the threshold trace.
    fig.add_trace(go.Scatter(x=100*fp[1:], y=100*thresholds[1:], mode='lines',
                             name='threshold x 100', line=dict(color='yellow')))
    fig.update_xaxes(title_text='False Positive')
    fig.update_yaxes(title_text='True Positive')
    fig.show()
# Plot the ROC curve for the test split from the model's predicted scores
plot_roc(y_test, model.predict(X_test_scaled))
#conclusion: scope of improving False Positive

In [None]:
# Confusion matrix on the test split with a stricter 0.95 decision threshold
strict_pred_labels = model.predict(X_test_scaled) > 0.95
cm = confusion_matrix(y_test, strict_pred_labels)
strict_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
strict_ax.set_ylabel('actual')
strict_ax.set_xlabel('prediction')