# Credit card fraud detection

## Učitavanje biblioteka

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import metrics
import tensorflow as tf
import time

In [None]:
# Helper that computes and prints the evaluation metrics
def metrike(modeli, testData, y_test, time):
    """Print classification metrics for every model in ``modeli``.

    Args:
        modeli: Iterable of trained models exposing ``optimizer`` and
            ``predict`` (the optimizer class name is used as the label).
        testData: Feature matrix handed to ``model.predict``.
        y_test: True binary labels (0 / 1).
        time: Sequence of training durations in seconds, aligned with
            ``modeli``. NOTE: this parameter shadows the ``time`` module
            inside the function; the name is kept for backward compatibility.
    """
    for ind, model in enumerate(modeli):
        print(F"Algoritam: {type(model.optimizer).__name__}")
        # Keep the raw sigmoid outputs: ROC AUC is a ranking metric and must
        # be computed from scores, not from the thresholded 0/1 decisions.
        y_prob = np.asarray(model.predict(testData)).ravel()
        # Hard class decisions at the 0.5 threshold, flattened to 1-D ints.
        y_pred = (y_prob > 0.5).astype(int)
        print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
        print('ROC AUC:', metrics.roc_auc_score(y_test, y_prob))
        print("Confusion matrix")
        # Pin the label order so the matrix is always 2x2 (rows = true class,
        # columns = predicted class), even if a class is never predicted.
        CM = metrics.confusion_matrix(y_test, y_pred, labels=[0, 1])
        TN = CM[0, 0]
        TP = CM[1, 1]
        FP = CM[0, 1]
        FN = CM[1, 0]
        print("    P0           P1")
        print(f"S0  {TN}        {FP}")
        print(f"S1  {FN}        {TP}")
        print("Recall: ",metrics.recall_score(y_test,y_pred))
        print("Precision: ",  metrics.precision_score(y_test, y_pred))
        print("F1 score: ", metrics.f1_score(y_test, y_pred))
        print(f"Training time (sec): {time[ind]}" )
        print("-----------------------------------------------")
def _plot_history_curves(history, train_key, val_key, ylabel, title):
    """Plot one training curve and its validation counterpart, then show it."""
    # The figure has to exist before plotting; creating it afterwards leaves
    # the curves on an implicit default figure and shows an empty canvas.
    plt.figure(figsize=(6,6), dpi=500)
    plt.plot(pd.DataFrame(history.history[train_key]))
    plt.plot(pd.DataFrame(history.history[val_key]))
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()


def metrike_table(modeli, testData, y_test, tr_time, histories):
    """Build a comparison table of metrics and plot the training curves.

    Args:
        modeli: Sequence of trained models exposing ``optimizer`` and
            ``predict``.
        testData: Feature matrix handed to ``model.predict``.
        y_test: True binary labels (0 / 1).
        tr_time: Training durations in seconds, aligned with ``modeli``.
        histories: Objects whose ``history`` dict holds the ``loss``,
            ``val_loss``, ``auc`` and ``val_auc`` lists, aligned with
            ``modeli``.

    Returns:
        A ``pandas.DataFrame`` with one row per model.
    """
    data = []
    for ind, model in enumerate(modeli):
        optimizer_name = type(model.optimizer).__name__
        # perf_counter() is monotonic, so the measured inference time cannot
        # be skewed by system clock adjustments.
        start = time.perf_counter()
        y_prob = np.asarray(model.predict(testData)).ravel()
        y_pred = (y_prob > 0.5).astype(int)
        test_time = time.perf_counter()-start
        data.append([
            optimizer_name,
            metrics.accuracy_score(y_test, y_pred),
            # ROC AUC needs the probability scores, not thresholded labels.
            metrics.roc_auc_score(y_test, y_prob),
            metrics.recall_score(y_test,y_pred),
            metrics.precision_score(y_test, y_pred),
            metrics.f1_score(y_test, y_pred),
            tr_time[ind],
            test_time,
            # Number of epochs actually run = length of the loss history.
            len(histories[ind].history['loss'])
            ])
        _plot_history_curves(histories[ind], "auc", "val_auc", 'AUC', f'{optimizer_name} AUC')
        _plot_history_curves(histories[ind], "loss", "val_loss", 'Loss', f'{optimizer_name} loss')
    df = pd.DataFrame(data, columns = ['Algoritam', "Accuracy",'ROC AUC',"Recall","Precision","F1 score", "Training time (sec)","Test time (sec)",'Epochs'])
    return df

## Učitavanje skupa podataka

In [None]:
# Load the credit-card CSV (path is relative to the notebook's working
# directory) and preview the first rows.
dataset = pd.read_csv('../datasets/creditcard.csv')
dataset.head()

## Analiza skupa podataka

In [None]:
# Show the dtype of every column.
dataset.dtypes

In [None]:
# Descriptive statistics for the columns.
dataset.describe()

In [None]:
# Number of rows per value of the 'Class' column.
dataset['Class'].value_counts()

In [None]:
# Count the rows per class once, then pick the two labels out of the result.
class_counts = dataset['Class'].value_counts()
no_fraud = class_counts[0]   # rows labelled 0
fraud = class_counts[1]      # rows labelled 1

In [None]:
# Bar chart comparing the number of fraud and non-fraud transactions.
bar_labels = ['Fraud', 'No fraud']
bar_heights = [fraud, no_fraud]
plt.figure(figsize=(5, 5))
plt.bar(bar_labels, bar_heights)
plt.title("Fraud/no fraud")
plt.ylabel("Number of transactions")
plt.show()

In [None]:
# Percentage of rows whose 'Class' is 1.
dataset['Class'].value_counts()[1]/len(dataset)*100
# 0.17% of the instances belong to the "Fraud" class

In [None]:
# Correlation matrix of all columns, drawn as a square heatmap on a 10x10 canvas.
korelacijska_matrica = dataset.corr()
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 10))
sns.heatmap(korelacijska_matrica, vmax=0.9, square=True, ax=heatmap_ax)

In [None]:
# Correlation of every feature with 'Class' (the target's own entry is dropped),
# displayed as a one-column table indexed by feature name.
class_corr = dataset.corr()['Class'].drop('Class')
feature_names = dataset.drop(['Class'], axis=1).columns
pd.DataFrame(class_corr, feature_names)

## Čišćenje

In [None]:
# There are no null values: count the missing entries per column.
dataset.isna().sum()

In [None]:
# Features: every column except 'Time' and the target 'Class'.
X=dataset.drop(['Time','Class'], axis=1)
# Target: the 'Class' column.
Y=dataset['Class']

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Preview the target vector.
Y.head()

## Podjela na trening i test skupove

In [None]:
from sklearn.model_selection import train_test_split
# 70/30 train/test split with a fixed seed for reproducibility.
# NOTE(review): the split is not stratified; given the heavy class imbalance
# noted earlier in this notebook, consider stratify=Y — confirm with the author.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3, random_state=42)

## Standardizacija

In [None]:
from sklearn.preprocessing import StandardScaler
scX = StandardScaler()
# Fit the scaler on the training data only, then apply the same
# transformation to the test data.
X_train = scX.fit_transform(X_train)
X_test = scX.transform(X_test)

## ANN - ADAM

In [None]:
# One AUC metric instance is shared by all models in this notebook; the
# training histories are read back later under the "auc" / "val_auc" keys.
metric = tf.keras.metrics.AUC()
ann_adam = tf.keras.models.Sequential()
# 3 hidden layers with 8, 16 and 32 neurons respectively
ann_adam.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann_adam.add(tf.keras.layers.Dense(units=16, activation='relu'))
ann_adam.add(tf.keras.layers.Dense(units=32, activation='relu'))
# Output layer: a single sigmoid unit for the binary target
ann_adam.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
# Compile the network with the Adam optimizer
ann_adam.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = [metric])

In [None]:
# Train the Adam model and measure the elapsed training time.
# perf_counter() is monotonic, so the measurement cannot be skewed by
# system clock adjustments during training.
# NOTE(review): the test split is also used as validation data here.
start = time.perf_counter()
adam_history = ann_adam.fit(X_train, y_train, batch_size = 32, epochs = 5, validation_data=(X_test,y_test))
adam_time = time.perf_counter()-start

In [None]:
# Print the test-set metrics for the Adam model.
metrike([ann_adam], X_test, y_test, [adam_time])

In [None]:
# Training vs. validation AUC per epoch. The figure is created before
# plotting so the curves are drawn on the sized canvas instead of an
# implicit default figure followed by an empty one.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(adam_history.history["auc"]))
plt.plot(pd.DataFrame(adam_history.history["val_auc"]))
plt.show()

# Training vs. validation loss per epoch.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(adam_history.history["loss"]))
plt.plot(pd.DataFrame(adam_history.history["val_loss"]))
plt.show()

## ANN - ADAMAX

In [None]:

# Clear the shared AUC metric's accumulated state before reusing it.
metric.reset_state()
# Same topology as the other networks: 8 -> 16 -> 32 ReLU units, sigmoid output.
ann_adamax = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
ann_adamax.compile(
    optimizer='adamax',
    loss='binary_crossentropy',
    metrics=[metric],
)

In [None]:
# Train the Adamax model and measure the elapsed training time.
# perf_counter() is monotonic, so the measurement cannot be skewed by
# system clock adjustments during training.
# NOTE(review): the test split is also used as validation data here.
start = time.perf_counter()
adamax_history = ann_adamax.fit(X_train, y_train, batch_size = 32, epochs = 28, validation_data=(X_test,y_test))
adamax_time = time.perf_counter()-start

In [None]:
# Print the test-set metrics for the Adamax model.
metrike([ann_adamax], X_test, y_test, [adamax_time])

In [None]:
# Training vs. validation AUC per epoch. The figure is created before
# plotting so the curves are drawn on the sized canvas instead of an
# implicit default figure followed by an empty one.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(adamax_history.history["auc"]))
plt.plot(pd.DataFrame(adamax_history.history["val_auc"]))
plt.show()

# Training vs. validation loss per epoch.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(adamax_history.history["loss"]))
plt.plot(pd.DataFrame(adamax_history.history["val_loss"]))
plt.show()

## ANN - ADAGRAD

In [None]:
# Clear the shared AUC metric's accumulated state before reusing it.
metric.reset_state()
# 3 hidden layers with 8, 16 and 32 neurons, followed by the sigmoid output layer
ann_adagrad = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
# Compile the network with the Adagrad optimizer
ann_adagrad.compile(
    optimizer='adagrad',
    loss='binary_crossentropy',
    metrics=[metric],
)

In [None]:
# Train the Adagrad model and measure the elapsed training time.
# perf_counter() is monotonic, so the measurement cannot be skewed by
# system clock adjustments during training.
# NOTE(review): the test split is also used as validation data here.
start = time.perf_counter()
adagrad_history = ann_adagrad.fit(X_train, y_train, batch_size = 32, epochs = 100, validation_data=(X_test, y_test))
adagrad_time = time.perf_counter()-start

In [None]:
# Print the test-set metrics for the Adagrad model.
metrike([ann_adagrad], X_test, y_test, [adagrad_time])

In [None]:
# Training vs. validation AUC per epoch. The figure is created before
# plotting so the curves are drawn on the sized canvas instead of an
# implicit default figure followed by an empty one.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(adagrad_history.history["auc"]))
plt.plot(pd.DataFrame(adagrad_history.history["val_auc"]))
plt.show()

# Training vs. validation loss per epoch.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(adagrad_history.history["loss"]))
plt.plot(pd.DataFrame(adagrad_history.history["val_loss"]))
plt.show()

## ANN - NADAM

In [None]:
# Clear the shared AUC metric's accumulated state before reusing it.
metric.reset_state()
# Same topology as the other networks: 8 -> 16 -> 32 ReLU units, sigmoid output.
ann_nadam = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
ann_nadam.compile(
    optimizer='nadam',
    loss='binary_crossentropy',
    metrics=[metric],
)

In [None]:
# Train the Nadam model and measure the elapsed training time.
# perf_counter() is monotonic, so the measurement cannot be skewed by
# system clock adjustments during training.
# NOTE(review): the test split is also used as validation data here.
start = time.perf_counter()
nadam_history = ann_nadam.fit(X_train, y_train, batch_size = 32, epochs = 5, validation_data=(X_test,y_test))
nadam_time = time.perf_counter()-start

In [None]:
# Print the test-set metrics for the Nadam model.
metrike([ann_nadam], X_test, y_test, [nadam_time])

In [None]:
# Training vs. validation AUC per epoch. The figure is created before
# plotting so the curves are drawn on the sized canvas instead of an
# implicit default figure followed by an empty one.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(nadam_history.history["auc"]))
plt.plot(pd.DataFrame(nadam_history.history["val_auc"]))
plt.show()

# Training vs. validation loss per epoch.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(nadam_history.history["loss"]))
plt.plot(pd.DataFrame(nadam_history.history["val_loss"]))
plt.show()

## ANN - SGD

In [None]:
# Clear the shared AUC metric's accumulated state before reusing it.
metric.reset_state()
# Same topology as the other networks: 8 -> 16 -> 32 ReLU units, sigmoid output.
ann_sgd = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
ann_sgd.compile(
    optimizer='SGD',
    loss='binary_crossentropy',
    metrics=[metric],
)

In [None]:
# Train the SGD model and measure the elapsed training time.
# perf_counter() is monotonic, so the measurement cannot be skewed by
# system clock adjustments during training.
# NOTE(review): the test split is also used as validation data here.
start = time.perf_counter()
sgd_history = ann_sgd.fit(X_train, y_train, batch_size = 32, epochs = 80, validation_data=(X_test,y_test))
sgd_time = time.perf_counter()-start

In [None]:
# Print the test-set metrics for the SGD model.
metrike([ann_sgd], X_test, y_test, [sgd_time])

In [None]:
# Training vs. validation AUC per epoch. The figure is created before
# plotting so the curves are drawn on the sized canvas instead of an
# implicit default figure followed by an empty one.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(sgd_history.history["auc"]))
plt.plot(pd.DataFrame(sgd_history.history["val_auc"]))
plt.show()

# Training vs. validation loss per epoch.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(sgd_history.history["loss"]))
plt.plot(pd.DataFrame(sgd_history.history["val_loss"]))
plt.show()

## ANN - RMSprop

In [None]:
# Clear the shared AUC metric's accumulated state before reusing it.
metric.reset_state()
# Same topology as the other networks: 8 -> 16 -> 32 ReLU units, sigmoid output.
ann_rms = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
ann_rms.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=[metric],
)

In [None]:
# Train the RMSprop model and measure the elapsed training time.
# perf_counter() is monotonic, so the measurement cannot be skewed by
# system clock adjustments during training.
# NOTE(review): the test split is also used as validation data here.
start = time.perf_counter()
rms_history = ann_rms.fit(X_train, y_train, batch_size = 32, epochs = 8, validation_data=(X_test,y_test))
rms_time = time.perf_counter()-start

In [None]:
# Print the test-set metrics for the RMSprop model.
metrike([ann_rms], X_test, y_test, [rms_time])

In [None]:
# Training vs. validation AUC per epoch. The figure is created before
# plotting so the curves are drawn on the sized canvas instead of an
# implicit default figure followed by an empty one.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(rms_history.history["auc"]))
plt.plot(pd.DataFrame(rms_history.history["val_auc"]))
plt.show()

# Training vs. validation loss per epoch.
plt.figure(figsize=(6,6), dpi=500)
plt.plot(pd.DataFrame(rms_history.history["loss"]))
plt.plot(pd.DataFrame(rms_history.history["val_loss"]))
plt.show()

In [None]:
# Parallel lists: position i in each list refers to the same optimizer run.
models = [ann_adam,ann_adamax,ann_adagrad,ann_nadam,ann_sgd,ann_rms]
times = [adam_time,adamax_time,adagrad_time, nadam_time, sgd_time,rms_time]
histories = [adam_history,adamax_history,adagrad_history,nadam_history,sgd_history,rms_history]
# Build the comparison table (also plots the AUC and loss curves per model).
metrike_table(models, X_test, y_test, times, histories)

In [None]:
# Save every trained model under a path named after its optimizer class.
# NOTE(review): the path has no file extension; some Keras versions require
# a '.keras' or '.h5' suffix — confirm against the installed version.
for model in models:
    model.save(f'../saved_models/credit_card/{type(model.optimizer).__name__}')