In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import Input, Dense
from keras.models import Model
from pylab import rcParams
from sklearn.metrics import (confusion_matrix, precision_recall_curve)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Default matplotlib figure size applied to every plot drawn below.
rcParams['figure.figsize'] = 14, 8

#Preprocessing

In [2]:
def process_and_split_encoder_data(csv_path="creditcard.csv", test_size=0.33, random_state=42):
    """Load the credit-card data set and build the autoencoder train/test arrays.

    The ``Time`` column is dropped, the rows are split into a training and a
    test partition, and the feature columns are min-max scaled. Only training
    rows whose ``Class`` equals 0 are returned for training; the test
    partition keeps every row.

    The scaler is fitted on the training partition only and then applied to
    the test partition, so no statistics of the held-out rows leak into the
    preprocessing. Scaled test values may therefore fall slightly outside
    [0, 1]. Labels are returned unscaled, as read from the ``Class`` column.

    Parameters
    ----------
    csv_path : str or path-like, default "creditcard.csv"
        Location of the CSV file; it must contain ``Time`` and ``Class`` columns.
    test_size : float, default 0.33
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train_good, y_train_good, X_test, y_test)``.

    Raises
    ------
    FileNotFoundError
        If ``csv_path`` does not exist.
    """
    # Non-mutating drop: re-running the cell always starts from the raw file.
    dataframe = pd.read_csv(csv_path).drop(columns="Time")
    target = dataframe["Class"]
    features = dataframe.drop(columns="Class")

    X_train_, X_test_, y_train_, y_test_ = train_test_split(
        features, target, test_size=test_size, random_state=random_state)

    # Fit on training rows only; the test rows are merely transformed.
    scaler = MinMaxScaler().fit(X_train_)
    X_train_scaled = scaler.transform(X_train_)
    X_test_scaled = scaler.transform(X_test_)

    # Positional boolean mask selecting the Class == 0 training rows.
    good_mask = (y_train_ == 0).to_numpy()
    return (X_train_scaled[good_mask], y_train_[good_mask].to_numpy(),
            X_test_scaled, y_test_.to_numpy())

In [3]:
# Training arrays hold only Class == 0 rows; the test arrays keep every held-out row.
X_train, y_train, X_test, y_test = process_and_split_encoder_data()

FileNotFoundError: [Errno 2] No such file or directory: 'creditcard.csv'

#Building the model

In [None]:
def create_model(input_dimension,encoding_dimension):
    """Assemble a four-layer dense autoencoder as a Keras functional ``Model``.

    Layer stack, in order (units, activation):
    ``encoding_dimension`` / tanh, half of that / relu, half of that / tanh,
    ``input_dimension`` / relu.

    Parameters
    ----------
    input_dimension : int
        Width of the input vector and of the reconstructed output.
    encoding_dimension : int
        Width of the first encoding layer; the two middle layers use
        ``int(encoding_dimension / 2)`` units.

    Returns
    -------
    Model
        Uncompiled model mapping the input layer to its reconstruction.
    """
    # Both middle layers share this width.
    half_width = int(encoding_dimension / 2)
    layer_specs = (
        (encoding_dimension, "tanh"),
        (half_width, "relu"),
        (half_width, 'tanh'),
        (input_dimension, 'relu'),
    )

    model_input = Input(shape=(input_dimension, ))
    tensor = model_input
    # Chain the dense layers, feeding each one the previous layer's output.
    for units, activation_name in layer_specs:
        tensor = Dense(units, activation=activation_name)(tensor)
    return Model(inputs=model_input, outputs=tensor)

In [None]:
# Training hyperparameters, named so they are easy to find and tune.
EPOCHS = 100
BATCH_SIZE = 30

# First encoding layer is half as wide as the input.
autoencoder = create_model(X_train.shape[1], X_train.shape[1] // 2)

# No 'accuracy' metric: it is a classification metric and carries no meaning
# for a mean-squared-error reconstruction objective.
autoencoder.compile(optimizer='adam',
                    loss='mean_squared_error')

# The autoencoder learns to reproduce its own input, hence X_train is both the
# input and the target. Keeping the History object makes the loss curves
# available afterwards and stops the cell from echoing its repr.
# NOTE(review): validation runs on X_test, the same array used for the final
# evaluation below; confirm this is only meant for monitoring.
history = autoencoder.fit(X_train, X_train,
                          epochs=EPOCHS,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          validation_data=(X_test, X_test),
                          verbose=1)

#Prediction

In [None]:
# Reconstruct every test row with the trained autoencoder.
prediction = autoencoder.predict(X_test)
# Per-row mean squared difference between each test row and its reconstruction.
reconstruction_residual = X_test - prediction
measurment_error = np.power(reconstruction_residual, 2).mean(axis=1)

#Visualizing results

In [None]:
def visualize_curve(y_test_param,y_pred_param):
    """Plot precision against recall for the given labels and scores.

    Parameters
    ----------
    y_test_param : array-like
        True labels, passed to ``precision_recall_curve``.
    y_pred_param : array-like
        Scores for each sample, passed to ``precision_recall_curve``.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # The third return value (the thresholds) is not needed for the plot.
    precision_values, recall_values, _ = precision_recall_curve(y_test_param, y_pred_param)

    axes = plt.gca()
    axes.plot(recall_values, precision_values, 'b', label='Precision-Recall curve')
    axes.set_xlabel('Recall')
    axes.set_ylabel('Precision')
    axes.set_title('Precision Recall Curve')
    plt.show()

In [None]:
visualize_curve(y_test, measurment_error)

In [None]:
def confusion_heatmap(y_test_param, prediction_param):
    """Draw an annotated 2x2 confusion-matrix heatmap for binary 0/1 labels.

    Each cell shows its name (True Neg, False Pos, False Neg, True Pos), the
    raw count and its share of all samples. Rows are the actual classes and
    columns the predicted classes, both labelled Normal (0) and Fraud (1).

    Parameters
    ----------
    y_test_param : array-like
        True labels (0 or 1).
    prediction_param : array-like
        Predicted labels (0 or 1).

    The heatmap is drawn on the current matplotlib axes; nothing is returned.
    """
    # Pinning the label order keeps the matrix 2x2 even when one class is
    # absent, so it always matches the 2x2 annotation grid below.
    cf_matrix = confusion_matrix(y_test_param, prediction_param, labels=[0, 1])
    group_names = ['True Neg', 'False Pos', 'False Neg', 'True Pos']
    counts = cf_matrix.flatten()
    # Guard the denominator so a matrix with no samples does not divide by zero.
    total = max(counts.sum(), 1)
    labels = [f"{name}\n{count:0.0f}\n{count / total:.2%}"
              for name, count in zip(group_names, counts)]
    labels = np.asarray(labels).reshape(2, 2)
    categories = ['Normal', 'Fraud']
    ax = sns.heatmap(cf_matrix, annot=labels, fmt='', cmap='Blues',
                     xticklabels=categories, yticklabels=categories)
    # confusion_matrix puts true classes on rows and predictions on columns.
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

In [None]:
# Rows whose reconstruction error exceeds this cut-off are labelled 1, all others 0.
threshold = 0.002
# Vectorised comparison; .tolist() keeps y_pred a plain list of Python ints.
y_pred = (np.asarray(measurment_error) > threshold).astype(int).tolist()
confusion_heatmap(y_test,y_pred)