In [None]:
# imports
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import keras
import sklearn
%matplotlib inline

# data loading: read the transactions CSV (path is relative to the working directory)
csv_path = "creditcard.csv"
credit_df = pd.read_csv(csv_path)

## Inspection
Short inspection of the dataset to get a general understanding before going on with the ML-stuff.
Let's have a general look at the data first.

As we can see, we have 284,807 samples in total, 492 of which are fraudulent transactions (0.173% of the whole dataset). The two classes are heavily imbalanced, which is typical for this kind of data (and fortunately so, otherwise the credit card company would be in big trouble).

The dataset contains 31 columns, one of which is the `Class` label. Unfortunately, most features are not very descriptive since they have been transformed with PCA, probably for privacy reasons. Let's have a look at the remaining features in the next cells.

In [None]:
# Overview of the dataset: size, class balance, and one example row.
num_samples = len(credit_df)
num_fraud = len(credit_df[credit_df['Class'] == 1])

print("Number of samples: " + str(num_samples) + "\n")
print(credit_df.groupby('Class')['Class'].describe())
# The rows are card transactions (not messages), so the label says so.
print("\nShare of fraudulent transactions: " + str(num_fraud / num_samples * 100) + "%\n")
print(credit_df.head(1))

### Amount
Let's have a look at the amounts of the different transactions.
As we can see from the stats below, fraudulent transactions tend to have a higher amount on average. At the same time, the maximum amount for fraudulent transactions is much lower than the one for normal transactions. Funny: some fraudulent transactions with an amount of 0 were recorded; it seems like there are some nice swindlers out there.

In [None]:
# Split the transactions by class once; each frame feeds one histogram panel.
normal_df = credit_df[credit_df['Class'] == 0]
fraud_df = credit_df[credit_df['Class'] == 1]

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel: (axis, frame, title, upper y-limit, bar color).
panels = [
    (axes[0], normal_df, "Normal", 120000, 'b'),
    (axes[1], fraud_df, "Fraud", 500, 'r'),
]
for ax, frame, title, y_max, color in panels:
    ax.set(title = title)
    ax.set_ylim([0, y_max])
    ax.set_xlim([0, 5000])
    # Only the first 100000 rows of each frame are drawn.
    frame.head(100000).Amount.hist(color=color, ax = ax)

# Summary statistics of the amount per class, printed before the figure is shown.
print(credit_df.groupby('Class')['Amount'].describe())

plt.show()

## Data Preparation
Before handing the data to our model, the data has to be prepared.

### Remove redundant features
I will not use the `Time` column, since it does not seem to carry useful information for this task.

In [None]:
# Remove the 'Time' column by rebinding instead of mutating in place.
# errors='ignore' keeps this cell safe to re-run once the column is already gone
# (the in-place version raised a KeyError on the second execution).
credit_df = credit_df.drop('Time', axis=1, errors='ignore')

### Scale remaining features

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize 'Amount'. The scaler expects a 2-D input, hence the reshape to a
# single column. NOTE(review): the scaler is fitted on the full dataset, i.e.
# before the train/test split further down.
amount_scaler = StandardScaler()
amount_column = credit_df['Amount'].values.reshape(-1, 1)
credit_df['Amount'] = amount_scaler.fit_transform(amount_column)
print(credit_df['Amount'].head(10))

### Prepare datasets
For the training of the autoencoder, our training set needs to be free from any anomalies, since the model has to learn the representation of normal transactions only.
Therefore, the fraudulent transactions will be removed before splitting the dataset into a training and a test set with a ratio of 70:30. After that, the fraudulent data will be added to the test set.

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split, and every number computed from it, is reproducible
# across kernel restarts.
SPLIT_SEED = 42

fraudulent_df = credit_df[credit_df['Class'] == 1]
normal_df = credit_df[credit_df['Class'] == 0]

# The autoencoder trains on normal transactions only; 30% of them are held out.
normal_train, normal_test = train_test_split(normal_df, test_size=0.3, random_state=SPLIT_SEED)

# Non-inplace drops: mutating the frames returned by the split in place can
# trigger pandas' SettingWithCopyWarning.
X_train = normal_train.drop('Class', axis=1)

# The test set is the held-out normal transactions plus all fraudulent ones.
test_df = pd.concat([normal_test, fraudulent_df])
y_true = test_df['Class']
X_test = test_df.drop('Class', axis=1)

## Modeling

In [None]:
from keras.layers import Input, Dense
from keras.models import Model

# define parameters
num_encoder_neurons = [20, 10]
num_decoder_neurons = [10, 20]

activation = 'relu'
num_features = X_train.shape[1]

# build layers
input_layer = Input(shape=(num_features, ))
encoder = Dense(num_encoder_neurons[0], activation=activation)(input_layer)
encoder = Dense(num_encoder_neurons[1], activation=activation)(encoder)

decoder = Dense(num_decoder_neurons[0], activation=activation)(encoder)
# Chain onto the previous decoder layer. Feeding `encoder` here again would
# silently drop the first decoder layer from the model graph.
decoder = Dense(num_decoder_neurons[1], activation=activation)(decoder)
# Linear output: the inputs are standardized / PCA-transformed and contain
# negative values, which a ReLU output could never reconstruct.
decoder = Dense(num_features, activation='linear')(decoder)

# build model
autoencoder = Model(inputs=input_layer, outputs=decoder)
# No 'accuracy' metric: it has no meaning for a continuous reconstruction
# target; the MSE loss itself is the quantity to monitor.
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the autoencoder to reproduce its own input: X_train is both the input
# and the target.
fit_options = dict(
    epochs=5,
    batch_size=32,
    shuffle=True,
    # X_test doubles as the validation set (input and target alike)
    validation_data=(X_test, X_test),
    verbose=1,
)
autoencoder.fit(X_train, X_train, **fit_options)

## Evaluating the model
In the next section, we will find the threshold for classifying a transaction as fraudulent and evaluate the performance of the model.

Classical measures like accuracy are not informative here due to the small share of fraudulent data. Instead, the F1-score, which combines the precision and the recall of a model, will be used.

In [None]:
# Reconstruct every test row, then score each row by the mean of its squared
# per-feature reconstruction errors.
predictions = autoencoder.predict(X_test)
squared_error = np.power(X_test - predictions, 2)
mean_squared_error = squared_error.mean(axis=1)

# Pair each row's error with its true label for the threshold search.
results = pd.DataFrame({'mean_squared_error': mean_squared_error, 'y_true': y_true})
print(results['mean_squared_error'].describe())

In [None]:
from sklearn.metrics import f1_score

# Candidate thresholds on the reconstruction error.
thresholds = np.arange(0, 100, 0.5)
y_t = (y_true == 1).values

# A transaction is flagged as fraud when its reconstruction error exceeds the
# threshold. zero_division=0 (scikit-learn >= 0.22) scores thresholds that flag
# nothing as 0 without emitting an UndefinedMetricWarning for each of them.
f1_scores = np.array([
    f1_score(y_t, mean_squared_error > t, zero_division=0)
    for t in thresholds
])

# np.argmax returns the first index of the maximum, i.e. the lowest threshold
# among ties.
max_index = int(np.argmax(f1_scores))
best_threshold = thresholds[max_index]

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(thresholds, f1_scores)
# Mark the best-scoring threshold in red.
ax.scatter([best_threshold], [f1_scores[max_index]], color='r')
ax.set(title="F1-Score for multiple thresholds", xlabel='Threshold', ylabel='F1-Score')
plt.show()