# Credit Card Fraud Detection
This notebook demonstrates how to preprocess credit card transaction data, train and evaluate various deep learning models, and visualize the results.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, classification_report, roc_auc_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, SimpleRNN, LSTM, Input
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw transactions from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='./creditcard.csv')
df.head(n=5)  # Preview the first five rows as the cell output

## Data Preprocessing
In this step, we preprocess the data by standardizing the `Amount` feature, dropping the `Time` column, and splitting the dataset into stratified training (80%) and testing (20%) sets.

In [None]:
# Data Preprocessing
# Separate the features from the label; `Time` is dropped and not used as a feature.
X = df.drop(['Time', 'Class'], axis=1)
y = df['Class']

# Split the dataset BEFORE scaling so the scaler never sees test-set statistics
# (fitting it on the full frame leaks test information into training).
# The split is stratified on the label so both sets keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Work on explicit copies so the column assignments below do not trigger pandas'
# SettingWithCopyWarning, and so `df` keeps its raw values (re-running this cell
# always starts from unscaled amounts).
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training amounts only, then apply the same transform to both sets.
amount_scaler = StandardScaler().fit(X_train['Amount'].values.reshape(-1, 1))
X_train['Amount'] = amount_scaler.transform(X_train['Amount'].values.reshape(-1, 1)).ravel()
X_test['Amount'] = amount_scaler.transform(X_test['Amount'].values.reshape(-1, 1)).ravel()

# Display the shape of training and testing sets
X_train.shape, X_test.shape

## Model Training and Evaluation
We will now train and evaluate different models: CNN, RNN, LSTM, and AutoEncoder. The performance of each model will be evaluated using precision, recall, F1-score, and AUC.

In [None]:
# Initialize results DataFrame: one row per evaluated model, appended by the cells below
results = pd.DataFrame(columns=['Model', 'Precision', 'Recall', 'F1-Score', 'AUC'])

# Common training setup
# Stop once the validation loss has not improved for 3 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it the
# model keeps the weights of the last epoch, i.e. after 3 epochs of no improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Function to evaluate and store results
def evaluate_model(model, model_name, X_test, y_test):
    """Score a fitted binary classifier on the test set and record its metrics.

    Args:
        model: Fitted model whose ``predict(X_test)`` returns one score per sample
            (the models in this notebook end in a single sigmoid unit).
        model_name: Label stored in the ``Model`` column of ``results``.
        X_test: Test inputs, shaped as ``model`` expects them.
        y_test: True binary labels for ``X_test``.

    Side effects:
        Appends ``[model_name, precision, recall, f1, auc]`` to the module-level
        ``results`` DataFrame and prints a classification report and the AUC.
    """
    # predict() yields an (n, 1) array for a single-unit output; flatten it so the
    # metric functions receive one score per sample.
    probabilities = np.asarray(model.predict(X_test)).ravel()
    predictions = (probabilities > 0.5).astype(int)
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, predictions, average='binary')
    # ROC AUC is a ranking metric: it must be computed from the continuous scores.
    # Feeding it the thresholded 0/1 labels collapses the curve to a single point.
    auc = roc_auc_score(y_test, probabilities)
    results.loc[len(results)] = [model_name, precision, recall, f1, auc]
    print(f"{model_name} Evaluation:\n")
    print(classification_report(y_test, predictions))
    print(f"AUC Score: {auc}\n")


### Convolutional Neural Network (CNN) Model
Let's train a CNN model on the data.

In [None]:
# CNN Model
# Add a trailing axis of size 1 so each sample has shape (n_features, 1),
# matching the `input_shape` given to the first layer.
X_train_cnn = np.asarray(X_train)[:, :, np.newaxis]
X_test_cnn = np.asarray(X_test)[:, :, np.newaxis]

# Conv -> pool -> flatten -> dense head with dropout -> single sigmoid output
cnn_model = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Hold out 20% of the training data for validation; early stopping watches its loss
cnn_model.fit(
    X_train_cnn,
    y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)
evaluate_model(cnn_model, 'CNN', X_test_cnn, y_test)

### Recurrent Neural Network (RNN) Model
Now, we will train an RNN model.

In [None]:
# RNN Model
# Reuses the (n_features, 1)-shaped arrays built in the CNN cell, so that cell must run first.
rnn_model = Sequential([
    SimpleRNN(64, input_shape=(X_train_cnn.shape[1], 1), activation='relu'),
    Dense(1, activation='sigmoid'),
])
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Hold out 20% of the training data for validation; early stopping watches its loss
rnn_model.fit(
    X_train_cnn,
    y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)
evaluate_model(rnn_model, 'RNN', X_test_cnn, y_test)

### Long Short-Term Memory (LSTM) Model
Next, we will train an LSTM model.

In [None]:
# LSTM Model
# Reuses the (n_features, 1)-shaped arrays built in the CNN cell, so that cell must run first.
lstm_model = Sequential([
    LSTM(64, input_shape=(X_train_cnn.shape[1], 1), activation='relu'),
    Dense(1, activation='sigmoid'),
])
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Hold out 20% of the training data for validation; early stopping watches its loss
lstm_model.fit(
    X_train_cnn,
    y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)
evaluate_model(lstm_model, 'LSTM', X_test_cnn, y_test)

### AutoEncoder Model
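Finally, we will train an AutoEncoder and use it as an anomaly detector: a transaction is flagged as fraud when its reconstruction error exceeds a threshold set at the 95th percentile of the errors observed on normal transactions.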

In [None]:
# AutoEncoder Model
# The autoencoder learns to reconstruct transactions; a transaction is flagged as
# fraud when its reconstruction error is unusually high.
input_dim = X_train.shape[1]
encoding_dim = 14
input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation='relu')(input_layer)
# Linear output + MSE: the inputs are not confined to [0, 1] (the standardized
# `Amount` is negative for below-average amounts), so a sigmoid output cannot
# reproduce them and binary cross-entropy is not a valid reconstruction loss.
decoder = Dense(input_dim, activation='linear')(encoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# Plain float arrays keep the error arithmetic below independent of pandas index alignment.
X_train_values = np.asarray(X_train, dtype='float32')
X_test_values = np.asarray(X_test, dtype='float32')

# Train on normal transactions only so that fraud reconstructs poorly.
X_train_normal = X_train_values[np.asarray(y_train) == 0]
autoencoder.fit(X_train_normal, X_train_normal, epochs=50, batch_size=256, validation_split=0.2, callbacks=[early_stopping])

# Pick the threshold from TRAINING data: using the test labels to choose it would
# leak test information into the decision rule.
train_errors = np.mean(np.square(X_train_normal - autoencoder.predict(X_train_normal)), axis=1)
threshold = np.percentile(train_errors, 95)

# Per-transaction reconstruction error on the test set
X_test_predictions = autoencoder.predict(X_test_values)
mse = np.mean(np.square(X_test_values - X_test_predictions), axis=1)
error_df = pd.DataFrame({'reconstruction_error': mse, 'true_class': np.asarray(y_test)}, index=y_test.index)
y_pred = (error_df['reconstruction_error'] > threshold).astype(int).to_numpy()

precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
# ROC AUC needs the continuous anomaly score, not the thresholded 0/1 labels.
auc = roc_auc_score(y_test, mse)
results.loc[len(results)] = ['AutoEncoder', precision, recall, f1, auc]
print("AutoEncoder Evaluation:\n")
print(classification_report(y_test, y_pred))
print(f"AUC Score: {auc}\n")

## Results Visualization
We will now visualize the performance of each model using bar plots for precision, recall, F1-score, and AUC.

In [None]:
# Build a 2x2 grid of axes, one panel per metric
fig, axes = plt.subplots(2, 2, figsize=(14, 7))

# Bar chart of Precision, Recall, F1-Score and AUC, with one bar per model
metrics = ['Precision', 'Recall', 'F1-Score', 'AUC']
for ax, metric in zip(axes.flat, metrics):
    sns.barplot(x='Model', y=metric, data=results, ax=ax)
    ax.set_title(f'Model Comparison - {metric}')

fig.tight_layout()
plt.show()

## Summary of Results
Finally, we display the results DataFrame showing the performance metrics of all models.

In [None]:
# End the cell with the frame itself so Jupyter renders it as a rich table
# (one row per model), matching how earlier cells display their output.
results