In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [None]:
df = pd.read_csv("creditcard.csv")

In [None]:
df.shape

(284807, 31)

In [None]:
df.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [None]:
X = df.drop(['Class'], axis=1)
y = df['Class']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
encoding_dim = 32

In [None]:
X_train.shape[1]

30

In [None]:
encoder = models.Sequential([
    layers.Dense(encoding_dim, activation='relu', input_shape=(X_train.shape[1],)),
])

decoder = models.Sequential([
    layers.Dense(X_train.shape[1], activation='sigmoid'),
])

In [None]:
autoencoder = models.Sequential([
    encoder,
    decoder,
])

In [None]:
autoencoder.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

In [None]:
autoencoder.fit(X_train, X_train, epochs=10, batch_size=64, shuffle=True, validation_data=(X_test, X_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7c6f786ff3d0>

In [None]:
loss = autoencoder.evaluate(X_test, X_test)
print(f'Test Loss: {loss}')
print(loss[0])
print(loss[1])

Test Loss: [0.6485553979873657, 0.9279864430427551]
0.6485553979873657
0.9279864430427551


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Evaluate the model on the test set
decoded_data = autoencoder.predict(X_test)
mse = np.mean(np.power(X_test - decoded_data, 2), axis=1)
threshold = np.percentile(mse, 95)  # Adjust the percentile as needed for tuning sensitivity

# Identify outliers based on the threshold
outliers = mse > threshold

# Print evaluation metrics for anomaly detection
print("Confusion Matrix:\n", confusion_matrix(y_test, outliers))
print("\nClassification Report:\n", classification_report(y_test, outliers))

Confusion Matrix:
 [[108204   5528]
 [    22    169]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.95      0.97    113732
           1       0.03      0.88      0.06       191

    accuracy                           0.95    113923
   macro avg       0.51      0.92      0.52    113923
weighted avg       1.00      0.95      0.97    113923



In [None]:
# Print the number of outliers and anomalies
num_outliers = np.sum(outliers)
num_anomalies = np.sum(y_test[outliers] == 1)  # Assuming 1 represents the positive class

print(f'Number of outliers: {num_outliers}')
print(f'Number of anomalies: {num_anomalies}')

Number of outliers: 5697
Number of anomalies: 169
