In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Load dataset
dataset_path = "creditcard.csv"
data = pd.read_csv(dataset_path)



In [None]:
# Data preprocessing
features = data.drop(columns=["Class"])
labels = data["Class"]

In [None]:

# Handle missing values by filling with column means
data.fillna(data.mean(), inplace=True)

In [None]:
# Balance dataset using SMOTE
smote = SMOTE(sampling_strategy=0.5, random_state=42)
X_resampled, y_resampled = smote.fit_resample(features, labels)


In [None]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [None]:
# Standardize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Autoencoder model
input_dim = X_train.shape[1]
autoencoder = keras.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(input_dim, activation='sigmoid')
])


In [None]:

# Compile model
autoencoder.compile(optimizer='adam', loss='mse')

In [None]:
# Train model
autoencoder.fit(X_train, X_train, epochs=20, batch_size=64, validation_data=(X_test, X_test))

Epoch 1/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - loss: 0.8443 - val_loss: 0.7719
Epoch 2/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - loss: 0.7555 - val_loss: 0.7624
Epoch 3/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - loss: 0.7460 - val_loss: 0.7602
Epoch 4/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - loss: 0.7430 - val_loss: 0.7577
Epoch 5/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - loss: 0.7414 - val_loss: 0.7552
Epoch 6/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - loss: 0.7375 - val_loss: 0.7541
Epoch 7/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - loss: 0.7408 - val_loss: 0.7531
Epoch 8/20
[1m5331/5331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - loss: 0.7365 - val_loss: 0.7519
Epoch 9/20
[1m5

<keras.src.callbacks.history.History at 0x7de7f238a310>

In [None]:
# Generate reconstruction errors
X_test_pred = autoencoder.predict(X_test)
mse = np.mean(np.power(X_test - X_test_pred, 2), axis=1)

[1m2666/2666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step


In [None]:


# Threshold for anomaly detection
threshold = np.percentile(mse, 95)
y_pred_autoencoder = (mse > threshold).astype(int)

In [None]:


# Train Random Forest for comparison
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [None]:
# Evaluation
print("Autoencoder Model Performance:")
print(classification_report(y_test, y_pred_autoencoder))
print("Random Forest Model Performance:")
print(classification_report(y_test, y_pred_rf))

Autoencoder Model Performance:
              precision    recall  f1-score   support

           0       0.70      1.00      0.82     56777
           1       0.94      0.14      0.25     28518

    accuracy                           0.71     85295
   macro avg       0.82      0.57      0.53     85295
weighted avg       0.78      0.71      0.63     85295

Random Forest Model Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56777
           1       1.00      1.00      1.00     28518

    accuracy                           1.00     85295
   macro avg       1.00      1.00      1.00     85295
weighted avg       1.00      1.00      1.00     85295

