In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import average_precision_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset (assuming creditcard.csv is in the working directory)
data = pd.read_csv('creditcard.csv')

# Split into features (X) and labels (y)
X = data.drop('Class', axis=1)  # 'Class' is the column with labels (0 for normal, 1 for fraud)
y = data['Class']  # The label column

# Hold out 20% of the rows for evaluation.
# stratify=y keeps the proportion of each label the same in the train and test
# splits; without it a rare class can end up badly under- or over-represented
# in the held-out set, which makes precision/recall-style metrics unstable.
X_train_sample, X_test_sample, y_train_sample, y_test_sample = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Rescale the features: statistics are fitted on the training split only and
# then re-used for the test split so no test information leaks into the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_sample)
X_test_scaled = scaler.transform(X_test_sample)

print("Training data and labels loaded and preprocessed successfully!")


In [None]:
# Isolation Forest model
# Fitted without labels on the scaled training split, then evaluated on the
# held-out test split so the reported score reflects unseen data.
print("Training Isolation Forest...")
iso_forest = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
iso_forest.fit(X_train_scaled)

# Hard labels for display only: predict() returns -1 for anomaly and 1 for
# normal; remap to 1 = anomaly, 0 = normal to match the 'Class' convention.
y_pred_if = (iso_forest.predict(X_test_scaled) == -1).astype(int).tolist()

# Continuous anomaly score for ranking. decision_function() is lower for more
# abnormal samples, so negate it to get "higher = more anomalous".
anomaly_score_if = -iso_forest.decision_function(X_test_scaled)

# Calculate Average Precision Score for Isolation Forest.
# Average precision summarises the whole precision-recall curve, so it must be
# given the continuous score; thresholded 0/1 labels reduce it to one point.
avg_precision_if = average_precision_score(y_test_sample, anomaly_score_if)
print(f"Isolation Forest Average Precision Score: {avg_precision_if:.4f}")

# Show sample predictions for Isolation Forest
print("Sample Isolation Forest Predictions (first 10):", y_pred_if[:10])

In [None]:
# Local Outlier Factor model
# novelty=True is required to score data the model was not fitted on; in that
# mode predict()/decision_function() are meant for unseen samples only, so the
# model is fitted on the training split and evaluated on the held-out split.
print("Training Local Outlier Factor (LOF)...")
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05, novelty=True)
lof.fit(X_train_scaled)

# Hard labels for display only: -1 (anomaly) -> 1, 1 (normal) -> 0.
y_pred_lof = (lof.predict(X_test_scaled) == -1).astype(int).tolist()

# Continuous anomaly score: decision_function() is lower for more abnormal
# samples, so negate it to get "higher = more anomalous".
anomaly_score_lof = -lof.decision_function(X_test_scaled)

# Calculate Average Precision Score for LOF from the continuous score rather
# than from thresholded labels, which would collapse the precision-recall curve.
avg_precision_lof = average_precision_score(y_test_sample, anomaly_score_lof)
print(f"LOF Average Precision Score: {avg_precision_lof:.4f}")

# Show sample predictions for LOF
print("Sample LOF Predictions (first 10):", y_pred_lof[:10])

In [None]:
# One-Class SVM model
# Fitted without labels on the scaled training split, then evaluated on the
# held-out test split so the reported score reflects unseen data.
print("Training One-Class SVM...")
ocsvm = OneClassSVM(nu=0.05, kernel='rbf', gamma='scale')
ocsvm.fit(X_train_scaled)

# Hard labels for display only: predict() returns 1 for normal and -1 for
# anomaly; remap to 1 = anomaly, 0 = normal to match the 'Class' convention.
y_pred_ocsvm = (ocsvm.predict(X_test_scaled) == -1).astype(int).tolist()

# Continuous anomaly score: decision_function() is the signed distance to the
# separating boundary (negative for outliers), so negate it to get
# "higher = more anomalous".
anomaly_score_ocsvm = -ocsvm.decision_function(X_test_scaled)

# Calculate Average Precision Score for One-Class SVM from the continuous score
# rather than from thresholded labels.
avg_precision_ocsvm = average_precision_score(y_test_sample, anomaly_score_ocsvm)
print(f"One-Class SVM Average Precision Score: {avg_precision_ocsvm:.4f}")

# Show sample predictions for One-Class SVM
print("Sample One-Class SVM Predictions (first 10):", y_pred_ocsvm[:10])

In [None]:
# Random Forest Classifier model (supervised baseline)
# NOTE(review): no class re-weighting or resampling is applied here; if the
# 'Class' labels are heavily imbalanced, consider class_weight — TODO confirm.
print("Training Random Forest Classifier...")
rf_clf = RandomForestClassifier(n_estimators=150, max_features=1.0, random_state=42)
rf_clf.fit(X_train_scaled, y_train_sample)

# Hard labels for display only, taken on the held-out test split.
# Scoring a supervised model on its own training rows mostly measures
# memorisation, so every prediction below uses X_test_scaled.
y_pred_rf = (rf_clf.predict(X_test_scaled) == 1).astype(int).tolist()

# Probability of the positive class (label 1). The column is looked up through
# classes_ instead of assuming it is the second column.
fraud_col = list(rf_clf.classes_).index(1)
fraud_proba_rf = rf_clf.predict_proba(X_test_scaled)[:, fraud_col]

# Calculate Average Precision Score for Random Forest from the probabilities;
# thresholded 0/1 labels would collapse the precision-recall curve to a point.
avg_precision_rf = average_precision_score(y_test_sample, fraud_proba_rf)
print(f"Random Forest Classifier Average Precision Score: {avg_precision_rf:.4f}")

# Show sample predictions for Random Forest Classifier
print("Sample Random Forest Classifier Predictions (first 10):", y_pred_rf[:10])

In [None]:
# Build Autoencoder model for anomaly detection
# The network is trained to reproduce its own (standardised) input; samples it
# reconstructs poorly are treated as anomalies.
print("Training Autoencoder Model...")

n_features = X_train_scaled.shape[1]

autoencoder = Sequential()
autoencoder.add(Dense(64, activation='relu', input_dim=n_features,
                      activity_regularizer=regularizers.l2(0.001)))
autoencoder.add(Dense(32, activation='relu'))
autoencoder.add(Dense(16, activation='relu'))  # bottleneck
autoencoder.add(Dense(32, activation='relu'))
autoencoder.add(Dense(64, activation='relu'))
# Linear output: the targets are StandardScaler outputs, which can be negative
# or larger than 1. A sigmoid output is confined to (0, 1) and could never
# reconstruct those values, inflating the error for every sample.
autoencoder.add(Dense(n_features, activation='linear'))

autoencoder.compile(optimizer='adam', loss='mse')

# Train the Autoencoder (input and target are the same matrix)
autoencoder.fit(X_train_scaled, X_train_scaled, epochs=50, batch_size=256, shuffle=True, validation_split=0.1)

# Per-sample reconstruction error (mean absolute error across features).
# Training-split errors are only used to calibrate the threshold below.
train_reconstruction_error = np.mean(
    np.abs(autoencoder.predict(X_train_scaled) - X_train_scaled), axis=1
)
reconstructions = autoencoder.predict(X_test_scaled)
reconstruction_error = np.mean(np.abs(reconstructions - X_test_scaled), axis=1)

# Set an anomaly threshold at the 95th percentile of the training errors and
# apply it to the held-out errors, so the cut-off is not tuned on the data it
# is evaluated on. The hard labels are for display only.
threshold = np.percentile(train_reconstruction_error, 95)
y_pred_autoencoder = (reconstruction_error > threshold).astype(int).tolist()

# Calculate Average Precision Score for Autoencoder from the continuous
# reconstruction error (higher = more anomalous) instead of thresholded labels.
avg_precision_autoencoder = average_precision_score(y_test_sample, reconstruction_error)
print(f"Autoencoder Average Precision Score: {avg_precision_autoencoder:.4f}")

# Show sample predictions for Autoencoder
print("Sample Autoencoder Predictions (first 10):", y_pred_autoencoder[:10])

In [None]:
# Recap of every model's average precision, emitted as a single block of text.
# The lines are collected first and written with one print call; the resulting
# output is the same as printing each line separately.
summary_lines = [
    "\n--- Model Performance Summary ---",
    f"Isolation Forest Average Precision Score: {avg_precision_if:.4f}",
    f"LOF Average Precision Score: {avg_precision_lof:.4f}",
    f"One-Class SVM Average Precision Score: {avg_precision_ocsvm:.4f}",
    f"Random Forest Classifier Average Precision Score: {avg_precision_rf:.4f}",
    f"Autoencoder Average Precision Score: {avg_precision_autoencoder:.4f}",
]
print("\n".join(summary_lines))