# Credit Card Fraud Detection Analysis
This notebook demonstrates several machine learning models for detecting credit card fraud, together with tools for interpreting their predictions:
- Logistic Regression
- Random Forest
- XGBoost
- Neural Networks
- Isolation Forest (Anomaly Detection)
- SHAP for Interpretability
- LIME for Local Interpretability


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import shap
import lime
import lime.lime_tabular
import matplotlib.pyplot as plt


### Load Dataset and Preprocess

In [None]:
# Load the dataset
df = pd.read_csv('creditcard.csv')

# Separate features and target ('Class' is the label column)
X = df.drop(columns=['Class'])
y = df['Class']

# Split the data into training and testing sets (70% / 30%).
# stratify=y keeps the class ratio identical in both splits; with a rare
# positive class (this notebook assumes ~0.17% fraud) an unstratified split
# can leave the test set with an unrepresentative number of fraud cases and
# make the reported metrics noisy.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardize the features.
# The scaler is fit on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Logistic Regression

In [None]:
# Logistic Regression: fit on the scaled training data, evaluate on the test split
log_reg = LogisticRegression(random_state=42)
log_reg.fit(X_train_scaled, y_train)
y_pred_log = log_reg.predict(X_test_scaled)
# Predicted probability of the positive class (column 1; assumes labels are
# 0 = non-fraud, 1 = fraud).
y_proba_log = log_reg.predict_proba(X_test_scaled)[:, 1]
print("Logistic Regression Performance:")
print(confusion_matrix(y_test, y_pred_log))
print(classification_report(y_test, y_pred_log))
# ROC-AUC is a ranking metric and needs continuous scores; passing hard 0/1
# predictions would collapse the curve to a single operating point.
print("ROC-AUC Score:", roc_auc_score(y_test, y_proba_log))

### Random Forest

In [None]:
# Random Forest: 100 trees, fit on the scaled training data
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)
# Predicted probability of the positive class (column 1; assumes labels are
# 0 = non-fraud, 1 = fraud).
y_proba_rf = rf.predict_proba(X_test_scaled)[:, 1]
print("\nRandom Forest Performance:")
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))
# ROC-AUC is a ranking metric and needs continuous scores; passing hard 0/1
# predictions would collapse the curve to a single operating point.
print("ROC-AUC Score:", roc_auc_score(y_test, y_proba_rf))

### XGBoost

In [None]:
# XGBoost: gradient-boosted trees, fit on the scaled training data
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb.fit(X_train_scaled, y_train)
y_pred_xgb = xgb.predict(X_test_scaled)
# Predicted probability of the positive class (column 1; assumes labels are
# 0 = non-fraud, 1 = fraud).
y_proba_xgb = xgb.predict_proba(X_test_scaled)[:, 1]
print("\nXGBoost Performance:")
print(confusion_matrix(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))
# ROC-AUC is a ranking metric and needs continuous scores; passing hard 0/1
# predictions would collapse the curve to a single operating point.
print("ROC-AUC Score:", roc_auc_score(y_test, y_proba_xgb))

### Neural Network (MLP)

In [None]:
# Neural Networks (MLP): default architecture, up to 300 training iterations
mlp = MLPClassifier(random_state=42, max_iter=300)
mlp.fit(X_train_scaled, y_train)
y_pred_mlp = mlp.predict(X_test_scaled)
# Predicted probability of the positive class (column 1; assumes labels are
# 0 = non-fraud, 1 = fraud).
y_proba_mlp = mlp.predict_proba(X_test_scaled)[:, 1]
print("\nNeural Network (MLP) Performance:")
print(confusion_matrix(y_test, y_pred_mlp))
print(classification_report(y_test, y_pred_mlp))
# ROC-AUC is a ranking metric and needs continuous scores; passing hard 0/1
# predictions would collapse the curve to a single operating point.
print("ROC-AUC Score:", roc_auc_score(y_test, y_proba_mlp))

### Isolation Forest

In [None]:
# Isolation Forest (Anomaly Detection)
# Unsupervised: the model is fit on features only and never sees the labels.
iso_forest = IsolationForest(contamination=0.0017, random_state=42)  # 0.17% contamination
iso_forest.fit(X_train_scaled)
y_pred_iso = iso_forest.predict(X_test_scaled)
y_pred_iso = np.where(y_pred_iso == -1, 1, 0)  # Convert -1 to 1 (fraudulent), 1 to 0 (non-fraudulent)
# decision_function returns lower values for more abnormal samples, so negate
# it to get a score where higher means "more likely fraud".
iso_scores = -iso_forest.decision_function(X_test_scaled)
print("\nIsolation Forest Performance (Anomaly Detection):")
print(confusion_matrix(y_test, y_pred_iso))
print(classification_report(y_test, y_pred_iso))
# ROC-AUC is a ranking metric and needs continuous scores; passing hard 0/1
# predictions would collapse the curve to a single operating point.
print("ROC-AUC Score:", roc_auc_score(y_test, iso_scores))

### SHAP Analysis

In [None]:
# Explain the fitted XGBoost classifier with SHAP
print("\nRunning SHAP Analysis for XGBoost...")

# Build a tree explainer for the model and compute SHAP values on the
# scaled test split.
explainer = shap.TreeExplainer(xgb)
shap_values = explainer.shap_values(X_test_scaled)

# Summary plot of the SHAP values, labelled with the original feature
# column names (the scaled array no longer carries them).
shap.summary_plot(
    shap_values,
    X_test_scaled,
    feature_names=X.columns,
)

### LIME Explanation

In [None]:
# Local explanation of a single XGBoost prediction with LIME
print("\nRunning LIME Explanations...")

# The explainer is built from the scaled training data so that it works in
# the same feature space the model was trained on.
lime_explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train_scaled,
    feature_names=X.columns,
    class_names=['Non-Fraud', 'Fraud'],
    discretize_continuous=True,
)

# Explain the first row of the scaled test split using the model's
# class-probability output, then render the result inline.
lime_exp = lime_explainer.explain_instance(
    X_test_scaled[0],
    xgb.predict_proba,
)
lime_exp.show_in_notebook(show_table=True)