In [None]:
# 1. Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import warnings

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from xgboost import XGBClassifier


In [None]:
# 2. Load dataset and lowercase columns
# Read the CSV from the working directory, then normalise every header to
# lowercase so later cells can refer to columns by a single spelling.
df = pd.read_csv("creditcard.csv")
df.columns = [col.lower() for col in df.columns]
df.head()

In [None]:
# 3. Visualize class distribution
# One bar per distinct value of the 'class' column; labels are set on the
# Axes object returned by seaborn rather than through the pyplot state machine.
ax = sns.countplot(data=df, x='class')
ax.set(title="Fraud vs Non-Fraud", xlabel="Class", ylabel="Count")
plt.show()

In [None]:
# 4. Preprocess - bin 'amount' and 'time', drop originals
#
# 'amount' is split into 5 quantile-based bins (pd.qcut) and 'time' into
# 6 equal-width bins (pd.cut); labels=False stores the integer bin index.
#
# The guard keeps this cell idempotent: the raw columns are removed once the
# bins exist, so an unguarded re-run would raise KeyError on 'amount'/'time'.
# If the bins are missing AND the raw columns are missing, the KeyError still
# surfaces, so an unexpected schema is not silently ignored.
#
# NOTE(review): bin edges are computed on the full dataset, i.e. before the
# train/validation split performed later - confirm this is acceptable.
if not {'amount_bin', 'time_bin'}.issubset(df.columns):
    # drop() returns a new frame, so the frame shown by earlier cells is not
    # mutated behind the reader's back.
    binned = df.drop(columns=['amount', 'time'])
    binned['amount_bin'] = pd.qcut(df['amount'], q=5, labels=False)
    binned['time_bin'] = pd.cut(df['time'], bins=6, labels=False)
    df = binned
df.head()

In [None]:
# 5. Split data into train and validation sets
# Target is the 'class' column; every remaining column is a feature.
y = df['class']
X = df.drop(columns='class')

# 80/20 split, stratified on the target so both parts keep the same class
# ratio; the fixed seed makes the split reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# 6. Train Model
# Gradient-boosted tree classifier with 100 boosting rounds and a fixed seed.
#
# `use_label_encoder` is deliberately not passed: the parameter was deprecated
# and subsequently removed from XGBoost, and on current releases supplying it
# only produces an "unused parameter" warning.
#
# NOTE(review): no eval_set is passed to fit(), so eval_metric is presumably
# not reported anywhere during training - confirm whether it is still needed.
model = XGBClassifier(
    n_estimators=100,
    eval_metric='logloss',
    random_state=42,
)
model.fit(X_train, y_train, verbose=False)

In [None]:
# 7. Evaluation
# Probability of the positive class (second column of predict_proba) feeds the
# ROC-AUC; the hard labels from predict() feed the confusion matrix and report.
y_prob = model.predict_proba(X_valid)[:, 1]
y_pred = model.predict(X_valid)

print("Confusion Matrix:", confusion_matrix(y_valid, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_valid, y_pred), sep="\n")

roc_auc = roc_auc_score(y_valid, y_prob)
print(f"ROC-AUC Score on validation: {roc_auc:.4f}")

In [None]:
# 8. SHAP Feature Importance
# Explain the fitted tree model on the validation features and draw a bar
# summary of the SHAP values per feature.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_valid)

# Start a fresh figure, but pass the size through `plot_size`: summary_plot
# resizes the current figure itself (default plot_size="auto"), so a figsize
# given to plt.figure() would be overridden.
plt.figure()
shap.summary_plot(shap_values, X_valid, plot_type="bar", plot_size=(10, 6))