# In [None]:  (notebook cell marker, kept as a comment so the file parses as Python)
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
import joblib

# Read the raw transactions and report the table dimensions
df = pd.read_csv('creditcard.csv')
print(df.shape)
df.head()

# Preprocessing: discard the 'Time' column and standardise 'Amount' in place.
# NOTE(review): the scaler is fit on the full table, i.e. before any
# train/test split performed later in the script -- confirm this is intended.
df = df.drop(columns=['Time'])
amount_scaler = StandardScaler()
df['Amount'] = amount_scaler.fit_transform(df[['Amount']])

# Features are every remaining column except 'Class'; 'Class' is the label
y = df['Class']
X = df.drop(columns='Class')

# ========================================
# 🔍 Anomaly Detection Section (Added here)
# ========================================

# Isolation Forest: fit on all features, then recode the detector output so
# that a prediction of -1 becomes 1 and anything else becomes 0, which lets
# it be compared against the 0/1 values in `y`.
iso = IsolationForest(contamination=0.0017, random_state=42)
iso_raw = iso.fit_predict(X)
y_pred_iso = (iso_raw == -1).astype(int).tolist()

print("Isolation Forest Confusion Matrix:")
print(confusion_matrix(y, y_pred_iso))

# Local Outlier Factor: same -1 -> 1 / otherwise -> 0 recoding as above
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.0017)
lof_raw = lof.fit_predict(X)
y_pred_lof = (lof_raw == -1).astype(int).tolist()

print("Local Outlier Factor Confusion Matrix:")
print(confusion_matrix(y, y_pred_lof))

# ========================================
# Continue with Supervised Learning
# ========================================

# Split into train/test sets BEFORE any resampling.
# Oversampling the full dataset first would put synthetic rows (interpolated
# from rows that later land in the test set) into the training data, and
# would also fill the test set with synthetic points, so the reported metrics
# would be optimistic. `stratify=y` keeps the class ratio the same in both
# splits, so the held-out set keeps the same share of the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Balance ONLY the training split using SMOTE; the test split stays untouched
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_train, y_train)

# Keep the original variable names pointing at the data the model is fit on,
# so code that reads X_train / y_train afterwards still sees the training set.
X_train, y_train = X_resampled, y_resampled

# Train XGBoost model on the balanced training data
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

# Predictions & evaluation on the real (non-resampled) held-out transactions
y_pred = model.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ROC curve for the fitted model on the test split.
# Column 1 of predict_proba is used as the score passed to roc_curve.
y_score = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)

# Draw the curve together with the dashed diagonal from (0, 0) to (1, 1)
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f'AUC = {roc_auc:.2f}')
ax.plot([0, 1], [0, 1], linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
ax.grid()
plt.show()

# Save model. Create the target directory first so the dump does not fail
# with FileNotFoundError when 'models/' is missing; this is a no-op if the
# directory already exists.
model_path = Path('models') / 'model.pkl'
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)
