In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset. A forward slash is used as the separator because it works
# on Windows, Linux and macOS alike; the previous 'dataset\cleaned_...' form
# relied on the invalid escape sequence '\c' and only resolved on Windows.
df = pd.read_csv('dataset/cleaned_creditcard.csv')

# Separate the feature columns from the 'Class' target column.
X = df.drop('Class', axis=1)
y = df['Class']

# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [2]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training
# partition only, then apply that same transformation to both partitions so
# no statistics from the test rows influence the scaling.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier

# Candidate classifiers, keyed by the display name used in the reports.
# Every estimator is given an explicit seed so a Restart & Run All produces
# the same fitted models each time.
models = {
    # 'saga' is a stochastic solver (it shuffles the data), so it needs
    # `random_state` to be reproducible, just like the two models below.
    "Logistic Regression": LogisticRegression(max_iter=500, solver='saga', random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=50, n_jobs=-1, random_state=42),
    "Gradient Boosting": HistGradientBoostingClassifier(max_iter=50, random_state=42)
}


In [4]:

from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, classification_report

# Oversample the scaled training partition with SMOTE. The test partition is
# left as-is so the metrics below are measured on unmodified data.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)

# Maps model display name -> dict of metric name -> score on the test set.
results_smote = {}

for name in models:
    model = models[name]

    # Train on the resampled data, predict on the untouched test set.
    model.fit(X_train_smote, y_train_smote)
    y_pred = model.predict(X_test_scaled)

    # Positive-class probabilities are needed for AUC-ROC; estimators
    # without `predict_proba` get no AUC value.
    if hasattr(model, "predict_proba"):
        y_prob = model.predict_proba(X_test_scaled)[:, 1]
    else:
        y_prob = None

    scores = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-score": f1_score(y_test, y_pred),
    }
    scores["AUC-ROC"] = None if y_prob is None else roc_auc_score(y_test, y_prob)
    results_smote[name] = scores

    print(f"\n{name} with SMOTE:")
    print(classification_report(y_test, y_pred))







Logistic Regression with SMOTE:
              precision    recall  f1-score   support

           0       1.00      0.97      0.99     56651
           1       0.05      0.87      0.10        95

    accuracy                           0.97     56746
   macro avg       0.53      0.92      0.54     56746
weighted avg       1.00      0.97      0.99     56746


Random Forest with SMOTE:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.91      0.76      0.83        95

    accuracy                           1.00     56746
   macro avg       0.96      0.88      0.91     56746
weighted avg       1.00      1.00      1.00     56746


Gradient Boosting with SMOTE:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.32      0.85      0.46        95

    accuracy                           1.00     56746
   macro avg       0.66      0.92    

In [5]:
from imblearn.over_sampling import ADASYN

# Oversample the scaled training partition with ADASYN; the test partition
# stays unmodified so scores are measured on unmodified data.
adasyn = ADASYN(random_state=42)
X_train_adasyn, y_train_adasyn = adasyn.fit_resample(X_train_scaled, y_train)

# Maps model display name -> dict of metric name -> score on the test set.
results_adasyn = {}

for name in models:
    model = models[name]

    # Refit each estimator on the ADASYN-resampled data, then score it on
    # the test set.
    model.fit(X_train_adasyn, y_train_adasyn)
    y_pred = model.predict(X_test_scaled)

    # AUC-ROC needs positive-class probabilities; skip it for estimators
    # that do not expose `predict_proba`.
    if hasattr(model, "predict_proba"):
        y_prob = model.predict_proba(X_test_scaled)[:, 1]
    else:
        y_prob = None

    scores = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-score": f1_score(y_test, y_pred),
    }
    scores["AUC-ROC"] = None if y_prob is None else roc_auc_score(y_test, y_prob)
    results_adasyn[name] = scores

    print(f"\n{name} with ADASYN:")
    print(classification_report(y_test, y_pred))






Logistic Regression with ADASYN:
              precision    recall  f1-score   support

           0       1.00      0.91      0.95     56651
           1       0.02      0.92      0.03        95

    accuracy                           0.91     56746
   macro avg       0.51      0.91      0.49     56746
weighted avg       1.00      0.91      0.95     56746


Random Forest with ADASYN:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.91      0.73      0.81        95

    accuracy                           1.00     56746
   macro avg       0.95      0.86      0.90     56746
weighted avg       1.00      1.00      1.00     56746


Gradient Boosting with ADASYN:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     56651
           1       0.14      0.84      0.24        95

    accuracy                           0.99     56746
   macro avg       0.57      0.92 

In [6]:
import pandas as pd

# Turn each results dict into a frame with one row per model and one column
# per metric (the dicts are keyed model -> metric, hence the transpose).
df_smote = pd.DataFrame(results_smote).transpose()
df_adasyn = pd.DataFrame(results_adasyn).transpose()

# Metric columns to show, in display order.
metric_cols = ['Accuracy', 'Recall', 'F1-score', 'AUC-ROC']

# Place the two tables side by side under a top-level column label naming
# the oversampling technique.
comparison = pd.concat(
    {"SMOTE": df_smote[metric_cols], "ADASYN": df_adasyn[metric_cols]},
    axis=1,
)

print("\n📈 Comparison Table:")
display(comparison)





📈 Comparison Table:


Unnamed: 0_level_0,SMOTE,SMOTE,SMOTE,SMOTE,ADASYN,ADASYN,ADASYN,ADASYN
Unnamed: 0_level_1,Accuracy,Recall,F1-score,AUC-ROC,Accuracy,Recall,F1-score,AUC-ROC
Logistic Regression,0.973531,0.873684,0.09952,0.962517,0.908469,0.915789,0.032414,0.960174
Random Forest,0.999471,0.757895,0.827586,0.956771,0.999418,0.726316,0.807018,0.941729
Gradient Boosting,0.996669,0.852632,0.461538,0.974608,0.991013,0.842105,0.238806,0.969337


In [None]:
import pandas as pd
import plotly.express as px

# Flatten the two-level (technique, metric) column index into single
# "<technique>_<metric>" labels so the frame can be melted.
comparison_flat = comparison.copy()
comparison_flat.columns = [f"{aug}_{metric}" for aug, metric in comparison.columns]

# Move the model names from the index into a "Model" column. Naming the
# index first means this works whether or not the index already has a name
# (renaming the default "index" column would silently do nothing otherwise).
comparison_flat = comparison_flat.rename_axis("Model").reset_index()

# Long format for Plotly: one row per (model, technique_metric) pair.
melted = comparison_flat.melt(id_vars='Model', var_name='Augmentation_Metric', value_name='Score')

# Split 'SMOTE_Accuracy' -> 'SMOTE', 'Accuracy'. `n=1` splits on the first
# underscore only, so a metric name that itself contains '_' still yields
# exactly the two columns being assigned.
melted[['Augmentation', 'Metric']] = melted['Augmentation_Metric'].str.split('_', n=1, expand=True)

# Grouped bars: one facet per metric, one colour per oversampling technique.
fig = px.bar(
    melted,
    x="Model",
    y="Score",
    color="Augmentation",
    facet_col="Metric",
    barmode="group",
    title="Model Performance Comparison: SMOTE vs ADASYN",
    height=500,
    width=1000
)

fig.update_layout(xaxis_title="Model", yaxis_title="Score")
fig.show()
