In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    precision_score, recall_score, accuracy_score, f1_score,
    confusion_matrix, roc_curve, roc_auc_score, average_precision_score
)
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt

# Read the dataset from the CSV file in the working directory
# (presumably already cleaned and encoded, going by the file name)
df = pd.read_csv("cleaned_and_encoded_dataset.csv")

# Target is the bad_flag column; the features are every remaining column
# except customer_id, which is dropped together with the target
y = df["bad_flag"]
X = df.drop(columns=["bad_flag", "customer_id"])

# Hold out 30% of the rows as the test split, stratified on the target
split_parts = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Undersample the training split only; the test split is left untouched
rus = RandomUnderSampler(random_state=42)
X_train_res, y_train_res = rus.fit_resample(X_train, y_train)

# Fit the scaler on the resampled training features, then push both the
# training and the test features through that same fitted scaler
scaler = StandardScaler().fit(X_train_res)
X_train_scaled = scaler.transform(X_train_res)
X_test_scaled = scaler.transform(X_test)


In [2]:

# MLP with two hidden layers (64 and 32 units), ReLU activation and the
# Adam solver, fitted on the scaled, undersampled training data
mlp_settings = {
    "hidden_layer_sizes": (64, 32),
    "activation": "relu",
    "solver": "adam",
    "max_iter": 1000,
    "random_state": 42,
}
mlp = MLPClassifier(**mlp_settings)
mlp.fit(X_train_scaled, y_train_res)


In [3]:

# Predictions of the fitted MLP: hard labels for both splits, plus column 1
# of predict_proba for the test split (presumably the bad_flag == 1 class)
y_test_pred = mlp.predict(X_test_scaled)
y_test_proba = mlp.predict_proba(X_test_scaled)[:, 1]
y_train_pred = mlp.predict(X_train_scaled)

# Label-based metrics on the test split
accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

# Specificity = TN / (TN + FP), read off the test confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred).ravel()
specificity = tn / (tn + fp)

# Score-based metrics on the test split
roc_auc = roc_auc_score(y_test, y_test_proba)
pr_auc = average_precision_score(y_test, y_test_proba)

# ROC threshold at which (TPR - FPR) is largest
fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]

# Accuracy on the resampled training data versus the test split
train_acc = accuracy_score(y_train_res, y_train_pred)
test_acc = accuracy_score(y_test, y_test_pred)
acc_diff = abs(train_acc - test_acc)

# Report: every label is left-aligned in a 25-character column and every
# value is printed with four decimals
test_report = [
    ("Precision:", precision),
    ("Recall:", recall),
    ("Accuracy:", accuracy),
    ("Specificity:", specificity),
    ("F1 Score:", f1),
    ("ROC AUC Score:", roc_auc),
    ("PR AUC Score:", pr_auc),
    ("Optimal Threshold (ROC):", optimal_threshold),
]
fit_report = [
    ("Train Accuracy:", train_acc),
    ("Test Accuracy:", test_acc),
    ("Accuracy Difference:", acc_diff),
]

print("=== Validation Metrics ===")
for label, value in test_report:
    print(f"{label:<25}{value:.4f}")
print()
for label, value in fit_report:
    print(f"{label:<25}{value:.4f}")


=== Validation Metrics ===
Precision:               0.5835
Recall:                  0.8070
Accuracy:                0.8488
Specificity:             0.8590
F1 Score:                0.6773
ROC AUC Score:           0.8907
PR AUC Score:            0.7052
Optimal Threshold (ROC): 0.6409

Train Accuracy:          1.0000
Test Accuracy:           0.8488
Accuracy Difference:     0.1512


In [9]:
from imblearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    precision_score, recall_score, accuracy_score, f1_score,
    confusion_matrix, roc_curve, roc_auc_score, average_precision_score
)

# Undersampler and base classifier used as pipeline steps
rus = RandomUnderSampler(random_state=42)
mlp = MLPClassifier(max_iter=1000, random_state=42)

# imblearn Pipeline: undersample -> scale -> classify. Keeping the sampler and
# the scaler inside the pipeline means GridSearchCV fits them on each
# cross-validation training fold only, never on the held-out fold.
pipeline = Pipeline(steps=[
    ('undersample', rus),
    ('scale', StandardScaler()),
    ('clf', mlp)
])

# Hyper-parameter grid: 2 architectures x 2 activations x 2 alphas = 8 candidates
param_grid = {
    'clf__hidden_layer_sizes': [(64, 32), (50, 50)],
    'clf__activation': ['relu', 'tanh'],
    'clf__alpha': [0.0001, 0.001],
    'clf__learning_rate': ['constant']
}

# 5-fold grid search scored by ROC AUC, run on all available cores
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    verbose=2,
    n_jobs=-1
)

# Search on the raw (unsampled, unscaled) training split; the pipeline
# takes care of resampling and scaling internally
grid_search.fit(X_train, y_train)

# Pipeline refitted on the whole training split with the best parameters
best_model = grid_search.best_estimator_

# Test-set predictions: at predict time the pipeline applies the fitted
# scaler and the classifier (samplers only act during fit)
y_test_pred = best_model.predict(X_test)
y_test_proba = best_model.predict_proba(X_test)[:, 1]

# Training accuracy has to be measured on the rows the refitted pipeline was
# actually trained on, transformed by the scaler that was fitted together with
# the classifier. Re-running a sampler and fitting a fresh StandardScaler here
# would only match the model's preprocessing as long as seeds and data happen
# to be identical, so the pipeline's own fitted steps are reused instead.
fitted_sampler = best_model.named_steps['undersample']
fitted_scaler = best_model.named_steps['scale']
kept_rows = fitted_sampler.sample_indices_  # positions kept by the undersampler
X_train_res = X_train.iloc[kept_rows]
y_train_res = y_train.iloc[kept_rows]
X_train_res_scaled = fitted_scaler.transform(X_train_res)
y_train_pred = best_model.named_steps['clf'].predict(X_train_res_scaled)

# Confusion matrix (test)
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred).ravel()

# Label-based metrics on the test split
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
accuracy = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)
specificity = tn / (tn + fp)

# ROC threshold at which (TPR - FPR) is largest.
# NOTE(review): this threshold is selected on the test split, so it is
# informational only; tune it on separate validation data before using it.
fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
optimal_idx = (tpr - fpr).argmax()
optimal_threshold = thresholds[optimal_idx]

# Score-based metrics on the test split
roc_auc = roc_auc_score(y_test, y_test_proba)
pr_auc = average_precision_score(y_test, y_test_proba)

# Accuracy on the undersampled training rows versus the test split
train_acc = accuracy_score(y_train_res, y_train_pred)
test_acc = accuracy  # same quantity as the test accuracy computed above
acc_diff = abs(train_acc - test_acc)

# Output. The "Validation Metrics" section is computed on the held-out test
# split; the header text is kept as-is so it matches the recorded output.
print("=== Grid Search Results ===")
print("Best Parameters:", grid_search.best_params_)
print("Best ROC AUC (CV):", grid_search.best_score_)
print("\n=== Validation Metrics ===")
for label, value in [
    ("Precision:", precision),
    ("Recall:", recall),
    ("Accuracy:", accuracy),
    ("Specificity:", specificity),
    ("F1 Score:", f1),
    ("ROC AUC Score:", roc_auc),
    ("PR AUC Score:", pr_auc),
    ("Optimal Threshold (ROC):", optimal_threshold),
]:
    # Labels are left-aligned in a 25-character column, values use 4 decimals
    print(f"{label:<25}{value:.4f}")
print()
for label, value in [
    ("Train Accuracy:", train_acc),
    ("Test Accuracy:", test_acc),
    ("Accuracy Difference:", acc_diff),
]:
    print(f"{label:<25}{value:.4f}")


Fitting 5 folds for each of 8 candidates, totalling 40 fits
=== Grid Search Results ===
Best Parameters: {'clf__activation': 'relu', 'clf__alpha': 0.0001, 'clf__hidden_layer_sizes': (50, 50), 'clf__learning_rate': 'constant'}
Best ROC AUC (CV): 0.8747136780383995

=== Validation Metrics ===
Precision:               0.5940
Recall:                  0.8129
Accuracy:                0.8539
Specificity:             0.8640
F1 Score:                0.6864
ROC AUC Score:           0.9037
PR AUC Score:            0.7109
Optimal Threshold (ROC): 0.2726

Train Accuracy:          0.9994
Test Accuracy:           0.8539
Accuracy Difference:     0.1454
