In [1]:
# Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Load the pre-split feature matrices and label vectors from CSV.
# NOTE(review): the file names suggest SMOTE resampling happened upstream; that
# step is not visible here, so confirm the test split was left un-resampled.
X_train = pd.read_csv("x_train_smote2.csv")
X_test = pd.read_csv("x_test_smote2.csv")

# squeeze("columns") turns a one-column frame into a Series but, unlike a bare
# squeeze(), never collapses a single-row file all the way down to a scalar.
y_train = pd.read_csv("y_train_smote2.csv").squeeze("columns")
y_test = pd.read_csv("y_test_smote2.csv").squeeze("columns")

# Fail fast on misaligned splits instead of deep inside the estimator.
assert len(X_train) == len(y_train), "X_train / y_train row counts differ"
assert len(X_test) == len(y_test), "X_test / y_test row counts differ"
assert list(X_train.columns) == list(X_test.columns), "train/test feature columns differ"


In [2]:

# Standardise the features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits, so no statistics
# from the test split are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

In [3]:
# Baseline network: two hidden layers (64 then 32 units), fitted on the
# standardised training split with a fixed seed for repeatability.
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=42,
).fit(X_train_scaled, y_train)

# Hard labels for the report, plus column 1 of predict_proba (the second
# entry of mlp.classes_) for the ranking metric.
y_pred = mlp.predict(X_test_scaled)
y_proba = mlp.predict_proba(X_test_scaled)[:, 1]

In [4]:
# Score the baseline on the test split: ROC AUC from the class-1
# probabilities, and a per-class precision/recall/F1 table from the hard labels.
roc_auc = roc_auc_score(y_true=y_test, y_score=y_proba)

print(classification_report(y_true=y_test, y_pred=y_pred))
print(f'ROC AUC Score: {roc_auc:.4f}')

              precision    recall  f1-score   support

           0       0.95      0.95      0.95       932
           1       0.81      0.81      0.81       228

    accuracy                           0.93      1160
   macro avg       0.88      0.88      0.88      1160
weighted avg       0.93      0.93      0.93      1160

ROC AUC Score: 0.9322


In [5]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.neural_network import MLPClassifier

# Hyper-parameter search for the MLP, ranked by cross-validated ROC AUC.
#
# NOTE(review): the validation folds are drawn from X_train_scaled, which was
# standardised using the full training split and (per the file names) may
# contain SMOTE-synthesised rows. Both make best_score_ optimistic relative to
# the untouched test split; wrapping the scaler and model in a Pipeline would
# remove the scaling part of that bias.

# Candidate settings: 2 architectures x 2 activations x 2 alphas = 8 combos.
param_grid = {
    'hidden_layer_sizes': [(64, 32), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.001],
    'learning_rate': ['constant']
}

# Unfitted template estimator that the search clones for every candidate.
# NOTE: this rebinds `mlp`, replacing the fitted baseline model from the
# earlier training cell.
mlp = MLPClassifier(max_iter=300, random_state=42)

# An integer `cv` on a classifier with binary/multiclass labels is resolved by
# scikit-learn to StratifiedKFold (not plain KFold). Spell the splitter out so
# the code states what actually runs; the folds are the same as with cv=5.
cv_splitter = StratifiedKFold(n_splits=5)

grid_search = GridSearchCV(estimator=mlp,
                           param_grid=param_grid,
                           cv=cv_splitter,
                           scoring='roc_auc',
                           refit=True,  # default: best config is refit on all of X_train_scaled
                           verbose=2,
                           n_jobs=-1)

# Fit on the standardised training split.
grid_search.fit(X_train_scaled, y_train)

# best_score_ is the mean ROC AUC across the validation folds for the winner.
print("Best Parameters:", grid_search.best_params_)
print("Best ROC AUC Score:", grid_search.best_score_)


Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best Parameters: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (64, 32), 'learning_rate': 'constant', 'solver': 'adam'}
Best ROC AUC Score: 0.9739760908764646




In [7]:
from sklearn.metrics import (
    precision_score, recall_score, accuracy_score, f1_score,
    confusion_matrix, roc_curve
)

# Evaluate the tuned model on the test split and compare train vs. test
# accuracy as a rough over-fitting check.
if hasattr(grid_search, 'best_estimator_'):
    # GridSearchCV refits the winning configuration on the full training data
    # (refit=True is the default), so best_estimator_ is already trained.
    # Fitting it again would only repeat the same seeded training run.
    best_model = grid_search.best_estimator_

    y_train_pred = best_model.predict(X_train_scaled)
    y_test_pred = best_model.predict(X_test_scaled)
    y_test_proba = best_model.predict_proba(X_test_scaled)[:, 1]

    # Pin the label order to the model's classes so the matrix stays 2x2 (and
    # the four-way unpack stays valid) even if one class is missing from both
    # the test labels and the predictions.
    tn, fp, fn, tp = confusion_matrix(
        y_test, y_test_pred, labels=best_model.classes_
    ).ravel()

    precision = precision_score(y_test, y_test_pred)
    recall = recall_score(y_test, y_test_pred)
    accuracy = accuracy_score(y_test, y_test_pred)
    f1 = f1_score(y_test, y_test_pred)

    # Specificity = TN / (TN + FP); undefined when the test split has no
    # negatives, so report NaN instead of dividing by zero.
    negatives = tn + fp
    specificity = tn / negatives if negatives else float('nan')

    # Threshold maximising Youden's J (TPR - FPR) along the ROC curve.
    # NOTE(review): this is selected on the test split, so it is descriptive
    # only; choose an operating threshold on validation data before reusing it.
    fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
    optimal_idx = (tpr - fpr).argmax()
    optimal_threshold = thresholds[optimal_idx]

    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy  # same quantity as above; avoid recomputing it
    diff_acc = abs(train_acc - test_acc)

    print(f"Precision:               {precision:.4f}")
    print(f"Recall:                  {recall:.4f}")
    print(f"Accuracy:                {accuracy:.4f}")
    print(f"Specificity:             {specificity:.4f}")
    print(f"F1 Score:                {f1:.4f}")
    print(f"Optimal Threshold (ROC): {optimal_threshold:.4f}")
    print()
    print(f"Best Accuracy (Train):   {train_acc:.4f}")
    print(f"Best Accuracy (Test):    {test_acc:.4f}")
    print(f"Difference:              {diff_acc:.4f}")
else:
    print("Grid search was not successfully run or has no best_estimator_.")



Precision:               0.8235
Recall:                  0.7982
Accuracy:                0.9267
Specificity:             0.9582
F1 Score:                0.8107
Optimal Threshold (ROC): 0.2041

Best Accuracy (Train):   0.9981
Best Accuracy (Test):    0.9267
Difference:              0.0714


