In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SequentialFeatureSelector
import time
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score,auc,precision_score,recall_score,f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report



## Taiwan dataset


In [2]:
Taiwan_datasets={
    "Original":pd.read_csv("../Datasets/Taiwan_credit_approval.csv"),
    "SMOTE":pd.read_csv("../Datasets/SMOTE/taiwan_SMOTE.csv"),
    "CFS + SMOTE":pd.read_csv("../Datasets/SMOTE/taiwan_CFS_SMOTE.csv"),
    "CFS": pd.read_csv("../Datasets/CFS/taiwan_CFS.csv"),
    "SFS": pd.read_csv("../Datasets/SFS/taiwan_SFS.csv")
}
Australian_datasets={
    "Original":pd.read_csv("../Datasets/Australian_credit_approval.csv"),
    "SMOTE":pd.read_csv("../Datasets/SMOTE/australia_SMOTE.csv"),
    "CFS + SMOTE":pd.read_csv("../Datasets/SMOTE/australia_CFS_SMOTE.csv"),
    "CFS": pd.read_csv("../Datasets/CFS/australian_CFS.csv"),
    "SFS": pd.read_csv("../Datasets/SFS/australian_SFS.csv")
}

## MODELS : MLP , LIBSVM , KNN

In [4]:

knn_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", KNeighborsClassifier(metric="manhattan"))
])

knn_param_grid = {
    "clf__n_neighbors": [3,5,7,9],
    "clf__weights": ["uniform", "distance"],
}



svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", SVC(kernel="rbf",C=0.1))
])

svm_param_grid = {
    "clf__gamma": ["scale", 0.01]
}

mlp_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", MLPClassifier(max_iter=500, random_state=404131029,activation="tanh"))
])

mlp_param_grid = {
    "clf__hidden_layer_sizes": [(50,), (100,)],
    "clf__alpha": [0.0001, 0.001, 0.01],
    "clf__learning_rate_init": [0.001, 0.01]
}



In [5]:
def train_and_evaluate(df, dataset_name,target_col="target"):
    print(f"\n================ Dataset: {dataset_name} ================\n")
    
    X = df.drop(columns=[target_col])
    y = df[target_col]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=404131029
    )

    models = {
        "MLP": (mlp_pipeline, mlp_param_grid),
        "KNN": (knn_pipeline, knn_param_grid),
        "SVM": (svm_pipeline, svm_param_grid),
    }

    for model_name, (pipeline, param_grid) in models.items():
        print(f"--- {model_name} ---")

        grid = GridSearchCV(
            pipeline,
            param_grid,
            cv=5,
            scoring="accuracy",
            n_jobs=-1
        )

        grid.fit(X_train, y_train)

        best_model = grid.best_estimator_
        y_pred = best_model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)

        print("Best Hyperparameters:")
        print(grid.best_params_)
        print(f"Test Accuracy: {acc:.4f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred))
        print("-"*50)


In [6]:
for dataset in Australian_datasets:
    train_and_evaluate(Australian_datasets[dataset], dataset,"Class")



--- MLP ---




Best Hyperparameters:
{'clf__alpha': 0.0001, 'clf__hidden_layer_sizes': (50,), 'clf__learning_rate_init': 0.001}
Test Accuracy: 0.8502
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.84      0.86       116
           1       0.81      0.87      0.84        91

    accuracy                           0.85       207
   macro avg       0.85      0.85      0.85       207
weighted avg       0.85      0.85      0.85       207

--------------------------------------------------
--- KNN ---
Best Hyperparameters:
{'clf__n_neighbors': 7, 'clf__weights': 'uniform'}
Test Accuracy: 0.8599
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.90      0.88       116
           1       0.86      0.81      0.84        91

    accuracy                           0.86       207
   macro avg       0.86      0.85      0.86       207
weighted avg       0.86      0.86      0.86       207

-------



Best Hyperparameters:
{'clf__alpha': 0.0001, 'clf__hidden_layer_sizes': (100,), 'clf__learning_rate_init': 0.001}
Test Accuracy: 0.8348
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.85      0.83       109
           1       0.86      0.82      0.84       121

    accuracy                           0.83       230
   macro avg       0.83      0.84      0.83       230
weighted avg       0.84      0.83      0.83       230

--------------------------------------------------
--- KNN ---


[WinError 2] The system cannot find the file specified
  File "e:\project\RAG_pipline\RAG_PIPELINE\.env\Lib\site-packages\joblib\externals\loky\backend\context.py", line 247, in _count_physical_cores
    cpu_count_physical = _count_physical_cores_win32()
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "e:\project\RAG_pipline\RAG_PIPELINE\.env\Lib\site-packages\joblib\externals\loky\backend\context.py", line 299, in _count_physical_cores_win32
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\mahboub\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\mahboub\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\mahboub\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1538, in _execute_child
  

Best Hyperparameters:
{'clf__n_neighbors': 9, 'clf__weights': 'uniform'}
Test Accuracy: 0.8391
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.90      0.84       109
           1       0.90      0.79      0.84       121

    accuracy                           0.84       230
   macro avg       0.84      0.84      0.84       230
weighted avg       0.85      0.84      0.84       230

--------------------------------------------------
--- SVM ---
Best Hyperparameters:
{'clf__gamma': 'scale'}
Test Accuracy: 0.8522
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.80      0.84       109
           1       0.83      0.90      0.87       121

    accuracy                           0.85       230
   macro avg       0.86      0.85      0.85       230
weighted avg       0.85      0.85      0.85       230

--------------------------------------------------


--- MLP ---




Best Hyperparameters:
{'clf__alpha': 0.0001, 'clf__hidden_layer_sizes': (100,), 'clf__learning_rate_init': 0.001}
Test Accuracy: 0.8522
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.85       109
           1       0.87      0.84      0.86       121

    accuracy                           0.85       230
   macro avg       0.85      0.85      0.85       230
weighted avg       0.85      0.85      0.85       230

--------------------------------------------------
--- KNN ---
Best Hyperparameters:
{'clf__n_neighbors': 7, 'clf__weights': 'distance'}
Test Accuracy: 0.8522
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       109
           1       0.89      0.82      0.85       121

    accuracy                           0.85       230
   macro avg       0.85      0.85      0.85       230
weighted avg       0.86      0.85      0.85       230

-----



Best Hyperparameters:
{'clf__alpha': 0.0001, 'clf__hidden_layer_sizes': (100,), 'clf__learning_rate_init': 0.001}
Test Accuracy: 0.8696
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.86      0.88       116
           1       0.83      0.88      0.86        91

    accuracy                           0.87       207
   macro avg       0.87      0.87      0.87       207
weighted avg       0.87      0.87      0.87       207

--------------------------------------------------
--- KNN ---
Best Hyperparameters:
{'clf__n_neighbors': 3, 'clf__weights': 'distance'}
Test Accuracy: 0.8551
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.89      0.87       116
           1       0.85      0.81      0.83        91

    accuracy                           0.86       207
   macro avg       0.85      0.85      0.85       207
weighted avg       0.85      0.86      0.85       207

-----



Best Hyperparameters:
{'clf__alpha': 0.01, 'clf__hidden_layer_sizes': (100,), 'clf__learning_rate_init': 0.001}
Test Accuracy: 0.8647
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.85      0.88       116
           1       0.82      0.88      0.85        91

    accuracy                           0.86       207
   macro avg       0.86      0.87      0.86       207
weighted avg       0.87      0.86      0.87       207

--------------------------------------------------
--- KNN ---
Best Hyperparameters:
{'clf__n_neighbors': 3, 'clf__weights': 'distance'}
Test Accuracy: 0.8551
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.89      0.87       116
           1       0.85      0.81      0.83        91

    accuracy                           0.86       207
   macro avg       0.85      0.85      0.85       207
weighted avg       0.85      0.86      0.85       207

-------

In [7]:
for dataset in Taiwan_datasets:
    train_and_evaluate(Taiwan_datasets[dataset], dataset,"default.payment.next.month")




--- MLP ---
Best Hyperparameters:
{'clf__alpha': 0.01, 'clf__hidden_layer_sizes': (50,), 'clf__learning_rate_init': 0.01}
Test Accuracy: 0.8119
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.94      0.89      7062
           1       0.61      0.35      0.45      1938

    accuracy                           0.81      9000
   macro avg       0.72      0.65      0.67      9000
weighted avg       0.79      0.81      0.79      9000

--------------------------------------------------
--- KNN ---
Best Hyperparameters:
{'clf__n_neighbors': 9, 'clf__weights': 'uniform'}
Test Accuracy: 0.8041
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.94      0.88      7062
           1       0.58      0.32      0.41      1938

    accuracy                           0.80      9000
   macro avg       0.71      0.63      0.65      9000
weighted avg       0.78      0.80      0.78      90