In [1]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the four datasets
dataset_names = ['C:/Users/awzma/Testosterone Deficiency/Datasets/dataset.xlsx', 
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + SMOTE balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + ROS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + RUS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + SMOTE balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/ROS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/SMOTE resampled_dataset.xlsx']
datasets = [pd.read_excel(name) for name in dataset_names]

# Train and test the MLP on each dataset
for i, dataset in enumerate(datasets):
    # Split the dataset into features and target variable
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the MLP classifier with 2 hidden layers
    mlp = MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000, random_state=42)

    # Train the MLP classifier using the training set
    mlp.fit(X_train, y_train)

    # Make predictions on the testing set
    predictions = mlp.predict(X_test)

    # Calculate the accuracy on the testing set
    accuracy = accuracy_score(y_test, predictions)

    # Print the accuracy for this dataset
    print(f"Accuracy for dataset {i+1} = {accuracy:.4%}")


Accuracy for dataset 1 = 81.9118%
Accuracy for dataset 2 = 68.0162%
Accuracy for dataset 3 = 52.4715%
Accuracy for dataset 4 = 77.3006%
Accuracy for dataset 5 = 52.4715%
Accuracy for dataset 6 = 58.4112%
Accuracy for dataset 7 = 73.5065%
Accuracy for dataset 8 = 52.6316%
Accuracy for dataset 9 = 55.0343%


In [4]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Load the dataset names and create an empty list to store the accuracies
dataset_names = ['C:/Users/awzma/Testosterone Deficiency/Datasets/dataset.xlsx', 
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/ROS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/SMOTE resampled_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + SMOTE balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + ROS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + RUS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + SMOTE balanced_dataset.xlsx']
accuracies = []

# Iterate over the datasets
for i, dataset_name in enumerate(dataset_names):
    # Load the dataset
    dataset = pd.read_excel(dataset_name)

    # Extract the features and target variable
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    # Create the MLP classifier with 2 hidden layers
    mlp = MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000, random_state=42)

    # Create the stratified 10-fold cross-validator
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    # Iterate over the cross-validation splits
    fold_accuracies = []
    for train_index, test_index in skf.split(X, y):
        # Split the dataset into training and testing sets
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Train the MLP classifier using the training set
        mlp.fit(X_train, y_train)

        # Make predictions on the testing set
        predictions = mlp.predict(X_test)

        # Calculate the accuracy on the testing set and store it
        fold_accuracy = accuracy_score(y_test, predictions)
        fold_accuracies.append(fold_accuracy)

    # Calculate the average accuracy across all folds and store it
    accuracy = np.mean(fold_accuracies)
    accuracies.append(accuracy)

    # Print the accuracy for this dataset
    print(f"Average accuracy for dataset {i+1} = {accuracy:.4%}")

# Print the average accuracy across all datasets
print(f"Mean accuracy = {np.mean(accuracies):.4%}")


Average accuracy for dataset 1 = 80.6301%
Average accuracy for dataset 2 = 55.7327%
Average accuracy for dataset 3 = 72.0982%
Average accuracy for dataset 4 = 57.5544%
Average accuracy for dataset 5 = 58.1268%
Average accuracy for dataset 6 = 68.5830%
Average accuracy for dataset 7 = 55.6396%
Average accuracy for dataset 8 = 80.7889%
Average accuracy for dataset 9 = 55.6396%
Mean accuracy = 64.9770%
