In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the four datasets
dataset_names = ['C:/Users/awzma/Testosterone Deficiency/Datasets/dataset.xlsx', 
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/ROS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/SMOTE resampled_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + SMOTE balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + ROS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + RUS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + SMOTE balanced_dataset.xlsx']

datasets = [pd.read_excel(name) for name in dataset_names]

# Train and test the ANN on each dataset
for i, dataset in enumerate(datasets):
    # Split the dataset into features and target variable
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the ANN classifier with 2 hidden layers
    ann = MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000, random_state=42)

    # Train the ANN classifier using the training set
    ann.fit(X_train, y_train)

    # Make predictions on the testing set
    predictions = ann.predict(X_test)

    # Calculate the accuracy on the testing set
    accuracy = accuracy_score(y_test, predictions)

    # Print the accuracy for this dataset
    print(f"Accuracy for dataset {i+1} = {accuracy:.4%}")


Accuracy for dataset 1 = 82.2059%
Accuracy for dataset 2 = 77.3279%
Accuracy for dataset 3 = 66.9202%
Accuracy for dataset 4 = 84.0491%
Accuracy for dataset 5 = 66.9202%
Accuracy for dataset 6 = 69.6262%
Accuracy for dataset 7 = 80.5195%
Accuracy for dataset 8 = 71.3959%
Accuracy for dataset 9 = 72.6545%


In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the four datasets
dataset_names = ['C:/Users/awzma/Testosterone Deficiency/Datasets/dataset.xlsx', 
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/ROS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/SMOTE resampled_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + SMOTE balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + ROS balanced_dataset.xlsx',
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RENN + RUS balanced_dataset.xlsx',  
                 'C:/Users/awzma/Testosterone Deficiency/Datasets/RUS + SMOTE balanced_dataset.xlsx']

all_accuracies = []

# Train and test the ANN on each dataset
for i, dataset_name in enumerate(dataset_names):
    dataset = pd.read_excel(dataset_name)
    
    # Split the dataset into features and target variable
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    # Initialize stratified 10-fold cross-validation
    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    
    accuracies = []
    
    # Train and test the ANN using stratified 10-fold cross-validation
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        # Create the ANN classifier with 2 hidden layers
        ann = MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000, random_state=42)

        # Train the ANN classifier using the training set
        ann.fit(X_train, y_train)

        # Make predictions on the testing set
        predictions = ann.predict(X_test)

        # Calculate the accuracy on the testing set
        accuracy = accuracy_score(y_test, predictions)
        
        accuracies.append(accuracy)
    
    # Print the accuracies for this dataset
    #for j, acc in enumerate(accuracies):
        #print(f"Accuracy for dataset {i+1}, fold {j+1} = {acc:.4%}")
    
    # Compute the mean accuracy for this dataset
    mean_accuracy = np.mean(accuracies)
    all_accuracies.append(mean_accuracy)
    print(f"Average accuracy for dataset {i+1} = {mean_accuracy:.4%}")

# Compute and print the mean accuracy over all datasets
print(f"Mean accuracy = {np.mean(all_accuracies):.4%}")


Average accuracy for dataset 1 = 80.7185%
Average accuracy for dataset 2 = 66.7087%
Average accuracy for dataset 3 = 81.2600%
Average accuracy for dataset 4 = 70.7422%
Average accuracy for dataset 5 = 71.1073%
Average accuracy for dataset 6 = 78.5425%
Average accuracy for dataset 7 = 66.3839%
Average accuracy for dataset 8 = 84.2337%
Average accuracy for dataset 9 = 66.3839%
Mean accuracy = 74.0090%
