In [4]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the nine datasets (the base dataset plus eight resampled variants, named
# after the resampling scheme in each file name).
# NOTE(review): absolute, machine-specific path; point DATA_DIR at the local
# copy of the data before re-running.
DATA_DIR = 'C:/Users/awzma/Testosterone Deficiency/Datasets'
dataset_files = ['dataset.xlsx',
                 'RUS balanced_dataset.xlsx',
                 'RENN balanced_dataset.xlsx',
                 'ROS balanced_dataset.xlsx',
                 'SMOTE resampled_dataset.xlsx',
                 'RENN + SMOTE balanced_dataset.xlsx',
                 'RUS + ROS balanced_dataset.xlsx',
                 'RENN + RUS balanced_dataset.xlsx',
                 'RUS + SMOTE balanced_dataset.xlsx']
dataset_names = [f'{DATA_DIR}/{file_name}' for file_name in dataset_files]
datasets = [pd.read_excel(name) for name in dataset_names]

# Train and test AdaBoost (with depth-1 decision trees as the weak learner) on a
# single hold-out split of each dataset.
# The MLPClassifier that used to be instantiated here was never fitted or passed
# to AdaBoost, so it has been removed; the reported accuracies always came from
# the boosted decision stumps only.
# NOTE(review): the resampled files appear to have been balanced before this
# split, so the test rows may contain resampled records -- confirm before
# comparing these accuracies across datasets.
for i, dataset in enumerate(datasets):
    # The last column is used as the target; every other column is a feature.
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    # Hold out 20% of the rows for testing; the fixed seed keeps the split
    # reproducible. The split is not stratified (no `stratify=` argument).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # AdaBoost over 50 decision stumps.
    # NOTE: scikit-learn 1.2 renamed `base_estimator` to `estimator` and 1.4
    # removed the old keyword; switch the keyword when running on a newer release.
    ada_boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)

    # Fit on the training portion and score on the held-out portion.
    ada_boost.fit(X_train, y_train)
    predictions = ada_boost.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Datasets are numbered 1..9 in the order of `dataset_names`.
    print(f"Accuracy for dataset {i+1} = {accuracy:.4%}")


Accuracy for dataset 1 = 80.5882%
Accuracy for dataset 2 = 77.7328%
Accuracy for dataset 3 = 68.4411%
Accuracy for dataset 4 = 84.6626%
Accuracy for dataset 5 = 68.4411%
Accuracy for dataset 6 = 67.7570%
Accuracy for dataset 7 = 81.2987%
Accuracy for dataset 8 = 71.0526%
Accuracy for dataset 9 = 72.8833%


In [2]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Load the nine datasets (the base dataset plus eight resampled variants, named
# after the resampling scheme in each file name).
# NOTE(review): absolute, machine-specific path; point DATA_DIR at the local
# copy of the data before re-running.
DATA_DIR = 'C:/Users/awzma/Testosterone Deficiency/Datasets'
dataset_files = ['dataset.xlsx',
                 'RUS balanced_dataset.xlsx',
                 'RENN balanced_dataset.xlsx',
                 'ROS balanced_dataset.xlsx',
                 'SMOTE resampled_dataset.xlsx',
                 'RENN + SMOTE balanced_dataset.xlsx',
                 'RUS + ROS balanced_dataset.xlsx',
                 'RENN + RUS balanced_dataset.xlsx',
                 'RUS + SMOTE balanced_dataset.xlsx']
dataset_names = [f'{DATA_DIR}/{file_name}' for file_name in dataset_files]
datasets = [pd.read_excel(name) for name in dataset_names]

# Cross-validate AdaBoost (with depth-1 decision trees as the weak learner) on
# each dataset.
# The MLPClassifier that used to be instantiated here was never fitted or passed
# to AdaBoost, so it has been removed; the scores always came from the boosted
# decision stumps only.
# Per-dataset fold scores are kept so the overall mean can be computed without
# running the whole cross-validation a second time.
all_cv_scores = []
for i, dataset in enumerate(datasets):
    # The last column is used as the target; every other column is a feature.
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    # AdaBoost over 50 decision stumps.
    # NOTE: scikit-learn 1.2 renamed `base_estimator` to `estimator` and 1.4
    # removed the old keyword; switch the keyword when running on a newer release.
    ada_boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)

    # 10-fold cross-validated accuracy. With an integer `cv` and a classifier,
    # scikit-learn uses stratified folds without shuffling, so the scores are
    # deterministic for a given dataset.
    cv_scores = cross_val_score(ada_boost, X, y, cv=10, scoring='accuracy')
    all_cv_scores.append(cv_scores)

    # Datasets are numbered 1..9 in the order of `dataset_names`.
    print(f"Average accuracy for dataset {i+1} = {np.mean(cv_scores):.4%}")

# Mean accuracy over every fold of every dataset (9 datasets x 10 folds).
mean_accuracy = np.mean(all_cv_scores)
print(f"Mean accuracy = {mean_accuracy:.4%}")


Average accuracy for dataset 1 = 81.4247%
Average accuracy for dataset 2 = 69.0381%
Average accuracy for dataset 3 = 81.9889%
Average accuracy for dataset 4 = 70.7189%
Average accuracy for dataset 5 = 72.6433%
Average accuracy for dataset 6 = 79.1498%
Average accuracy for dataset 7 = 68.0644%
Average accuracy for dataset 8 = 84.7290%
Average accuracy for dataset 9 = 68.0644%
Mean accuracy = 75.0913%
