## Our initial analysis focused on evaluating the performance of the four selected classifiers using the complete DARWIN data set with all 450 features.

This approach served as our baseline, providing a fundamental understanding of each classifier's capability to differentiate between Alzheimer's patients and healthy controls.

### **This notebook contains the baseline for Random Forest**

In [4]:
import pandas as pd
from ucimlrepo import fetch_ucirepo
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold

# Fetch the DARWIN dataset from the UCI ML Repository (dataset id 732)
darwin = fetch_ucirepo(id=732)

# Data (as pandas dataframes)
X = darwin.data.features
y = darwin.data.targets

X = X.drop(columns=['ID'])  # Remove ID column: it identifies a row, it is not a predictor

# Parameters for Random Forest
params = {
    'bootstrap': True,
    'criterion': 'gini',
    'max_depth': 2,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'n_estimators': 50,
}

# Lists to store performance metrics (one entry is appended per fold)
all_accuracies = []
all_precisions = []
all_recalls = []
all_f1_scores = []
all_sensitivities = []
all_specificities = []

# Number of cross-validation folds. Kept as a named constant so the splitter
# and any description of the experiment cannot drift apart.
N_SPLITS = 10

# Set up 10-fold cross-validation (shuffled with a fixed seed for reproducibility)
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Cross-validation loop: for every split, fit a fresh Random Forest on the
# training part and score it on the held-out part. Labels are 'P' (treated as
# the positive class) and 'H'.
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]

    # Targets come as a DataFrame; flatten to 1D arrays as scikit-learn expects
    y_train = y.iloc[train_index].values.ravel()
    y_test = y.iloc[test_index].values.ravel()

    # Create Random Forest classifier with the specified parameters.
    # A fixed random_state keeps each fold's model reproducible.
    clf_rf = RandomForestClassifier(**params, random_state=42)

    # Train the model
    clf_rf.fit(X_train, y_train)

    # Predict on test data
    y_pred = clf_rf.predict(X_test)

    # Calculate metrics with 'P' explicitly set as the positive class
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='binary', pos_label='P')
    recall = recall_score(y_test, y_pred, average='binary', pos_label='P')
    f1 = f1_score(y_test, y_pred, average='binary', pos_label='P')

    all_accuracies.append(accuracy)
    all_precisions.append(precision)
    all_recalls.append(recall)
    all_f1_scores.append(f1)

    # Confusion matrix with a fixed label order. Because `labels` is given,
    # the result is always 2x2 (rows = true H, P; columns = predicted H, P),
    # even if a fold is missing one class, so no shape check is needed and
    # ravel() yields tn, fp, fn, tp in that order.
    cm = confusion_matrix(y_test, y_pred, labels=['H', 'P'])
    tn, fp, fn, tp = cm.ravel()

    # Guard against a fold that contains no samples of one class
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    all_sensitivities.append(sensitivity)
    all_specificities.append(specificity)

# Per-fold results keyed by metric name, paired with the label used when printing.
# Order matters: it fixes both the key order of `final_metrics` and the print order.
metric_series = {
    'accuracy': ('Accuracy', all_accuracies),
    'precision': ('Precision', all_precisions),
    'recall': ('Recall', all_recalls),
    'f1_score': ('F1 Score', all_f1_scores),
    'sensitivity': ('Sensitivity', all_sensitivities),
    'specificity': ('Specificity', all_specificities),
}

# Calculate overall mean and standard deviation for metrics.
# np.std uses its default (population) standard deviation, ddof=0.
final_metrics = {}
for metric_key, (_, fold_values) in metric_series.items():
    final_metrics[f'mean_{metric_key}'] = np.mean(fold_values)
    final_metrics[f'std_{metric_key}'] = np.std(fold_values)

# Report the fold count that was actually run instead of a hard-coded number,
# so the header cannot disagree with the cross-validation setup.
n_folds = len(all_accuracies)

# Print the final aggregated metrics
print(f"Final Aggregated Performance Metrics with {n_folds}-Fold Cross-Validation:")
for metric_key, (metric_label, _) in metric_series.items():
    mean_value = final_metrics[f'mean_{metric_key}']
    std_value = final_metrics[f'std_{metric_key}']
    print(f"Mean {metric_label}: {mean_value:.4f} ± {std_value:.4f}")


Final Aggregated Performance Metrics with 5-Fold Cross-Validation:
Mean Accuracy: 0.8503 ± 0.0856
Mean Precision: 0.8312 ± 0.1345
Mean Recall: 0.9075 ± 0.0869
Mean F1 Score: 0.8571 ± 0.0785
Mean Sensitivity: 0.9075 ± 0.0869
Mean Specificity: 0.8098 ± 0.1687


In [5]:
# Per-fold F1 scores for the positive class 'P' (one entry per cross-validation fold)
all_f1_scores

[np.float64(0.8888888888888888),
 np.float64(0.8571428571428571),
 np.float64(0.7058823529411765),
 np.float64(1.0),
 np.float64(0.9473684210526315),
 np.float64(0.875),
 np.float64(0.8571428571428571),
 np.float64(0.8),
 np.float64(0.782608695652174),
 np.float64(0.8571428571428571)]

In [6]:
# Per-fold accuracies (one entry per cross-validation fold)
all_accuracies

[0.8888888888888888,
 0.8333333333333334,
 0.7222222222222222,
 1.0,
 0.9411764705882353,
 0.8823529411764706,
 0.8235294117647058,
 0.8235294117647058,
 0.7058823529411765,
 0.8823529411764706]

In [7]:
# Per-fold precision for the positive class 'P' (one entry per cross-validation fold)
all_precisions

[np.float64(0.8888888888888888),
 np.float64(0.9),
 np.float64(0.5454545454545454),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.7777777777777778),
 np.float64(0.9),
 np.float64(0.8571428571428571),
 np.float64(0.6923076923076923),
 np.float64(0.75)]

In [8]:
# Per-fold recall for the positive class 'P' (one entry per cross-validation fold)
all_recalls

[np.float64(0.8888888888888888),
 np.float64(0.8181818181818182),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.9),
 np.float64(1.0),
 np.float64(0.8181818181818182),
 np.float64(0.75),
 np.float64(0.9),
 np.float64(1.0)]

In [9]:
# Per-fold sensitivity, tp / (tp + fn), computed from the confusion matrix;
# this is the same quantity as recall for the positive class 'P'
all_sensitivities

[np.float64(0.8888888888888888),
 np.float64(0.8181818181818182),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.9),
 np.float64(1.0),
 np.float64(0.8181818181818182),
 np.float64(0.75),
 np.float64(0.9),
 np.float64(1.0)]

In [10]:
# Per-fold specificity, tn / (tn + fp), computed from the confusion matrix
all_specificities

[np.float64(0.8888888888888888),
 np.float64(0.8571428571428571),
 np.float64(0.5833333333333334),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.8),
 np.float64(0.8333333333333334),
 np.float64(0.8888888888888888),
 np.float64(0.42857142857142855),
 np.float64(0.8181818181818182)]