In [1]:
import numpy as np
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp
import pandas as pd

In [2]:
# Datasets
datasets = ['aids']

# Machine Learning models
models = ['LogisticRegression', 'SVC', 'RandomForestClassifier', 'GradientBoostingClassifier','ResidualNeuralNetwork', 'MultiLayerNeuralNetwork']

# Experiment options
subsampling_options = ['False']
feature_reduction_options = ['True']

results = []
for model in models:
    for ss_opt in subsampling_options:
        for fr_opt in feature_reduction_options:
            filename = f'aids_{ss_opt}_{fr_opt}_{model}'
            path = f'../results_new_linux_results/{filename}.csv'
            df = pd.read_csv(path, index_col=False)
            #results.append([])
            test = df['energy_pred'].tolist()
            results.append(test)

In [3]:
# Combine the data into a 2D array
data = np.array(results)

# Transpose the array so that rows represent datasets and columns represent models
data = data.T

# Perform the Friedman Test
statistic, p_value = friedmanchisquare(*data)

# Output the results
print(f"Friedman Test Statistic: {statistic}")
print(f"P-value: {p_value}")

# Check for statistical significance (common significance level is 0.05)
if p_value < 0.05:
    print("The Friedman Test indicates a significant difference among the models.")
else:
    print("The Friedman Test does not indicate a significant difference among the models.")


Friedman Test Statistic: 29.429268292682877
P-value: 0.8665940555682419
The Friedman Test does not indicate a significant difference among the models.


In [4]:
sp.posthoc_nemenyi_friedman(data)

Unnamed: 0,0,1,2,3,4,5
0,1.0,0.004535,0.9,0.015819,0.001,0.001
1,0.004535,1.0,0.033473,0.001,0.047415,0.001
2,0.9,0.033473,1.0,0.001822,0.001,0.001
3,0.015819,0.001,0.001822,1.0,0.001,0.001
4,0.001,0.047415,0.001,0.001,1.0,0.396014
5,0.001,0.001,0.001,0.001,0.396014,1.0
