In [21]:
# Imports: Kruskal-Wallis and Mann-Whitney U tests from SciPy, plus pandas for reading the CSV

from scipy.stats import kruskal, mannwhitneyu
import pandas as pd

# Significance level that the Kruskal-Wallis and Mann-Whitney U p-values are compared against.
alpha = 0.05

In [20]:
def mannwhitneyu_test(df, model_columns, alpha, correction=None):
    """Run a two-sided Mann-Whitney U test on every pair of columns and print the verdicts.

    For each unordered pair taken from ``model_columns`` the U statistic, the
    p-value and a REJECT / FAIL TO REJECT line are printed. Nothing is returned.

    Parameters
    ----------
    df : mapping of column name -> sample (the notebook passes a pandas DataFrame)
        Container indexed by the names in ``model_columns``.
    model_columns : sequence of str
        Column names to compare; each pair is tested once, in list order.
    alpha : float
        Significance level.
    correction : {None, "bonferroni"}, optional
        Multiple-comparison adjustment. ``None`` (the default) compares every
        p-value against ``alpha`` as-is. ``"bonferroni"`` compares against
        ``alpha`` divided by the number of pairs, which keeps the family-wise
        error rate at ``alpha`` when many pairs are tested, and prints the
        adjusted threshold once before the per-pair results.

    Raises
    ------
    ValueError
        If ``correction`` is neither ``None`` nor ``"bonferroni"``.
    """
    # Local import so this cell does not depend on edits to the import cell.
    from itertools import combinations

    if correction not in (None, "bonferroni"):
        raise ValueError(
            "Unknown correction {!r}; expected None or 'bonferroni'.".format(correction)
        )

    # combinations() yields the pairs in the same order as a nested i < j index loop.
    pairs = list(combinations(model_columns, 2))

    threshold = alpha
    if correction == "bonferroni" and pairs:
        threshold = alpha / len(pairs)

    print("Mann-Whitney U test statistic")
    if correction == "bonferroni":
        print("Bonferroni-adjusted alpha:", threshold)

    for first, second in pairs:
        u_stat, p_value = mannwhitneyu(df[first], df[second], alternative="two-sided")
        print("===========================================")
        print(
            first,
            "and",
            second,
            ":",
            u_stat,
        )
        print("P-value:", p_value)
        # NOTE: a NaN p-value compares False here and therefore falls into the
        # FAIL TO REJECT branch; inputs are passed to SciPy without NaN filtering.
        if p_value < threshold:
            print(
                "REJECT the null hypothesis: There are significant differences between the groups."
            )
        else:
            print(
                "FAIL TO REJECT the null hypothesis: There are NO significant differences between the groups."
            )

# Análise de classificadores monolíticos

In [22]:
# Load the results table from output.csv.
output_df = pd.read_csv("output.csv")

# One Series per column compared in this cell.
knn, dt, nb, svm, mlp = (
    output_df[column] for column in ("KNN", "DT", "NB", "SVM", "MLP")
)

# These three columns are extracted here too, although the Kruskal-Wallis
# call in this cell does not use them.
mv, sv, bc = (output_df[column] for column in ("MV", "SV", "BC"))

# Kruskal-Wallis H-test across the five samples.
h_stat, p_value = kruskal(knn, dt, nb, svm, mlp)

print("Kruskal-Wallis H-test statistic:", h_stat)
print("P-value:", p_value)

# Compare against alpha; the pairwise Mann-Whitney U follow-up only runs
# when the H-test rejects the null hypothesis.
if not (p_value < alpha):
    print(
        "Fail to reject the null hypothesis: There are no significant differences between the groups."
    )
else:
    print(
        "Reject the null hypothesis: There are significant differences between the groups.\n"
    )
    model_columns = ["KNN", "DT", "NB", "SVM", "MLP"]
    mannwhitneyu_test(output_df, model_columns, alpha)

Kruskal-Wallis H-test statistic: 85.4147258916777
P-value: 1.2387467851629486e-17
Reject the null hypothesis: There are significant differences between the groups.

Mann-Whitney U test statistic
KNN and DT : 31.5
P-value: 5.3654307386196095e-06
REJECT the null hypothesis: There are significant differences between the groups.
KNN and NB : 400.0
P-value: 6.719330765889552e-08
REJECT the null hypothesis: There are significant differences between the groups.
KNN and SVM : 0.0
P-value: 6.681468439048667e-08
REJECT the null hypothesis: There are significant differences between the groups.
KNN and MLP : 60.5
P-value: 0.0001632113773261972
REJECT the null hypothesis: There are significant differences between the groups.
DT and NB : 400.0
P-value: 6.6720318790895e-08
REJECT the null hypothesis: There are significant differences between the groups.
DT and SVM : 3.0
P-value: 1.0357590984058813e-07
REJECT the null hypothesis: There are significant differences between the groups.
DT and MLP : 280.0

# Análise de classificadores compostos

In [23]:
# Load the results table from output.csv.
output_df = pd.read_csv("output.csv")

# One Series per column compared in this cell.
mv, sv, bc = (output_df[column] for column in ("MV", "SV", "BC"))

# Kruskal-Wallis H-test across the three samples.
h_stat, p_value = kruskal(mv, sv, bc)

print("Kruskal-Wallis H-test statistic:", h_stat)
print("P-value:", p_value)

# Compare against alpha; the pairwise Mann-Whitney U follow-up only runs
# when the H-test rejects the null hypothesis.
if not (p_value < alpha):
    print(
        "Fail to reject the null hypothesis: There are no significant differences between the groups."
    )
else:
    print(
        "Reject the null hypothesis: There are significant differences between the groups.\n"
    )
    model_columns = ["MV", "SV", "BC"]
    mannwhitneyu_test(output_df, model_columns, alpha)

Kruskal-Wallis H-test statistic: 39.68786513102988
P-value: 2.4092928264429973e-09
Reject the null hypothesis: There are significant differences between the groups.

Mann-Whitney U test statistic
MV and SV : 230.0
P-value: 0.4241435800592843
FAIL TO REJECT the null hypothesis: There are NO significant differences between the groups.
MV and BC : 400.0
P-value: 6.625022613183119e-08
REJECT the null hypothesis: There are significant differences between the groups.
SV and BC : 400.0
P-value: 6.59695545231667e-08
REJECT the null hypothesis: There are significant differences between the groups.
