In [1]:
# Imports: Kruskal-Wallis and Mann-Whitney U tests from SciPy, plus pandas for loading the CSV

from scipy.stats import kruskal, mannwhitneyu
import pandas as pd

# Significance level shared by the Kruskal-Wallis test and the pairwise Mann-Whitney U tests below
alpha = 0.05

In [2]:
def mannwhitneyu_test(df, model_columns, alpha, correction=None):
    """Run a two-sided Mann-Whitney U test for every pair of model columns.

    For each unordered pair of names in ``model_columns`` the two columns are
    compared, the statistic and p-value are printed, and a reject / fail-to-reject
    verdict is printed next to them.

    Missing values are dropped from each column before testing. Without this a
    single NaN makes the p-value NaN, ``p_value < alpha`` evaluates to False, and
    the pair would be reported as "no significant differences" even though no
    valid test was performed.

    Args:
        df: pandas DataFrame holding one column per model.
        model_columns: sequence of column names to compare pairwise.
        alpha: significance level for the decision.
        correction: ``None`` (default) compares every p-value against ``alpha``
            unchanged. ``"bonferroni"`` compares against ``alpha`` divided by the
            number of pairs, which controls the family-wise error rate when many
            pairs are tested.

    Returns:
        A list with one dict per pair, in the order tested, with the keys
        ``"model_a"``, ``"model_b"``, ``"u_stat"``, ``"p_value"`` and ``"reject"``.
        For a pair that had to be skipped, the last three values are ``None``.

    Raises:
        ValueError: if ``correction`` is not ``None`` or ``"bonferroni"``.
    """
    if correction not in (None, "bonferroni"):
        raise ValueError(f"Unknown correction: {correction!r}")

    # Enumerate unordered pairs once so the number of comparisons is known up front.
    pairs = [
        (model_columns[i], model_columns[j])
        for i in range(len(model_columns))
        for j in range(i + 1, len(model_columns))
    ]

    threshold = alpha
    if correction == "bonferroni" and pairs:
        threshold = alpha / len(pairs)

    print("Mann-Whitney U test statistic")
    if correction == "bonferroni":
        print("Bonferroni-adjusted significance threshold:", threshold)

    results = []
    for model_a, model_b in pairs:
        # Drop missing observations per column; the test is unpaired, so the two
        # samples do not need to keep the same length.
        sample_a = df[model_a].dropna()
        sample_b = df[model_b].dropna()

        print("===========================================")

        if len(sample_a) == 0 or len(sample_b) == 0:
            # Nothing to compare: report it explicitly instead of printing a verdict.
            print(model_a, "and", model_b, ": SKIPPED (no non-missing observations)")
            results.append(
                {
                    "model_a": model_a,
                    "model_b": model_b,
                    "u_stat": None,
                    "p_value": None,
                    "reject": None,
                }
            )
            continue

        u_stat, p_value = mannwhitneyu(sample_a, sample_b, alternative="two-sided")
        reject = bool(p_value < threshold)

        print(
            model_a,
            "and",
            model_b,
            ":",
            u_stat,
        )
        print("P-value:", p_value)
        if reject:
            print(
                "REJECT the null hypothesis: There are significant differences between the groups."
            )
        else:
            print(
                "FAIL TO REJECT the null hypothesis: There are NO significant differences between the groups."
            )

        results.append(
            {
                "model_a": model_a,
                "model_b": model_b,
                "u_stat": u_stat,
                "p_value": p_value,
                "reject": reject,
            }
        )

    return results

# Análise dos Regressores

In [3]:
# Load the results table; output.csv holds one column per model
output_df = pd.read_csv("output.csv")

# Keep each model's column under its own name
knr, svr, mlp, rf, gb, mlr = (
    output_df[column] for column in ("KNR", "SVR", "MLP", "RF", "GB", "MLR")
)

# Omnibus comparison: Kruskal-Wallis H-test across all six columns
h_stat, p_value = kruskal(knr, svr, mlp, rf, gb, mlr)

print("Kruskal-Wallis H-test statistic:", h_stat)
print("P-value:", p_value)

# Decide at the shared significance level; pairwise tests run only after a rejection
groups_differ = p_value < alpha
if groups_differ:
    print(
        "Reject the null hypothesis: There are significant differences between the groups.\n"
    )
    # Follow up with Mann-Whitney U tests on every pair of models
    model_columns = ["KNR", "SVR", "MLP", "RF", "GB", "MLR"]
    mannwhitneyu_test(output_df, model_columns, alpha)
else:
    print(
        "Fail to reject the null hypothesis: There are no significant differences between the groups."
    )

Kruskal-Wallis H-test statistic: 84.84297520661158
P-value: 8.120608973429573e-17
Reject the null hypothesis: There are significant differences between the groups.

Mann-Whitney U test statistic
KNR and SVR : 394.0
P-value: 1.657079293385879e-07
REJECT the null hypothesis: There are significant differences between the groups.
KNR and MLP : 398.0
P-value: 9.172772711656482e-08
REJECT the null hypothesis: There are significant differences between the groups.
KNR and RF : 400.0
P-value: 6.79561512817336e-08
REJECT the null hypothesis: There are significant differences between the groups.
KNR and GB : 400.0
P-value: 6.79561512817336e-08
REJECT the null hypothesis: There are significant differences between the groups.
KNR and MLR : 390.0
P-value: 2.959753733675675e-07
REJECT the null hypothesis: There are significant differences between the groups.
SVR and MLP : 233.0
P-value: 0.379331935127312
FAIL TO REJECT the null hypothesis: There are NO significant differences between the groups.
SVR 