In [10]:
import pandas as pd
from scipy.stats import wilcoxon

In [11]:



def run_wilcoxon_test(csv_before, csv_after, model_name):
    df_before = pd.read_csv(csv_before)
    df_after = pd.read_csv(csv_after)

    print(f"=== Wilcoxon Signed-Rank Test for {model_name} ===\n")

    for metric in df_before.columns:
        before = df_before[metric]
        after = df_after[metric]

        stat, p = wilcoxon(before, after, alternative='two-sided')

        print(f"Metric: {metric}")
        print(f"  Test statistic: {stat:.4f}")
        print(f"  p-value: {p:.4f}")

        if p < 0.05:
            print("Statistically significant difference.")
        else:
            print("No statistically significant difference.")
        print()



# Comparison of regression and classification (without the resampling)

In [12]:
# svm

run_wilcoxon_test(
    "2a_classification_no_resample_SVC.csv",
    "3a_classification_no_resample_SVR.csv",
    "SVM"
)

=== Wilcoxon Signed-Rank Test for SVM ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [13]:
# knn

run_wilcoxon_test(
    "2a_classification_no_resample_KNN.csv",
    "3a_classification_no_resample_KNN.csv",
    "KNN"
)

=== Wilcoxon Signed-Rank Test for KNN ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [14]:
# dt

run_wilcoxon_test(
    "2a_classification_no_resample_DT.csv",
    "3a_classification_no_resample_DT.csv",
    "DT"
)

=== Wilcoxon Signed-Rank Test for DT ===

Metric: acc
  Test statistic: 1.0000
  p-value: 0.1250
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 6.0000
  p-value: 0.8125
No statistically significant difference.

Metric: f1
  Test statistic: 3.0000
  p-value: 0.3125
No statistically significant difference.


In [15]:
# mlp
run_wilcoxon_test(
    "2a_classification_no_resample_MLP.csv",
    "3a_classification_no_resample_MLP.csv",
    "MLP"
)

=== Wilcoxon Signed-Rank Test for MLP ===

Metric: acc
  Test statistic: 1.0000
  p-value: 0.1250
No statistically significant difference.

Metric: prec
  Test statistic: 5.0000
  p-value: 0.6250
No statistically significant difference.

Metric: rec
  Test statistic: 4.0000
  p-value: 0.4375
No statistically significant difference.

Metric: f1
  Test statistic: 3.0000
  p-value: 0.3125
No statistically significant difference.


# Comparison of regression and classification (with simple resampling)


In [16]:
run_wilcoxon_test(
    "2b_classification_simple_resample_SVC.csv",
    "3b_classification_simple_resample_SVR.csv",
    "SVM"
)

=== Wilcoxon Signed-Rank Test for SVM ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [17]:
run_wilcoxon_test(
    "2b_classification_simple_resample_KNN.csv",
    "3b_classification_simple_resample_KNN.csv",
    "KNN"
)

=== Wilcoxon Signed-Rank Test for KNN ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [18]:
run_wilcoxon_test(
    "2b_classification_simple_resample_DT.csv",
    "3b_classification_simple_resample_DT.csv",
    "DT"
)

=== Wilcoxon Signed-Rank Test for DT ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [19]:
run_wilcoxon_test(
    "2b_classification_simple_resample_MLP.csv",
    "3b_classification_simple_resample_MLP.csv",
    "MLP"
)

=== Wilcoxon Signed-Rank Test for MLP ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.
