In [16]:
import pandas as pd
from scipy.stats import wilcoxon

# Regression

In [17]:
def run_wilcoxon_test(csv_before, csv_after, model_name):
    df_before = pd.read_csv(csv_before)
    df_after = pd.read_csv(csv_after)

    print(f"=== Wilcoxon Signed-Rank Test for {model_name} ===\n")

    for metric in df_before.columns:
        before = df_before[metric]
        after = df_after[metric]

        stat, p = wilcoxon(before, after, alternative='two-sided')

        print(f"Metric: {metric}")
        print(f"  Test statistic: {stat:.4f}")
        print(f"  p-value: {p:.4f}")

        if p < 0.05:
            print("Statistically significant difference.")
        else:
            print("No statistically significant difference.")
        print()

In [18]:
# svm

run_wilcoxon_test(
    "3a_regression_no_resample_SVR.csv",
    "3b_regression_simple_resample_SVR.csv",
    "SVM"
)


=== Wilcoxon Signed-Rank Test for SVM ===

Metric: MAE
  Test statistic: 6.0000
  p-value: 0.8125
No statistically significant difference.

Metric: RMSE
  Test statistic: 1.0000
  p-value: 0.1250
No statistically significant difference.

Metric: R2
  Test statistic: 4.0000
  p-value: 0.4375
No statistically significant difference.


In [19]:
# knn

run_wilcoxon_test(
    "3a_regression_no_resample_KNN.csv",
    "3b_regression_simple_resample_KNN.csv",
    "KNN"
)

=== Wilcoxon Signed-Rank Test for KNN ===

Metric: MAE
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: RMSE
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: R2
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [20]:
# Decision Tree

run_wilcoxon_test(
    "3a_regression_no_resample_DT.csv",
    "3b_regression_simple_resample_DT.csv",
    "Decision Tree"
)

=== Wilcoxon Signed-Rank Test for Decision Tree ===

Metric: MAE
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: RMSE
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: R2
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [21]:
# mlp

run_wilcoxon_test(
    "3a_regression_no_resample_MLP.csv",
    "3b_regression_simple_resample_MLP.csv",
    "MLP"
)

=== Wilcoxon Signed-Rank Test for MLP ===

Metric: MAE
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: RMSE
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: R2
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


# Classification

In [22]:
# svm

run_wilcoxon_test(
    "2a_classification_no_resample_SVC.csv",
    "2b_classification_simple_resample_SVC.csv",
    "SVM"
)

=== Wilcoxon Signed-Rank Test for SVM ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [23]:
# knn

run_wilcoxon_test(
    "2a_classification_no_resample_KNN.csv",
    "2b_classification_simple_resample_KNN.csv",
    "KNN"
)

=== Wilcoxon Signed-Rank Test for KNN ===

Metric: acc
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: prec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: rec
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.

Metric: f1
  Test statistic: 0.0000
  p-value: 0.0625
No statistically significant difference.


In [24]:
# dt

run_wilcoxon_test(
    "2a_classification_no_resample_DT.csv",
    "2b_classification_simple_resample_DT.csv",
    "DT"
)

FileNotFoundError: [Errno 2] No such file or directory: '2b_classification_simple_resample_DT.csv'

In [ ]:
# MLP

run_wilcoxon_test(
    "2a_classification_no_resample_MLP.csv",
    "2b_classification_simple_resample_MLP.csv",
    "MLP"
)