In [12]:
import pandas as pd
from scipy.stats import wilcoxon

In [13]:
def run_wilcoxon_test(csv_before, csv_after, model_name, alpha=0.05):
    """Print a two-sided Wilcoxon signed-rank test for every metric column.

    Both files are loaded with ``pd.read_csv``. Every column of the "before"
    file is compared against the same-named column of the "after" file using
    ``scipy.stats.wilcoxon(..., alternative='two-sided')``. Rows are treated
    as paired observations (presumably one row per run/fold -- confirm
    against the code that produced the CSVs).

    This is the compact variant of ``run_wilcoxon_test2``: it does not echo
    the raw values of each column.

    Parameters
    ----------
    csv_before, csv_after
        Anything ``pd.read_csv`` accepts (typically a file path).
    model_name
        Label interpolated into the report header.
    alpha : float, default 0.05
        Significance level; a p-value strictly below it is reported as a
        statistically significant difference.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    df_before = pd.read_csv(csv_before)
    df_after = pd.read_csv(csv_after)

    print(f"=== Wilcoxon Signed-Rank Test for {model_name} ===\n")

    for metric in df_before.columns:
        # A metric that exists only in the "before" file cannot be paired.
        # Report it and move on instead of aborting the report with KeyError.
        if metric not in df_after.columns:
            print(f"Skipping {metric}: column missing from the 'after' CSV.")
            continue

        before = df_before[metric]
        after = df_after[metric]

        # The signed-rank test is defined on pairs, so both samples must
        # contain the same number of observations.
        if len(before) != len(after):
            print(f"Skipping {metric}: unequal lengths.")
            continue

        print(f"Metric: {metric}")
        try:
            stat, p = wilcoxon(before, after, alternative='two-sided')
        except ValueError as e:
            # wilcoxon rejects some inputs with ValueError; keep processing
            # the remaining metrics rather than losing the whole report.
            print(f"  Could not compute Wilcoxon for {metric}: {e}")
        else:
            print(f"  Test statistic: {stat:.4f}")
            print(f"  p-value: {p:.4f}")

            if pd.isna(p):
                # `nan < alpha` is False, which would otherwise be reported
                # as "no difference" even though no test result exists.
                print("Inconclusive: p-value is NaN (check for missing values).")
            elif p < alpha:
                print("Statistically significant difference.")
            else:
                print("No statistically significant difference.")
        print()

In [14]:
import pandas as pd
from scipy.stats import wilcoxon

def run_wilcoxon_test2(csv_before, csv_after, model_name, alpha=0.05):
    """Print raw values and a two-sided Wilcoxon signed-rank test per metric.

    Both files are loaded with ``pd.read_csv``. For every column of the
    "before" file the same-named column of the "after" file is looked up,
    both value arrays are echoed, and
    ``scipy.stats.wilcoxon(..., alternative='two-sided')`` is run on the pair.
    Rows are treated as paired observations (presumably one row per run/fold
    -- confirm against the code that produced the CSVs).

    Metrics that cannot be tested (column missing from the "after" file,
    unequal lengths, or a ``ValueError`` raised by ``wilcoxon``) are reported
    and skipped so that the remaining metrics are still processed.

    Parameters
    ----------
    csv_before, csv_after
        Anything ``pd.read_csv`` accepts (typically a file path).
    model_name
        Label interpolated into the report header.
    alpha : float, default 0.05
        Significance level; a p-value strictly below it is reported as a
        statistically significant difference.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    df_before = pd.read_csv(csv_before)
    df_after = pd.read_csv(csv_after)

    print(f"=== Wilcoxon Signed-Rank Test for {model_name} ===\n")

    for metric in df_before.columns:
        # This lookup happens outside the try block below, so a column that is
        # absent from the "after" file must be handled here; otherwise the
        # KeyError would abort the whole report.
        if metric not in df_after.columns:
            print(f"Skipping {metric} – missing from the 'after' CSV.")
            continue

        before = df_before[metric]
        after = df_after[metric]

        # The signed-rank test is defined on pairs, so both samples must
        # contain the same number of observations.
        if len(before) != len(after):
            print(f"Skipping {metric} – unequal lengths.")
            continue

        print(f"Metric: {metric}")
        print(f"  Values BEFORE: {before.values}")
        print(f"  Values AFTER : {after.values}")

        try:
            stat, p = wilcoxon(before, after, alternative='two-sided')
            print(f"  Test statistic: {stat:.4f}")
            print(f"  p-value: {p:.4f}")

            if pd.isna(p):
                # `nan < alpha` is False, which would otherwise be reported
                # as "no difference" even though no test result exists.
                print("  Inconclusive: p-value is NaN (check for missing values).")
            elif p < alpha:
                print("  ✅ Statistically significant difference.")
            else:
                print("  ❌ No statistically significant difference.")
        except ValueError as e:
            print(f"  Could not compute Wilcoxon for {metric}: {e}")
        print()


# Regression

In [15]:
# SVR regression: "no_resample" (3a) results vs. "simple_resample" (3b) results.

run_wilcoxon_test2(
    csv_before="3a_regression_no_resample_SVR.csv",
    csv_after="3b_regression_simple_resample_SVR.csv",
    model_name="SVM",
)


=== Wilcoxon Signed-Rank Test for SVM ===

Metric: MAE
  Values BEFORE: [7.41312995 8.70832577 5.71051775 8.58182842 8.65899444 6.40285989
 6.01273858 7.26175136 6.88138969 7.85707292]
  Values AFTER : [5.85834047 6.46285381 7.63145296 7.94318166 6.08787581 6.32799783
 7.50406013 6.29945558 8.69552085 6.26401142]
  Test statistic: 19.0000
  p-value: 0.4316
  ❌ No statistically significant difference.

Metric: RMSE
  Values BEFORE: [19.5837638  35.7955423  12.30392317 21.97960549 41.43124672 24.7061999
 11.28490252 21.867351   24.24518249 24.7758711 ]
  Values AFTER : [38.30315837 34.18525579 41.27581248 45.84840985 35.0479071  32.07369797
 44.21192563 37.28575722 48.88236178 33.94745064]
  Test statistic: 3.0000
  p-value: 0.0098
  ✅ Statistically significant difference.

Metric: R2
  Values BEFORE: [0.49314609 0.22453251 0.70160318 0.45632369 0.17190444 0.36771562
 0.74090977 0.43473524 0.37126939 0.38430044]
  Values AFTER : [0.47238594 0.54605998 0.47690094 0.42433058 0.52795897 0.5

In [16]:
# KNN regression: "no_resample" (3a) results vs. "simple_resample" (3b) results.
# NOTE(review): in the saved output the errors shrink by roughly an order of
# magnitude after resampling -- confirm this is not caused by duplicated
# samples reaching the evaluation folds or by a change of target scale.

run_wilcoxon_test2(
    csv_before="3a_regression_no_resample_KNN.csv",
    csv_after="3b_regression_simple_resample_KNN.csv",
    model_name="KNN",
)

=== Wilcoxon Signed-Rank Test for KNN ===

Metric: MAE
  Values BEFORE: [7.48062352 8.03448849 5.45212392 6.99238976 8.00698131 6.37144817
 5.82380937 6.38296506 6.09445574 6.86932174]
  Values AFTER : [0.7968201  0.83558545 0.79409441 0.87454329 0.8090399  0.78364206
 0.87093544 0.82990842 0.81754102 0.83999174]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: RMSE
  Values BEFORE: [17.45272908 25.02106627 10.10088048 14.13294886 34.96282069 20.76524989
  8.46017765 14.89654491 15.57328558 16.71920987]
  Values AFTER : [1.87900948 2.21332038 1.94999154 2.13433635 1.82717693 1.81719126
 2.02992435 2.00637866 2.00529929 1.9173503 ]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: R2
  Values BEFORE: [0.59745246 0.62110635 0.79889381 0.77521558 0.41029151 0.55334225
 0.85438223 0.73768045 0.74059748 0.71962294]
  Values AFTER : [0.99873029 0.99809713 0.9988325  0.99875247 0.99871703 0.99860826
 0.99

In [17]:
# Decision-tree regression: "no_resample" (3a) vs. "simple_resample" (3b) results.
# NOTE(review): the saved output shows R2 going from as low as about -0.9 to
# about 0.9997 after resampling -- confirm this is not caused by duplicated
# samples reaching the evaluation folds.

run_wilcoxon_test2(
    csv_before="3a_regression_no_resample_DT.csv",
    csv_after="3b_regression_simple_resample_DT.csv",
    model_name="Decision Tree",
)

=== Wilcoxon Signed-Rank Test for Decision Tree ===

Metric: MAE
  Values BEFORE: [8.77181058 7.41643207 6.05169234 7.37747352 8.05422348 5.99301625
 5.56509307 6.61795412 6.26115135 7.26685145]
  Values AFTER : [0.22799019 0.23338395 0.21636508 0.22824545 0.21073824 0.22720004
 0.2364333  0.20558537 0.19771249 0.22234961]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: RMSE
  Values BEFORE: [37.88254441 19.42097738 13.00101404 19.00353787 35.14145323 17.33883281
 12.50037774 18.32722762 21.27555836 34.25752204]
  Values AFTER : [0.83536218 0.88006832 0.829315   0.89340119 0.8171202  0.88478606
 0.88177555 0.79766026 0.78210913 0.87023558]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: R2
  Values BEFORE: [-0.8965695   0.77173051  0.66683343  0.59358517  0.40425022  0.68858447
  0.68209183  0.60294277  0.51585481 -0.17712577]
  Values AFTER : [0.99974904 0.99969915 0.99978883 0.99978142 0.99974

In [18]:
# MLP regression: "no_resample" (3a) results vs. "simple_resample" (3b) results.
# NOTE(review): the saved output shows R2 rising to about 0.997 in every run
# after resampling -- confirm this is not caused by duplicated samples
# reaching the evaluation folds.

run_wilcoxon_test2(
    csv_before="3a_regression_no_resample_MLP.csv",
    csv_after="3b_regression_simple_resample_MLP.csv",
    model_name="MLP",
)

=== Wilcoxon Signed-Rank Test for MLP ===

Metric: MAE
  Values BEFORE: [5.96696717 6.78838135 4.93859358 6.196451   6.20319591 5.38578873
 4.60520921 5.87495689 5.39477737 5.90558898]
  Values AFTER : [1.96801    1.94522376 2.13412556 2.00383003 1.96198738 2.11660391
 2.05496303 2.24822806 1.98087501 2.41227782]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: RMSE
  Values BEFORE: [15.06240694 25.20373694  9.90725798 12.69713022 31.87128395 17.69091372
  7.72316772 14.71226265 15.91726799 14.42429179]
  Values AFTER : [2.66244948 2.91158239 3.25032118 2.75399813 2.77792773 3.07847421
 2.97344934 3.18976017 3.38212784 3.45125996]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: R2
  Values BEFORE: [0.70016714 0.61555379 0.80652988 0.81856888 0.50996898 0.67580891
 0.87864817 0.74413052 0.72901157 0.7913108 ]
  Values AFTER : [0.99745076 0.9967071  0.99675627 0.99792293 0.9970345  0.99600582
 0.99

# Classification

In [19]:
# SVC classification: "no_resample" (2a) results vs. "simple_resample" (2b) results.

run_wilcoxon_test2(
    csv_before="2a_classification_no_resample_SVC.csv",
    csv_after="2b_classification_simple_resample_SVC.csv",
    model_name="SVM",
)

=== Wilcoxon Signed-Rank Test for SVM ===

Metric: acc
  Values BEFORE: [0.68333333 0.67083333 0.6875     0.63125    0.68125    0.68125
 0.68541667 0.67916667 0.69375    0.69375   ]
  Values AFTER : [0.66060904 0.66699411 0.66257367 0.65324165 0.66011788 0.66159136
 0.65962672 0.65618861 0.67387033 0.67829077]
  Test statistic: 6.0000
  p-value: 0.0273
  ✅ Statistically significant difference.

Metric: prec
  Values BEFORE: [0.46850683 0.38532401 0.37511091 0.3445748  0.42427792 0.42602384
 0.36801087 0.36980609 0.35715658 0.38193961]
  Values AFTER : [0.66605446 0.67585088 0.6711536  0.66296722 0.6752907  0.66528915
 0.66580718 0.6605424  0.6738258  0.68483931]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: rec
  Values BEFORE: [0.42560431 0.39289006 0.38364257 0.33538005 0.40103624 0.42771889
 0.41519201 0.3908697  0.38148296 0.37532626]
  Values AFTER : [0.6590192  0.66624517 0.66501143 0.65350707 0.66434812 0.66272088
 0.66140323 0.656

In [20]:
# KNN classification: "no_resample" (2a) results vs. "simple_resample" (2b) results.

run_wilcoxon_test2(
    csv_before="2a_classification_no_resample_KNN.csv",
    csv_after="2b_classification_simple_resample_KNN.csv",
    model_name="KNN",
)

=== Wilcoxon Signed-Rank Test for KNN ===

Metric: acc
  Values BEFORE: [0.575      0.56875    0.62291667 0.58125    0.55625    0.58125
 0.5625     0.58541667 0.6        0.60416667]
  Values AFTER : [0.89833006 0.8845776  0.9002947  0.89882122 0.89931238 0.90766208
 0.89390963 0.89440079 0.89931238 0.91257367]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: prec
  Values BEFORE: [0.39468687 0.34621439 0.39503158 0.32724418 0.30643721 0.45298992
 0.31655537 0.33724756 0.32669492 0.37131077]
  Values AFTER : [0.90068461 0.88082071 0.89818292 0.89730693 0.89758435 0.90018354
 0.89318116 0.89189231 0.89542306 0.91418781]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: rec
  Values BEFORE: [0.34502789 0.31514886 0.3450748  0.32024551 0.28477988 0.3354685
 0.30402106 0.31407672 0.31053364 0.32621119]
  Values AFTER : [0.90008527 0.88381982 0.89919545 0.90058926 0.89957458 0.90232493
 0.89791131 0.8936

In [21]:
# Decision-tree classification: "no_resample" (2a) vs. "simple_resample" (2b) results.
# NOTE(review): the saved output shows accuracy jumping from about 0.63-0.69
# to about 0.97-0.99 after resampling -- confirm this is not caused by
# duplicated samples reaching the evaluation folds.

run_wilcoxon_test2(
    csv_before="2a_classification_no_resample_DT.csv",
    csv_after="2b_classification_simple_resample_DT.csv",
    model_name="DT",
)

=== Wilcoxon Signed-Rank Test for DT ===

Metric: acc
  Values BEFORE: [0.62708333 0.68541667 0.65416667 0.64375    0.62916667 0.67083333
 0.68125    0.64375    0.6625     0.65833333]
  Values AFTER : [0.97642436 0.97937132 0.97544204 0.97642436 0.98182711 0.98526523
 0.96856582 0.97789784 0.97937132 0.97789784]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: prec
  Values BEFORE: [0.40052189 0.45408946 0.42616583 0.41840351 0.411961   0.37676859
 0.40678991 0.38076014 0.39603301 0.42605216]
  Values AFTER : [0.97749581 0.9791211  0.97532309 0.97780914 0.98232868 0.9844248
 0.96920043 0.97778357 0.98006477 0.97860322]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: rec
  Values BEFORE: [0.39270738 0.45374814 0.44783125 0.39683203 0.36899766 0.36740798
 0.41518692 0.37702162 0.39364635 0.39251771]
  Values AFTER : [0.9769004  0.97899291 0.97447417 0.97685598 0.98130971 0.98451084
 0.97006335 0.97

In [22]:
# MLP classification: "no_resample" (2a) results vs. "simple_resample" (2b) results.

run_wilcoxon_test2(
    csv_before="2a_classification_no_resample_MLP.csv",
    csv_after="2b_classification_simple_resample_MLP.csv",
    model_name="MLP",
)

=== Wilcoxon Signed-Rank Test for MLP ===

Metric: acc
  Values BEFORE: [0.7        0.73333333 0.69583333 0.7125     0.6875     0.7125
 0.69166667 0.725      0.73125    0.73541667]
  Values AFTER : [0.86394892 0.87573674 0.86886051 0.86444008 0.87131631 0.89685658
 0.87131631 0.87966601 0.89685658 0.89685658]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: prec
  Values BEFORE: [0.39945686 0.39700082 0.34088972 0.38240271 0.41042568 0.46123985
 0.39817053 0.40338982 0.43706119 0.4554337 ]
  Values AFTER : [0.86619385 0.8729429  0.87081574 0.86313239 0.8743764  0.89304277
 0.87073872 0.8786982  0.88961477 0.89846309]
  Test statistic: 0.0000
  p-value: 0.0020
  ✅ Statistically significant difference.

Metric: rec
  Values BEFORE: [0.42706485 0.4454863  0.37931611 0.40716634 0.39103116 0.47477755
 0.43034933 0.41978532 0.45051478 0.44579267]
  Values AFTER : [0.86617    0.87328998 0.87269896 0.86626297 0.87342414 0.89226039
 0.8739047  0.8800