In [28]:
import pandas as pd
import scipy.stats as stats
import numpy as np

In [29]:
def process_csv(file):
    """Load a per-fold metrics CSV into a DataFrame indexed by ``fold``.

    Parameters
    ----------
    file : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``. The CSV must contain a
        ``fold`` column.

    Returns
    -------
    pandas.DataFrame
        One row per fold and one column per metric. The ``Unnamed: 0``
        column (left behind when a frame is saved with its positional index)
        is removed if present, and the ``fpr`` column, if present, is
        multiplied by 100.

    Raises
    ------
    KeyError
        If the CSV has no ``fold`` column.
    """
    frame = (
        pd.read_csv(file)
        .set_index('fold')
        # errors='ignore': CSVs written with index=False have no such column.
        .drop(columns=['Unnamed: 0'], errors='ignore')
    )

    if 'fpr' in frame.columns:
        # NOTE(review): presumably converts a fraction to a percentage, but
        # the scaled fpr values recorded in this notebook are far above 100
        # -- confirm the unit of the raw column.
        frame = frame.assign(fpr=frame['fpr'] * 100)

    return frame

In [30]:
# Per-fold metric tables for the two models compared in the
# "ISA-RPN vs RPN" section.
isa_rpn = process_csv('isa-rpn.csv')
vanilla_rpn = process_csv('vanilla-rpn.csv')

In [31]:
# Per-fold metric tables for the "ISA-ViT vs ViT (Perfect Bounding Boxes)"
# section.
pb_isa_vit = process_csv('perfect-bbox-isa-vit.csv')
pb_vanilla_vit = process_csv('perfect-bbox-vanilla-vit.csv')

In [32]:
# Per-fold metric tables for the "ISA-RPN + ISA-ViT vs RPN + ViT" section.
isa_rpn_isa_vit = process_csv('isa-rpn_isa-vit.csv')
rpn_vit = process_csv('rpn_vit.csv')

In [33]:
# Significance level for the hypothesis tests in this notebook; it has the
# same value as the default `alpha` of compute_wilcoxon.
ALPHA = 0.05

### Functions


In [34]:
def compute_wilcoxon(df1, df2, metrics, alpha=0.05):
    """Run a one-sided paired Wilcoxon signed-rank test for each metric.

    For every name in ``metrics`` the paired differences
    ``df1[metric] - df2[metric]`` are tested. The alternative hypothesis is
    that ``df1`` is the better model: lower for ``'fpr'``, higher for every
    other metric.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with one column per metric. The subtraction aligns rows by
        index label, so both frames should share the same index.
    metrics : iterable of str
        Column names to test. May be empty.
    alpha : float, default 0.05
        Threshold below which a p-value is flagged as significant.

    Returns
    -------
    pandas.DataFrame
        Indexed by metric name, with float columns ``stat`` and ``p_value``
        and a boolean column ``significant`` (``p_value < alpha``).

    Raises
    ------
    KeyError
        If a requested metric is missing from either frame.

    Notes
    -----
    Per the SciPy documentation, the exact null distribution is only used
    when no paired difference is zero; otherwise SciPy emits a warning and
    falls back to a normal approximation.
    """
    results = {}

    for metric in metrics:
        diff = df1[metric] - df2[metric]

        if (diff != 0).any():
            alternative = 'less' if metric == 'fpr' else 'greater'
            stat, p_value = stats.wilcoxon(diff, method='exact', alternative=alternative)
        else:
            # All pairs are identical: the test is undefined, so report the
            # conventional "no evidence" result instead of calling SciPy.
            stat, p_value = 0.0, 1.0

        results[metric] = (stat, p_value)

    # Fixed columns and dtype keep the table well-formed even when `metrics`
    # is empty (the previous dict-of-dicts transpose had no 'p_value' column
    # in that case and raised KeyError).
    table = pd.DataFrame(
        list(results.values()),
        index=list(results.keys()),
        columns=['stat', 'p_value'],
        dtype=float,
    )
    table['significant'] = table['p_value'] < alpha

    return table

### ISA-RPN vs RPN


In [35]:
# Per-metric mean and standard deviation across folds for ISA-RPN
# (pandas' std defaults to ddof=1).
isa_rpn.mean(), isa_rpn.std()

(iou          0.201635
 precision    0.257323
 recall       0.468507
 f1           0.294992
 dtype: float64,
 iou          0.132874
 precision    0.119832
 recall       0.287291
 f1           0.187523
 dtype: float64)

In [36]:
# Per-metric mean and standard deviation across folds for the vanilla RPN
# (pandas' std defaults to ddof=1).
vanilla_rpn.mean(), vanilla_rpn.std()

(iou          0.228331
 precision    0.270117
 recall       0.550820
 f1           0.340814
 dtype: float64,
 iou          0.127852
 precision    0.152080
 recall       0.318608
 f1           0.191259
 dtype: float64)

In [37]:
# Pass the notebook-level ALPHA explicitly so the significance threshold is
# controlled from one place instead of the function's own default.
compute_wilcoxon(isa_rpn, vanilla_rpn, isa_rpn.columns.tolist(), alpha=ALPHA)

Unnamed: 0,stat,p_value,significant
iou,8.0,0.5,False
precision,7.0,0.59375,False
recall,4.0,0.84375,False
f1,6.0,0.6875,False


### ISA-RPN + ISA-ViT vs RPN + ViT


In [38]:
# Per-metric mean and standard deviation across folds for ISA-RPN + ISA-ViT
# (pandas' std defaults to ddof=1).
isa_rpn_isa_vit.mean(), isa_rpn_isa_vit.std()

(dice              0.000920
 precision         0.000460
 recall            0.994444
 f1                0.000920
 fpr          442143.157895
 dtype: float64,
 dice             0.000069
 precision        0.000035
 recall           0.012423
 f1               0.000069
 fpr          34017.154382
 dtype: float64)

In [39]:
# Per-metric mean and standard deviation across folds for RPN + ViT
# (pandas' std defaults to ddof=1).
rpn_vit.mean(), rpn_vit.std()

(dice           0.005859
 precision      0.003843
 recall         0.014407
 f1             0.005859
 fpr          577.894737
 dtype: float64,
 dice           0.006057
 precision      0.004287
 recall         0.013160
 f1             0.006057
 fpr          600.904212
 dtype: float64)

In [40]:
# Pass the notebook-level ALPHA explicitly so the significance threshold is
# controlled from one place instead of the function's own default.
compute_wilcoxon(isa_rpn_isa_vit, rpn_vit, isa_rpn_isa_vit.columns.tolist(), alpha=ALPHA)

Unnamed: 0,stat,p_value,significant
dice,3.0,0.90625,False
precision,3.0,0.90625,False
recall,15.0,0.03125,True
f1,3.0,0.90625,False
fpr,15.0,1.0,False


### ISA-ViT vs ViT (Perfect Bounding Boxes)


In [41]:
# Per-metric mean and standard deviation across folds for ISA-ViT with
# perfect bounding boxes (pandas' std defaults to ddof=1).
pb_isa_vit.mean(), pb_isa_vit.std()

(dice         0.773333
 precision    0.915789
 recall       0.669231
 f1           0.773333
 fpr          8.421053
 dtype: float64,
 dice         0.024343
 precision    0.028828
 recall       0.021066
 f1           0.024343
 fpr          2.882750
 dtype: float64)

In [42]:
# Per-metric mean and standard deviation across folds for the vanilla ViT
# with perfect bounding boxes (pandas' std defaults to ddof=1).
pb_vanilla_vit.mean(), pb_vanilla_vit.std()

(dice           0.333333
 precision      0.223684
 recall         0.653846
 f1             0.333333
 fpr          310.526316
 dtype: float64,
 dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64)

In [43]:
# Pass the notebook-level ALPHA explicitly so the significance threshold is
# controlled from one place instead of the function's own default.
compute_wilcoxon(pb_isa_vit, pb_vanilla_vit, pb_isa_vit.columns.tolist(), alpha=ALPHA)

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Unnamed: 0,stat,p_value,significant
dice,15.0,0.03125,True
precision,15.0,0.03125,True
recall,3.0,0.07865,False
f1,15.0,0.03125,True
fpr,0.0,0.03125,True
