In [373]:
import pandas as pd
import scipy.stats as stats
import numpy as np

In [374]:
def process_csv(file):
    return pd.read_csv(file).set_index('fold').drop(columns=['Unnamed: 0']).apply(lambda x: x*100)

In [375]:
isa_rpn = process_csv('isa-rpn.csv')
vanilla_rpn = process_csv('vanilla-rpn.csv')

In [376]:
isa_rpn_vit = process_csv('isa-rpn_vit.csv')
isa_rpn_isa_vit = process_csv('isa-rpn_isa-vit.csv')

In [377]:
rpn_vit = process_csv('rpn_vit.csv')
rpn_isa_vit = process_csv('rpn_isa-vit.csv')

In [378]:
pb_isa_vit = process_csv('perfect-bbox-isa-vit.csv')
pb_vanilla_vit = process_csv('perfect-bbox-vanilla-vit.csv')

In [379]:
ALPHA = 0.05

### Functions


In [380]:
def compute_wilcoxon(df1, df2, metrics, alpha=0.05):
    results = {}
    
    for metric in metrics:
        diff = df1[metric] - df2[metric]
        
        if len(diff[diff != 0]) == 0:
            results[metric] = {'stat': 0, 'p_value': 1}
        else:
            stat, p_value = stats.wilcoxon(diff, method='exact', alternative='greater')
            results[metric] = {'stat': stat, 'p_value': p_value}
            
    df = pd.DataFrame(results).T
    df['significant'] = df['p_value'] < alpha
    
    return df

### ISA-RPN vs RPN


In [381]:
isa_rpn.mean(), isa_rpn.std()

(iou          20.163493
 precision    25.732271
 recall       46.850660
 f1           29.499180
 dtype: float64,
 iou          13.287390
 precision    11.983179
 recall       28.729077
 f1           18.752343
 dtype: float64)

In [382]:
vanilla_rpn.mean(), vanilla_rpn.std()

(iou          22.833132
 precision    27.011666
 recall       55.082039
 f1           34.081384
 dtype: float64,
 iou          12.785155
 precision    15.208007
 recall       31.860801
 f1           19.125895
 dtype: float64)

In [383]:
compute_wilcoxon(isa_rpn, vanilla_rpn, isa_rpn.columns.tolist())

Unnamed: 0,stat,p_value,significant
iou,8.0,0.5,False
precision,7.0,0.59375,False
recall,4.0,0.84375,False
f1,6.0,0.6875,False


### ISA-ViT vs ViT (ISA-RPN)


In [384]:
isa_rpn_isa_vit.mean(), isa_rpn_isa_vit.std()

(dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64,
 dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64)

In [385]:
isa_rpn_vit.mean(), isa_rpn_vit.std()

(dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64,
 dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64)

In [386]:
compute_wilcoxon(isa_rpn_isa_vit, isa_rpn_vit, isa_rpn_isa_vit.columns.tolist())

Unnamed: 0,stat,p_value,significant
dice,0,1,False
precision,0,1,False
recall,0,1,False
f1,0,1,False
fpr,0,1,False


### ISA-ViT vs ViT (RPN)


In [387]:
rpn_isa_vit.mean(), rpn_isa_vit.std()

(dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64,
 dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64)

In [388]:
rpn_vit.mean(), rpn_vit.std()

(dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64,
 dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64)

In [389]:
compute_wilcoxon(rpn_isa_vit, rpn_vit, rpn_isa_vit.columns.tolist())

Unnamed: 0,stat,p_value,significant
dice,0,1,False
precision,0,1,False
recall,0,1,False
f1,0,1,False
fpr,0,1,False


### ISA-ViT vs ViT (Perfect Bounding Boxes)


In [390]:
pb_isa_vit.mean(), pb_isa_vit.std()

(dice         49.684211
 precision    61.052632
 recall       45.456140
 f1           49.684211
 fpr           0.013464
 dtype: float64,
 dice         22.562931
 precision    29.679731
 recall       20.025703
 f1           22.562931
 fpr           0.010098
 dtype: float64)

In [391]:
pb_vanilla_vit.mean(), pb_vanilla_vit.std()

(dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64,
 dice         0.0
 precision    0.0
 recall       0.0
 f1           0.0
 fpr          0.0
 dtype: float64)

In [392]:
compute_wilcoxon(pb_isa_vit, pb_vanilla_vit, pb_isa_vit.columns.tolist())

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Unnamed: 0,stat,p_value,significant
dice,15.0,0.03125,True
precision,15.0,0.03125,True
recall,15.0,0.03125,True
f1,15.0,0.03125,True
fpr,10.0,0.029391,True
