In [1]:
import sys
from pathlib import Path
# Imported before sys.path is modified below, so `helpers` has to be resolvable
# from the default import path already.
import helpers as lib
# Use the parent of the current working directory as the project root and put it
# first on sys.path, ahead of the `common.helpers` import that follows.
# NOTE(review): presumably the `common` package lives in that directory — confirm.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))
from common.helpers import bootstrap_paired_diff, bootstrap_method_mean

# Load the evaluation results that every later cell reads from.
# NOTE(review): 'results.pkl' is a relative path, presumably resolved against the
# working directory. If load_pickle wraps the stdlib pickle module (not visible
# here), only load files from a trusted source.
results = lib.load_pickle('results.pkl')


In [2]:
# Sanity check: how many entries the loaded results contain.
len(results)


42

In [3]:
# Inspect the keys available on the first result entry.
list(results[0].keys())


['pair', 'gender', 'labels', 'model', 'direct', 'ri_abim', 'ri_leeds']

In [4]:
# Inspect the keys stored under the 'model' entry of the first result.
list(results[0]['model'].keys())


['volume',
 'volume_standard',
 'coverage',
 'bhattacharyya_coefficient',
 'iou',
 'vertices',
 'precision',
 'recall',
 'f1score',
 'support',
 'reference_frac',
 'labels',
 'model',
 'coverage_area_ratio']

In [5]:
# For each metric, print what bootstrap_method_mean returns for every method,
# computed over the per-entry values pulled out of `results`.
# NOTE(review): bootstrap estimates are presumably stochastic — confirm the
# helper fixes a seed so these numbers are reproducible.
for error_metric in ('coverage', 'volume_standard', 'coverage_area_ratio', 'iou'):
    print(f"Mean CIs for metric: {error_metric}")
    errors = {}
    for model in ('model', 'ri_abim', 'ri_leeds', 'direct'):
        # One value per result entry for this method/metric combination.
        method_metric_values = [entry[model][error_metric] for entry in results]
        errors[model] = method_metric_values
        method_mean_ci = bootstrap_method_mean(method_metric_values)
        print(f"{model} - {method_mean_ci}")
    print('\n')
    

Mean CIs for metric: coverage
model - {'mean': 0.9379361514398011, 'ci_lower': 0.9302330772286455, 'ci_upper': 0.9444210573055725}
ri_abim - {'mean': 0.8231588536593749, 'ci_lower': 0.7762239693743863, 'ci_upper': 0.8663922517350566}
ri_leeds - {'mean': 0.9157270432035812, 'ci_lower': 0.8943345431940323, 'ci_upper': 0.9340404381853807}
direct - {'mean': 0.9524544798267426, 'ci_lower': 0.9488593458158111, 'ci_upper': 0.9557919096891988}


Mean CIs for metric: volume_standard
model - {'mean': 2.659774652190825, 'ci_lower': 2.117626524545963, 'ci_upper': 3.3011388152051944}
ri_abim - {'mean': 4.0462364352722044, 'ci_lower': 2.72109078654093, 'ci_upper': 5.78510831750635}
ri_leeds - {'mean': 3.2604401730613852, 'ci_lower': 2.6424649828346283, 'ci_upper': 4.0316745599700425}
direct - {'mean': 2.6904340999865544, 'ci_lower': 2.145154864055488, 'ci_upper': 3.2950781070537647}


Mean CIs for metric: coverage_area_ratio
model - {'mean': 0.5725806010522303, 'ci_lower': 0.46282408222499827, 'ci_u

In [7]:
# For each metric, gather the per-entry values of every method and print the
# result of bootstrap_paired_diff over all of them.
for error_metric in ('coverage', 'volume_standard', 'coverage_area_ratio', 'iou'):
    print(f"Pairwise comparison for metric: {error_metric}")
    errors = {}
    for model in ('model', 'ri_abim', 'ri_leeds', 'direct'):
        # One value per result entry, keyed by method name.
        errors[model] = [entry[model][error_metric] for entry in results]
    paired_diff_report = bootstrap_paired_diff(errors)
    print(paired_diff_report)
    print('\n')
    

Pairwise comparison for metric: coverage
   method_1  method_2  stat_diff  ci_lower  ci_upper  significant
0     model   ri_abim   0.114777  0.070741  0.157468         True
1     model  ri_leeds   0.022209  0.003162  0.045213         True
2     model    direct  -0.014518 -0.022555 -0.007392         True
3   ri_abim  ri_leeds  -0.092568 -0.140266 -0.047129         True
4   ri_abim    direct  -0.129296 -0.177609 -0.085128         True
5  ri_leeds    direct  -0.036727 -0.059925 -0.016258         True


Pairwise comparison for metric: volume_standard
   method_1  method_2  stat_diff  ci_lower  ci_upper  significant
0     model   ri_abim  -1.386462 -2.624382 -0.430788         True
1     model  ri_leeds  -0.600666 -0.932264 -0.302699         True
2     model    direct  -0.030659 -0.182317  0.118109        False
3   ri_abim  ri_leeds   0.785796 -0.100768  1.914645        False
4   ri_abim    direct   1.355802  0.332571  2.534695         True
5  ri_leeds    direct   0.570006  0.266594  0.88680