### **Imports**

In [1]:
from bsa_core.io import load
from joblib import load as load_prob
from scipy.stats import wilcoxon
import numpy as np
import pylab as pl
pl.gray()

### **Auxiliary Function**

_Inputs_

* __pred -__ the binary segmentation;
* __gt -__ the ground truth;
* __mask -__ the binary mask;
* __prob -__ the probability map (if available).

_Outputs_

* __sen -__ Sensitivity;
* __spec -__ Specificity;
* __acc -__ Accuracy;
* __auc -__ Area under the ROC curve (only returned when a probability map is given).

In [2]:
from sklearn.metrics import confusion_matrix, roc_auc_score

def _safe_ratio (numerator, denominator):
    """Return numerator / denominator as a float, or nan when the denominator is zero."""
    if denominator == 0:
        return float('nan')
    # float() keeps this a true division even under Python 2.
    return float(numerator) / denominator


def perform_metrics (pred, gt, mask, prob = None):
    """Compute pixel-wise sensitivity, specificity and accuracy (optionally AUC).

    Only pixels where ``mask > 0`` are evaluated. Inside that region a pixel
    is counted as positive when its value is greater than zero, so 0/1 and
    0/255 binary images give the same result.

    Parameters
    ----------
    pred : array-like
        Binary segmentation.
    gt : array-like
        Ground truth, same shape as ``pred``.
    mask : array-like
        Binary mask, same shape as ``pred``; pixels with ``mask <= 0`` are ignored.
    prob : array-like, optional
        Probability map, same shape as ``pred``. When given and non-empty,
        the area under the ROC curve is returned as a fourth value.

    Returns
    -------
    tuple of float
        ``(sen, spec, acc)``, or ``(sen, spec, acc, auc)`` when ``prob`` is
        supplied. A ratio whose denominator is zero (e.g. sensitivity when the
        ground truth holds no positive pixel) is returned as nan.
    """
    # Suppressing background regions.
    roi = np.asarray(mask) > 0
    pred_pos = np.asarray(pred)[roi] > 0
    gt_roi = np.asarray(gt)[roi]
    gt_pos = gt_roi > 0

    # Counting the four outcomes directly keeps them well defined even when
    # only one class is present inside the mask (a confusion matrix built
    # from the observed labels would collapse to 1x1 in that case).
    tp = int(np.count_nonzero(pred_pos & gt_pos))
    tn = int(np.count_nonzero(~pred_pos & ~gt_pos))
    fp = int(np.count_nonzero(pred_pos & ~gt_pos))
    fn = int(np.count_nonzero(~pred_pos & gt_pos))

    # Finding the metrics.
    sen  = _safe_ratio(tp, tp + fn)
    spec = _safe_ratio(tn, tn + fp)
    acc  = _safe_ratio(tp + tn, tp + tn + fp + fn)

    if prob is not None and len(prob) != 0:
        # roc_auc_score raises ValueError when gt holds a single class.
        auc = roc_auc_score(gt_roi, np.asarray(prob)[roi])
        return sen, spec, acc, auc

    return sen, spec, acc

### **Loading**

In [3]:
# Identifiers of the 28 test images: numbers 01-14, each with an 'R' and an 'L' variant
# (presumably right/left eye — confirm against the dataset naming).
test_cases = ['%02d%s' % (number, side) for number in range(1, 15) for side in ('R', 'L')]

# Width and height used to allocate the per-case image stacks.
r_width = 999
r_height = 960

In [4]:
# Zero-filled stacks holding one (r_height, r_width) slice per test case:
# ground truths and masks, filled in by the loading cell.
gts, masks = (np.zeros((len(test_cases), r_height, r_width)) for _ in range(2))

In [5]:
# Azzopardi (2015): stack of segmentations, one slice per test case.
segs_azzopardi = np.zeros(shape = (len(test_cases), r_height, r_width))

In [6]:
# Fu (2016): stacks of segmentations and probability maps, one slice per test case.
segs_fu = np.zeros(shape = (len(test_cases), r_height, r_width))
probs_fu = np.zeros(shape = (len(test_cases), r_height, r_width))

In [7]:
# This work (2018): stacks of segmentations and probability maps, one slice per test case.
segs, probs = (np.zeros((len(test_cases), r_height, r_width)) for _ in range(2))

To run the cell below, download the resources from the following links and update the paths accordingly.

* [Azzopardi (2015)](http://www.cs.rug.nl/~nick/results/)
* [Fu (2016)](http://hzfu.github.io/proj_deepvessel.html)

In [8]:
# Change these paths
gts_path = '../resources/gts/CHASE_DB1/'
masks_path = '../resources/masks/CHASE_DB1/'
azzopardi_path = '../resources/azzopardi_2015/CHASE_DB1/'
fu_path = '../resources/fu_2016/CHASE_DB1/'
# Outputs of this work, kept next to the other paths so that every location
# can be adjusted in one place.
segs_path = '../resources/binary_segmentations/CHASE_DB1/'
probs_path = '../resources/probability_maps/CHASE_DB1/'

# Fill slot i of every pre-allocated stack with the images of test case i.
for i, case in enumerate(test_cases):

    # Ground-truth and masks
    # NOTE(review): normalize = True presumably rescales the image to [0, 1] —
    # confirm in bsa_core.io.load. The mask is loaded without it and is only
    # ever compared against zero downstream.
    gt = load(gts_path + 'Image_%s_1stHO.png' % case, normalize = True)
    gts[i] = gt
    mask = load(masks_path + 'mask_%s.png' % case)
    masks[i] = mask

    # Azzopardi (2015)
    # The GIF loads with a third axis; only its first channel is kept.
    seg_azzopardi = load(azzopardi_path + 'Image_%s.gif' % case, normalize = True)[:,:,0]
    segs_azzopardi[i] = seg_azzopardi

    # Fu (2016)
    seg_fu = load(fu_path + 'Image_%s_seg_result.png' % case, normalize = True)
    segs_fu[i] = seg_fu
    prob_fu = load(fu_path + 'Image_%s_CRF_result.png' % case, normalize = True)
    probs_fu[i] = prob_fu

    # This work (2018)
    seg = load(segs_path + 'seg_%s.png' % case, normalize = True)
    # CAUTION: load_prob is joblib.load, which unpickles the file; only use it
    # on probability maps from a trusted source.
    # NOTE(review): index 1 presumably selects the vessel-class map — confirm.
    prob = load_prob(probs_path + 'prob_%s.npy' % case)[1]
    segs[i] = seg
    probs[i] = prob

### **Calculating metrics**

In [9]:
def get_metrics (segs, gts, masks, probs = []):
    """Evaluate every image and gather each metric into its own list.

    perform_metrics is called once per entry of ``gts`` with the matching
    entries of ``segs`` and ``masks``. When ``probs`` is non-empty, the
    matching probability map is forwarded too and AUC values are collected.

    Returns ``(sen_list, spec_list, acc_list)``, or
    ``(sen_list, spec_list, acc_list, auc_list)`` when ``probs`` is non-empty.
    """
    with_auc = len(probs) != 0

    # One bucket per metric, in the order perform_metrics returns them.
    buckets = ([], [], [], []) if with_auc else ([], [], [])

    for idx in range(len(gts)):
        if with_auc:
            sen, spec, acc, auc = perform_metrics(segs[idx], gts[idx], masks[idx], probs[idx])
            scores = (sen, spec, acc, auc)
        else:
            sen, spec, acc = perform_metrics(segs[idx], gts[idx], masks[idx])
            scores = (sen, spec, acc)

        for bucket, score in zip(buckets, scores):
            bucket.append(score)

    return buckets


In [10]:
# Azzopardi (2015): no probability maps are passed, so no AUC list comes back.
(sen_list_azzopardi,
 spec_list_azzopardi,
 acc_list_azzopardi) = get_metrics(segs = segs_azzopardi, gts = gts, masks = masks)

In [11]:
# Fu (2016): probability maps are passed, so an AUC list is returned as well.
(sen_list_fu,
 spec_list_fu,
 acc_list_fu,
 auc_list_fu) = get_metrics(segs = segs_fu, gts = gts, masks = masks, probs = probs_fu)

In [12]:
# This work (2018): probability maps are passed, so an AUC list is returned as well.
(sen_list,
 spec_list,
 acc_list,
 auc_list) = get_metrics(segs = segs, gts = gts, masks = masks, probs = probs)

### **Making Statistical Comparison**

In [13]:
# Azzopardi (2015)
# Wilcoxon signed-rank test on the paired per-image sensitivities.
_, pvalue = wilcoxon(x = sen_list_azzopardi, y = sen_list)
pvalue

0.050189847248778868

In [14]:
# Azzopardi (2015): Wilcoxon signed-rank test on the paired per-image specificities.
_, pvalue = wilcoxon(x = spec_list_azzopardi, y = spec_list)
pvalue

3.7896194415808708e-06

In [15]:
# Azzopardi (2015): Wilcoxon signed-rank test on the paired per-image accuracies.
_, pvalue = wilcoxon(x = acc_list_azzopardi, y = acc_list)
pvalue

3.7896194415808708e-06

In [16]:
# Per-image AUC values for Azzopardi (2015). They are hard-coded because
# get_metrics is called without probability maps for this method, so no AUC
# is computed in this notebook.
# NOTE(review): presumably listed in the same order as test_cases and taken
# from the published results — confirm the source and the ordering, since the
# paired test below relies on it.
auc_list_azzopardi = [0.9544, 0.9459, 0.9295, 0.9325, 0.9513,
                      0.9561, 0.9477, 0.9431, 0.9494, 0.9610,
                      0.9424, 0.9453, 0.9433, 0.9425, 0.9499,
                      0.9542, 0.9583, 0.9604, 0.9354, 0.9408,
                      0.9630, 0.9654, 0.9507, 0.9566, 0.9416,
                      0.9377, 0.9631, 0.9429]

In [17]:
# Azzopardi (2015): Wilcoxon signed-rank test on the paired per-image AUC values.
_, pvalue = wilcoxon(x = auc_list_azzopardi, y = auc_list)
pvalue

3.7896194415808708e-06

In [18]:
# Fu (2016)
# Wilcoxon signed-rank test on the paired per-image sensitivities.
_, pvalue = wilcoxon(x = sen_list_fu, y = sen_list)
pvalue

9.977698887788448e-06

In [19]:
# Fu (2016): Wilcoxon signed-rank test on the paired per-image accuracies.
_, pvalue = wilcoxon(x = acc_list_fu, y = acc_list)
pvalue

3.7896194415808708e-06