# Comparison Between Different Raters

In [1]:
import os
import shutil
import pandas as pd
import numpy as np
from PIL import Image
from pathlib import Path

In [5]:
def make_comparison_dir(root, gt_rater, comparison_raters):
    """Create the folder skeleton used to compare one rater against others.

    Under ``root/gt_rater`` the following directories are created (existing
    ones are left untouched):

    * ``gt/masks/{o,rc}`` and ``gt/images/{o,rc}``
    * ``pred/<rater>/masks/{o,rc}`` for every entry of ``comparison_raters``

    Args:
        root: Base directory (string or path-like) holding all comparisons.
        gt_rater: Name of the rater whose annotations act as ground truth.
        comparison_raters: Iterable of rater names to compare against.
    """
    rater_root = Path(root) / gt_rater
    variants = ('o', 'rc')

    # Ground-truth side: masks first, then images, each with both variants.
    for content in ('masks', 'images'):
        for variant in variants:
            (rater_root / 'gt' / content / variant).mkdir(parents=True, exist_ok=True)

    # Prediction side: only masks are stored per comparison rater.
    for cr in comparison_raters:
        for variant in variants:
            (rater_root / 'pred' / cr / 'masks' / variant).mkdir(parents=True, exist_ok=True)

def dice_score(a, b):
    """Compute a per-label Dice coefficient between two label arrays.

    For every distinct value found in either array the score
    ``2 * |A & B| / (|A| + |B|)`` is computed, where ``A`` and ``B`` are the
    sets of positions holding that value in ``a`` and ``b`` respectively.

    Args:
        a: First label array (indexed along at least two axes).
        b: Second label array (indexed along at least two axes).

    Returns:
        dict mapping each label value to its Dice score, plus the entries
        ``dim_a_0``, ``dim_a_1``, ``dim_b_0`` and ``dim_b_1`` holding the
        first two dimensions of ``a`` and ``b``.
    """
    scores = {}

    for label in np.union1d(np.unique(a), np.unique(b)):
        in_a = (a == label).astype('float')
        in_b = (b == label).astype('float')

        overlap = np.sum(in_a * in_b)
        # Sum of both mask sizes (|A| + |B|), i.e. the Dice denominator.
        total = np.sum(in_a + in_b)

        # If no position matched the label in either array, divide by 1 so
        # the score becomes 0 instead of raising a division warning.
        denominator = 1 if total == 0 else total

        scores[label] = 2 * overlap / denominator

    # Record the input shapes alongside the scores.
    scores.update({
        'dim_a_0': a.shape[0],
        'dim_a_1': a.shape[1],
        'dim_b_0': b.shape[0],
        'dim_b_1': b.shape[1],
    })

    return scores

def accuracy(gt_array, pred_array):
    """Return the fraction of positions where both arrays hold equal values."""
    agreement = gt_array == pred_array
    return np.mean(agreement)

def make_comparison_dataframe(dir_a, dir_b):
    
    def get_file_df(directory):
        
        file_dict = {"file": [], "file_path": []}
        
        for root, dir, file in os.walk(directory):
            for f in file: 
                if 'consolidated' in f: 
                    file_dict['file'].append(f)
                    file_dict['file_path'].append(os.path.join(root, f))        
        return pd.DataFrame(file_dict)
    
    df_a = get_file_df(dir_a)
    df_b = get_file_df(dir_b)
    
    return df_a.merge(df_b, how='inner', on=['file'])

def evaluate_comparison_dataframe(dataframe):
    """Add per-label Dice scores to every row of a comparison frame.

    For each row the images at ``file_path_x`` and ``file_path_y`` are
    loaded, scored with ``dice_score`` and every entry of the resulting dict
    is written into a column named after its key.

    Args:
        dataframe: Frame with ``file_path_x`` and ``file_path_y`` columns.

    Returns:
        The same DataFrame object that was passed in. It is modified in
        place, not copied.
    """
    for idx, record in dataframe.iterrows():
        image_a = np.array(Image.open(record['file_path_x']))
        image_b = np.array(Image.open(record['file_path_y']))

        scores = dice_score(image_a, image_b)
        for column, value in scores.items():
            dataframe.loc[idx, column] = value

    return dataframe
        

## Inter- and Intra-Rater Analysis

In [45]:
# Root of the comparison tree; expected layout per ground-truth rater:
#   <root>/<gt_rater>/gt/masks/<subdir>/<name>.png
#   <root>/<gt_rater>/pred/<pred_rater>/masks/<subdir>/<name>.png
comparison_directory = Path('/path/')
gt_raters = ['jessica', 'phillip', 'alex']
pred_raters = ['phillip', 'fastai', 'nnunet2']

# One dict of metrics per (ground-truth rater, prediction rater, mask file).
output = []
for gtr in gt_raters:
    for pr in pred_raters:

        gt_directory = comparison_directory / gtr / 'gt' / 'masks'
        pred_directory = comparison_directory / gtr / 'pred' / pr / 'masks'

        for pred_mask in pred_directory.rglob('*.png'):
            # The ground-truth mask sits in the equally named sub-folder.
            # Use .name rather than .stem: .stem would strip everything after
            # the last dot of a folder name (e.g. 'v1.0' -> 'v1').
            gt_mask = gt_directory / pred_mask.parent.name / pred_mask.name

            pred_array = np.array(Image.open(pred_mask))
            gt_array = np.array(Image.open(gt_mask))

            # Per-label Dice scores plus bookkeeping about where they came from.
            out_dict = dice_score(gt_array, pred_array)
            out_dict['ground_truth_rater'] = gtr
            out_dict['prediction_rater'] = pr
            out_dict['pr_path'] = str(pred_mask)
            out_dict['gt_path'] = str(gt_mask)
            # Arguments follow the declared order accuracy(gt_array, pred_array).
            out_dict['accuracy'] = accuracy(gt_array, pred_array)
            output.append(out_dict)

df = pd.DataFrame(output)
# df.to_csv('ground_truth_vs_raters_new.csv')

# Per-image accuracy summarised for every (ground-truth rater, prediction rater) pair.
dff = df[['accuracy', 'ground_truth_rater', 'prediction_rater']]
# Aggregators are given by name instead of as NumPy callables: pandas has
# deprecated passing np.mean / np.std here and will eventually call them
# directly. 'std' keeps the result pandas produces today (sample standard
# deviation, ddof=1), whereas a direct np.std call would use ddof=0.
dff_pivot_mean = pd.pivot_table(dff, aggfunc='mean', values=['accuracy'], columns='prediction_rater', index='ground_truth_rater')
dff_pivot_std = pd.pivot_table(dff, aggfunc='std', values=['accuracy'], columns='prediction_rater', index='ground_truth_rater')
dff_pivot_cnt = pd.pivot_table(dff, aggfunc='count', values=['accuracy'], columns='prediction_rater', index='ground_truth_rater')

# Build the "mean±std" accuracy table: rows are ground-truth raters,
# columns are prediction raters.
publication_table = pd.DataFrame(index=gt_raters, columns=pred_raters)
for gtr in gt_raters:
    for pr in pred_raters:
        # The pivot tables carry a two-level column index: (value, rater).
        column_key = ('accuracy', pr)
        mean = dff_pivot_mean.loc[gtr, column_key]
        std = dff_pivot_std.loc[gtr, column_key]
        publication_table.loc[gtr, pr] = '{:.3f}±{:.3f}'.format(round(mean, 3), round(std, 3))

# Fix the presentation order of rows and columns.
publication_table = publication_table.reindex(
    index=['alex', 'jessica', 'phillip'],
    columns=['fastai', 'nnunet2', 'phillip'],
)