In [1]:
import pandas as pd
from sklearn.metrics import f1_score, cohen_kappa_score, confusion_matrix, recall_score
from irrCAC.raw import CAC

In [2]:
# Read the label table from the CSV file and show it as the cell output.
data = pd.read_csv(filepath_or_buffer='./miccai_data.csv')
data

Unnamed: 0,Glaucoma_0_label,Glaucoma_1_label,Glaucoma_2_label,Glaucoma_3_label,Glaucoma_4_label,Glaucoma_adjudicated_label,RVO_0_label,RVO_1_label,RVO_2_label,RVO_3_label,...,MMD_diagnostic_label,DR_diagnostic_label,ERM_diagnostic_label,Optic pallor_diagnostic_label,age_label,sex_label,DM_label,HTN_label,camera,img_path
0,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,56.0,0.0,0.0,0.0,NW500,./images/0.jpg
1,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,56.0,0.0,0.0,0.0,NW500,./images/1.jpg
2,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,58.0,1.0,0.0,1.0,NW500,./images/2.jpg
3,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,58.0,1.0,0.0,1.0,NW500,./images/3.jpg
4,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,57.0,0.0,1.0,1.0,NW500,./images/4.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4031,0.0,0.0,0.0,0.0,0.5,False,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,64.0,1.0,0.0,1.0,UWF,./images/4031.jpg
4032,0.5,0.0,0.0,1.0,1.0,True,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,78.0,0.0,0.0,0.0,UWF,./images/4032.jpg
4033,0.5,0.0,0.0,1.0,1.0,True,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,78.0,0.0,0.0,0.0,UWF,./images/4033.jpg
4034,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,68.0,0.0,0.0,1.0,UWF,./images/4034.jpg


In [3]:
# Show the column index of the loaded frame (the column names used by the cells below).
data.columns

Index(['Glaucoma_0_label', 'Glaucoma_1_label', 'Glaucoma_2_label',
       'Glaucoma_3_label', 'Glaucoma_4_label', 'Glaucoma_adjudicated_label',
       'RVO_0_label', 'RVO_1_label', 'RVO_2_label', 'RVO_3_label',
       'RVO_4_label', 'RVO_adjudicated_label', 'RAO_0_label', 'RAO_1_label',
       'RAO_2_label', 'RAO_3_label', 'RAO_4_label', 'RAO_adjudicated_label',
       'Optic pallor_0_label', 'Optic pallor_1_label', 'Optic pallor_2_label',
       'Optic pallor_3_label', 'Optic pallor_4_label',
       'Optic pallor_adjudicated_label', 'DR_0_label', 'DR_1_label',
       'DR_2_label', 'DR_3_label', 'DR_4_label', 'DR_adjudicated_label',
       'AMD_0_label', 'AMD_1_label', 'AMD_2_label', 'AMD_3_label',
       'AMD_4_label', 'AMD_adjudicated_label', 'MMD_0_label', 'MMD_1_label',
       'MMD_2_label', 'MMD_3_label', 'MMD_4_label', 'MMD_adjudicated_label',
       'ERM_0_label', 'ERM_1_label', 'ERM_2_label', 'ERM_3_label',
       'ERM_4_label', 'ERM_adjudicated_label', 'Glaucoma_diagnostic_lab

In [4]:
# Maps each '*_adjudicated_label' column to the '*_diagnostic_label' column it
# is compared against. Insertion order is the order in which results are printed.
disease_list = {
    'Glaucoma_adjudicated_label' : 'Glaucoma_diagnostic_label',
    'RVO_adjudicated_label' : 'RVO_diagnostic_label',
    'RAO_adjudicated_label' : 'RAO_diagnostic_label',
    'AMD_adjudicated_label' : 'AMD_diagnostic_label',
    'DR_adjudicated_label' : 'DR_diagnostic_label',
    'Optic pallor_adjudicated_label' : 'Optic pallor_diagnostic_label',
    'MMD_adjudicated_label' : 'MMD_diagnostic_label',
    'ERM_adjudicated_label' : 'ERM_diagnostic_label',
}

for adjudicated_col, diagnostic_col in disease_list.items():
    print(f"[{adjudicated_col} : {diagnostic_col}]")

    # The diagnostic column goes in the y_true position and the adjudicated
    # column in the y_pred position of both sklearn metrics.
    reference = data[diagnostic_col]
    adjudicated = data[adjudicated_col]

    f1 = f1_score(reference, adjudicated)
    kappa = cohen_kappa_score(reference, adjudicated)

    # Two-column ratings frame for irrCAC: the two label columns act as the
    # two raters (Rater1 = diagnostic, Rater2 = adjudicated).
    two_rater_frame = pd.DataFrame({
        "Rater1": reference.tolist(),
        "Rater2": adjudicated.tolist(),
    })
    gwet_ac2 = CAC(two_rater_frame, weights="quadratic").gwet()['est']['coefficient_value']

    print(f"F1 Score: {f1:.3f} | Cohen's Kappa: {kappa:.3f} | Gwet's AC2: {gwet_ac2:.3f}")

[Glaucoma_adjudicated_label : Glaucoma_diagnostic_label]
F1 Score: 0.423 | Cohen's Kappa: 0.339 | Gwet's AC2: 0.811
[RVO_adjudicated_label : RVO_diagnostic_label]
F1 Score: 0.752 | Cohen's Kappa: 0.742 | Gwet's AC2: 0.979
[RAO_adjudicated_label : RAO_diagnostic_label]
F1 Score: 0.371 | Cohen's Kappa: 0.363 | Gwet's AC2: 0.985
[AMD_adjudicated_label : AMD_diagnostic_label]
F1 Score: 0.366 | Cohen's Kappa: 0.333 | Gwet's AC2: 0.914
[DR_adjudicated_label : DR_diagnostic_label]
F1 Score: 0.411 | Cohen's Kappa: 0.360 | Gwet's AC2: 0.841
[Optic pallor_adjudicated_label : Optic pallor_diagnostic_label]
F1 Score: 0.573 | Cohen's Kappa: 0.544 | Gwet's AC2: 0.938
[MMD_adjudicated_label : MMD_diagnostic_label]
F1 Score: 0.833 | Cohen's Kappa: 0.820 | Gwet's AC2: 0.973
[ERM_adjudicated_label : ERM_diagnostic_label]
F1 Score: 0.288 | Cohen's Kappa: 0.271 | Gwet's AC2: 0.953


In [5]:
from statsmodels.stats.inter_rater import fleiss_kappa, aggregate_raters

# Disease prefixes of the per-rater columns '<disease>_<i>_label'.
# NOTE: this rebinds `disease_list` (a dict in the previous cell) to a list.
disease_list = ['Glaucoma', 'RVO', 'RAO', 'AMD', 'DR', 'Optic pallor', 'MMD', 'ERM']

# Number of per-rater columns per disease ('<disease>_0_label' .. '<disease>_4_label').
N_RATERS = 5

for disease in disease_list:
    rater_cols = [f'{disease}_{i}_label' for i in range(N_RATERS)]
    analysis_df = data[rater_cols]

    #0 : Non-Referable, 0.5 : Indeterminate, 1 : Referable
    # Guard the encoding: astype(int) below would silently truncate any other value.
    assert analysis_df.isin([0, 0.5, 1]).all().all(), \
        f"{disease}: rater labels must be 0, 0.5 or 1 (no missing values)"
    analysis_df = (analysis_df * 2).astype(int)
    #class1 : Non-Referable, class2 : Indeterminate, class3 : Referable

    # Gwet's coefficient with quadratic weights over the integer-coded ratings.
    cac_raw = CAC(analysis_df, weights="quadratic")
    gwet_raw = cac_raw.gwet()['est']['coefficient_value']

    # Fleiss' kappa needs a subjects x categories count table, which
    # aggregate_raters builds from the subjects x raters array.
    table_raw, _ = aggregate_raters(analysis_df.values)
    fleiss_raw = fleiss_kappa(table_raw)

    print(f"{disease} Fleiss' Kappa: {fleiss_raw:.3f} | {disease} Gwet's AC2: {gwet_raw:.3f}")

Glaucoma Fleiss' Kappa: 0.246 | Glaucoma Gwet's AC2: 0.701
RVO Fleiss' Kappa: 0.540 | RVO Gwet's AC2: 0.960
RAO Fleiss' Kappa: 0.286 | RAO Gwet's AC2: 0.971
AMD Fleiss' Kappa: 0.238 | AMD Gwet's AC2: 0.873
DR Fleiss' Kappa: 0.347 | DR Gwet's AC2: 0.894
Optic pallor Fleiss' Kappa: 0.396 | Optic pallor Gwet's AC2: 0.905
MMD Fleiss' Kappa: 0.512 | MMD Gwet's AC2: 0.915
ERM Fleiss' Kappa: 0.354 | ERM Gwet's AC2: 0.964


In [6]:
# Review-only export (not the full release): write a copy of `data` in which a
# random ~90% of the cells are blanked out (replaced by NaN).
import numpy as np

MASK_FRACTION = 0.9  # probability that any single cell is blanked
MASK_SEED = 0        # fixed seed so the exported file is the same on every run

# Seeded generator instead of the global np.random state: reproducible output.
mask_rng = np.random.default_rng(MASK_SEED)
mask = mask_rng.random(data.shape) < MASK_FRACTION  # True -> cell is blanked

# Keep the loaded frame intact: binding the result to a new name makes this
# cell idempotent and leaves `data` usable if earlier cells are re-run.
data_masked = data.mask(mask)
data_masked.to_csv('./miccai_review_sanitized.csv', index=False)