In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from sklearn.metrics import confusion_matrix, f1_score

from draw_figures.open_tools import CalculateAUC

In [2]:
# Registry of prediction CSVs used throughout the notebook, keyed by a short
# '<source>_<cohort>' style label. Each CSV is later read for its 'Label' and
# 'Score' columns.
results_path_all = {
    # Deep-learning results: the three file names share no pattern, so they
    # are spelled out one by one.
    'DL_inves': './resources/results/DL/inves_2022_mk.csv',
    'DL_test': './resources/results/DL/test.csv',
    'DL_val': './resources/results/DL/validation.csv',

    # Classical ML results: every pipeline folder holds the same three files.
    **{
        f'ML_{tag}_{cohort}': f'./resources/results/ML/{folder}/{file_name}'
        for tag, folder in (('RF', 'Mean_PCC_KW_24_RF'), ('AE', 'Zscore_PCC_KW_29_AE'))
        for cohort, file_name in (
            ('inves', 'inves_2023_mk.csv'),
            ('test', 'random_test_results_mk.csv'),
            ('val', 'random_val_results_mk.csv'),
        )
    },

    # External readings: one folder per '<level>_<group>' combination.
    **{
        f'External_{level}_{group}': f'./resources/results/External/{level}_{group}/mk_2022_inves.csv'
        for level in ('junior', 'middle', 'senior')
        for group in ('E', 'NE')
    },

    # Outline experiments: files p1..p6 for each of the two models.
    **{
        f'outline_{model}_p{number}': f'./resources/results/outlines/{model}/val/p{number}.csv'
        for model in ('SVM', 'ResNet')
        for number in range(1, 7)
    },
}

In [3]:
def clopper_pearson(successes, trials, alpha=0.05):
    """Exact (Clopper-Pearson) two-sided confidence interval for a binomial proportion.

    Parameters
    ----------
    successes : int
        Number of successes observed.
    trials : int
        Total number of trials.
    alpha : float, default 0.05
        Significance level; the interval has nominal coverage ``1 - alpha``.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval.

    Notes
    -----
    The beta quantile is undefined (NaN) when one of its shape parameters is
    zero, which happens for ``successes == 0`` (lower bound) and
    ``successes == trials`` (upper bound). In those cases the exact bounds
    are 0 and 1 respectively, so they are returned directly.
    """
    if successes == 0:
        # No successes observed: the exact lower limit is 0, not NaN.
        lower = 0.0
    else:
        lower = stats.beta.ppf(alpha / 2, successes, trials - successes + 1)

    if successes == trials:
        # Every trial succeeded: the exact upper limit is 1, not NaN.
        upper = 1.0
    else:
        upper = stats.beta.ppf(1 - alpha / 2, successes + 1, trials - successes)

    return lower, upper

In [4]:
def normal_approximation(successes, trials):
    """Normal-approximation interval for a proportion, clipped to [0, 1].

    Uses the fixed critical value 1.96. When ``trials`` is zero both the
    point estimate and the half-width are taken as 0, so ``(0, 0)`` is
    returned.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds.
    """
    if trials == 0:
        # Nothing observed: estimate and half-width both collapse to zero.
        return 0, 0

    critical_value = 1.96
    proportion = successes / trials
    half_width = critical_value * np.sqrt(proportion * (1 - proportion) / trials)

    lower_bound = max(0, proportion - half_width)
    upper_bound = min(1, proportion + half_width)
    return lower_bound, upper_bound

In [5]:
def compute_CI(tn, fp, fn, tp, method='clopper_pearson'):
    """Confidence intervals for five metrics derived from a 2x2 confusion matrix.

    Parameters
    ----------
    tn, fp, fn, tp : int
        Confusion-matrix counts.
    method : str, default 'clopper_pearson'
        ``'clopper_pearson'`` selects ``clopper_pearson``; any other value
        selects ``normal_approximation``.

    Returns
    -------
    tuple
        Five ``(lower, upper)`` pairs, in the order sensitivity, specificity,
        accuracy, PPV, NPV.
    """
    # Pick the interval routine once instead of duplicating the five calls.
    interval_of = clopper_pearson if method == 'clopper_pearson' else normal_approximation

    return (
        interval_of(tp, tp + fn),                      # sensitivity
        interval_of(tn, tn + fp),                      # specificity
        interval_of(tp + tn, tp + tn + fp + fn),       # accuracy
        interval_of(tp, tp + fp),                      # PPV
        interval_of(tn, tn + fn),                      # NPV
    )

In [6]:
def get_diagnosis_performance(res_path, method='clopper_pearson', threshold=0.5):
    """Compute binary diagnostic metrics, their confidence intervals, F1 and AUC.

    Parameters
    ----------
    res_path : str or pandas.DataFrame
        Path of a CSV file, or an already loaded frame. It must provide the
        columns ``'Label'`` and ``'Score'``.
        NOTE(review): ``Label`` is assumed to be encoded as 0 (negative) /
        1 (positive), matching the thresholded predictions -- confirm.
    method : str, default 'clopper_pearson'
        Interval method, forwarded to ``compute_CI``.
    threshold : float, default 0.5
        Scores greater than or equal to this value are predicted as class 1.

    Returns
    -------
    tuple
        ``(sensitivity, specificity, accuracy, ppv, npv, sensitivity_CI,
        specificity_CI, accuracy_CI, ppv_CI, npv_CI, f1Score, auc, auc_ci)``.
        ``auc`` and ``auc_ci`` come from the project helper ``CalculateAUC``.
    """
    df = pd.read_csv(res_path) if isinstance(res_path, str) else res_path
    y_true = df['Label']
    y_score = df['Score']
    y_pred = np.where(y_score >= threshold, 1, 0)

    # Pin the label order so the matrix is always 2x2; without it a cohort in
    # which only one class occurs yields a 1x1 matrix and the unpacking fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    # Point estimates of the metrics.
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    ppv = tp / (tp + fp)
    npv = tn / (tn + fn)
    sensitivity_CI, specificity_CI, accuracy_CI, ppv_CI, npv_CI = compute_CI(tn, fp, fn, tp, method)
    f1Score = f1_score(y_true, y_pred)

    # The second returned value (a standard deviation, going by its name) is not reported.
    auc, std_, auc_ci = CalculateAUC(y_true, y_score)

    return sensitivity, specificity, accuracy, ppv, npv, sensitivity_CI, specificity_CI, accuracy_CI, ppv_CI, npv_CI, f1Score, auc, auc_ci

In [7]:
def get_printed_results(value, low=None, high=None):
    """Format a metric to three decimals, optionally with its interval.

    Without ``low`` the result is just the value, e.g. ``'0.859'``. With
    ``low`` (and ``high``) the interval is added on a second line, e.g.
    ``'0.859\\n(0.846-0.872)'``.
    """
    estimate_text = f'{value:.3f}'
    if low is None:
        return estimate_text
    return f'{estimate_text}\n({low:.3f}-{high:.3f})'

In [8]:
# Accumulator for the summary table: column label -> list of formatted cells.
# Each column gets its own fresh list.
results_all = {
    column: []
    for column in (
        'Sen_(95% CI)',
        'Spe_(95% CI)',
        'Acc_(95% CI)',
        'PPV_(95% CI)',
        'NPV_(95% CI)',
        'F1_score',
        'AUC_(95% CI)',
    )
}

In [77]:
# Row labels of the model table: grouped by cohort (val, test, inves), and
# within each cohort ordered DL, ML_RF, ML_AE.
indices = [
    f'{model}_{cohort}'
    for cohort in ('val', 'test', 'inves')
    for model in ('DL', 'ML_RF', 'ML_AE')
]

In [78]:
# Start from empty columns so that re-running this cell never appends the
# same rows twice (which would break DataFrame construction with `indices`).
results_all = {column: [] for column in results_all}

for index in indices:
    res_path = results_path_all[index]

    # 'inves' entries use the Clopper-Pearson interval; all other entries use
    # the normal approximation.
    method = 'clopper_pearson' if 'inves' in index else 'normal_approximation'

    (sensitivity, specificity, accuracy, ppv, npv,
     sensitivity_CI, specificity_CI, accuracy_CI, ppv_CI, npv_CI,
     f1Score, auc, auc_ci) = get_diagnosis_performance(res_path, method=method)

    # Metrics reported together with a (low, high) interval.
    metrics_with_interval = (
        ('Sen_(95% CI)', sensitivity, sensitivity_CI),
        ('Spe_(95% CI)', specificity, specificity_CI),
        ('Acc_(95% CI)', accuracy, accuracy_CI),
        ('PPV_(95% CI)', ppv, ppv_CI),
        ('NPV_(95% CI)', npv, npv_CI),
        ('AUC_(95% CI)', auc, auc_ci),
    )
    for column, value, interval in metrics_with_interval:
        results_all[column].append(get_printed_results(value, interval[0], interval[1]))

    # F1 is reported without an interval.
    results_all['F1_score'].append(get_printed_results(f1Score))

In [80]:
# Assemble the summary table: one row per label in `indices`, one column per
# key of `results_all`.
results_all_df = pd.DataFrame(data=results_all, index=indices)

In [81]:
# Show the assembled summary table as the cell output.
results_all_df

Unnamed: 0,Sen_(95% CI),Spe_(95% CI),Acc_(95% CI),PPV_(95% CI),NPV_(95% CI),F1_score,AUC_(95% CI)
DL_val,0.859\n(0.846-0.872),0.792\n(0.773-0.811),0.834\n(0.823-0.845),0.873\n(0.861-0.885),0.772\n(0.752-0.791),0.866,0.912\n(0.903-0.920)
ML_RF_val,0.893\n(0.881-0.904),0.646\n(0.623-0.668),0.800\n(0.788-0.811),0.807\n(0.793-0.821),0.783\n(0.762-0.805),0.848,0.873\n(0.863-0.883)
ML_AE_val,0.897\n(0.886-0.908),0.665\n(0.642-0.687),0.810\n(0.799-0.821),0.816\n(0.803-0.830),0.796\n(0.775-0.816),0.855,0.877\n(0.867-0.887)
DL_test,0.863\n(0.851-0.876),0.810\n(0.792-0.829),0.844\n(0.834-0.854),0.889\n(0.877-0.900),0.771\n(0.752-0.791),0.876,0.907\n(0.898-0.916)
ML_RF_test,0.893\n(0.882-0.904),0.672\n(0.649-0.694),0.813\n(0.802-0.824),0.827\n(0.814-0.840),0.782\n(0.761-0.803),0.859,0.871\n(0.861-0.881)
ML_AE_test,0.876\n(0.864-0.888),0.693\n(0.671-0.715),0.810\n(0.799-0.821),0.834\n(0.821-0.847),0.761\n(0.740-0.782),0.854,0.867\n(0.857-0.878)
DL_inves,0.767\n(0.577-0.901),0.967\n(0.828-0.999),0.867\n(0.754-0.941),0.958\n(0.789-0.999),0.806\n(0.640-0.918),0.852,0.974\n(0.942-1.000)
ML_RF_inves,0.867\n(0.693-0.962),0.833\n(0.653-0.944),0.850\n(0.734-0.929),0.839\n(0.663-0.945),0.862\n(0.683-0.961),0.852,0.904\n(0.821-0.988)
ML_AE_inves,0.900\n(0.735-0.979),0.767\n(0.577-0.901),0.833\n(0.715-0.917),0.794\n(0.621-0.913),0.885\n(0.698-0.976),0.844,0.932\n(0.871-0.993)


In [85]:
# Persist the summary table (row labels included) as CSV.
results_all_df.to_csv(path_or_buf='./resources/results/results_all_models.csv')

### 真人组的混淆矩阵

In [116]:
# Row labels of the reader table: a pooled row over all readers, then the
# three 'E' readers followed by their pooled row, then the three 'NE' readers
# followed by their pooled row.
indices = [
    'All_Mean_radiologists',
    *(f'External_{level}_E' for level in ('senior', 'middle', 'junior')),
    'Mean_HEA',
    *(f'External_{level}_NE' for level in ('senior', 'middle', 'junior')),
    'Mean_LEA',
]

# Fresh accumulator: column label -> list of formatted cells.
results_all = {
    column: []
    for column in (
        'Sen_(95% CI)',
        'Spe_(95% CI)',
        'Acc_(95% CI)',
        'PPV_(95% CI)',
        'NPV_(95% CI)',
        'F1_score',
        'AUC_(95% CI)',
    )
}

In [117]:
# Pooled ("Mean") rows and the individual reader files whose rows are
# concatenated to form them, in concatenation order.
pooled_members = {
    'All_Mean_radiologists': ['External_senior_E', 'External_middle_E', 'External_junior_E',
                              'External_senior_NE', 'External_middle_NE', 'External_junior_NE'],
    'Mean_HEA': ['External_senior_E', 'External_middle_E', 'External_junior_E'],
    'Mean_LEA': ['External_senior_NE', 'External_middle_NE', 'External_junior_NE'],
}

# Start from empty columns so that re-running this cell never appends the
# same rows twice (which would break DataFrame construction with `indices`).
results_all = {column: [] for column in results_all}

for index in indices:
    if 'Mean' not in index:
        # Single reader: pass the CSV path, use the Clopper-Pearson interval.
        method = 'clopper_pearson'
        res_path = results_path_all[index]
    elif index in pooled_members:
        # Pooled row: stack the member readers' rows into one frame and use
        # the normal approximation.
        method = 'normal_approximation'
        res_path = pd.concat(
            [pd.read_csv(results_path_all[member]) for member in pooled_members[index]],
            ignore_index=True,
        )
    else:
        raise ValueError(f'Wrong index! {index}')

    (sensitivity, specificity, accuracy, ppv, npv,
     sensitivity_CI, specificity_CI, accuracy_CI, ppv_CI, npv_CI,
     f1Score, auc, auc_ci) = get_diagnosis_performance(res_path, method=method)

    # Metrics reported together with a (low, high) interval.
    metrics_with_interval = (
        ('Sen_(95% CI)', sensitivity, sensitivity_CI),
        ('Spe_(95% CI)', specificity, specificity_CI),
        ('Acc_(95% CI)', accuracy, accuracy_CI),
        ('PPV_(95% CI)', ppv, ppv_CI),
        ('NPV_(95% CI)', npv, npv_CI),
        ('AUC_(95% CI)', auc, auc_ci),
    )
    for column, value, interval in metrics_with_interval:
        results_all[column].append(get_printed_results(value, interval[0], interval[1]))

    # F1 is reported without an interval.
    results_all['F1_score'].append(get_printed_results(f1Score))

In [118]:
# Assemble the reader table (one row per label in `indices`) and show it as
# the cell output.
results_all_df = pd.DataFrame(data=results_all, index=indices)
results_all_df

Unnamed: 0,Sen_(95% CI),Spe_(95% CI),Acc_(95% CI),PPV_(95% CI),NPV_(95% CI),F1_score,AUC_(95% CI)
All_Mean_radiologists,0.833\n(0.779-0.888),0.617\n(0.546-0.688),0.725\n(0.679-0.771),0.685\n(0.623-0.746),0.787\n(0.720-0.855),0.752,0.795\n(0.750-0.840)
External_senior_E,0.900\n(0.735-0.979),0.767\n(0.577-0.901),0.833\n(0.715-0.917),0.794\n(0.621-0.913),0.885\n(0.698-0.976),0.844,0.886\n(0.801-0.971)
External_middle_E,0.800\n(0.614-0.923),0.667\n(0.472-0.827),0.733\n(0.603-0.839),0.706\n(0.525-0.849),0.769\n(0.564-0.910),0.75,0.817\n(0.708-0.926)
External_junior_E,0.800\n(0.614-0.923),0.533\n(0.343-0.717),0.667\n(0.533-0.783),0.632\n(0.460-0.782),0.727\n(0.498-0.893),0.706,0.748\n(0.624-0.873)
Mean_HEA,0.833\n(0.756-0.910),0.656\n(0.557-0.754),0.744\n(0.681-0.808),0.708\n(0.621-0.794),0.797\n(0.706-0.889),0.765,0.815\n(0.753-0.877)
External_senior_NE,0.833\n(0.653-0.944),0.667\n(0.472-0.827),0.750\n(0.621-0.853),0.714\n(0.537-0.854),0.800\n(0.593-0.932),0.769,0.814\n(0.707-0.921)
External_middle_NE,0.833\n(0.653-0.944),0.600\n(0.406-0.773),0.717\n(0.586-0.825),0.676\n(0.502-0.820),0.783\n(0.563-0.925),0.746,0.778\n(0.659-0.896)
External_junior_NE,0.833\n(0.653-0.944),0.467\n(0.283-0.657),0.650\n(0.516-0.769),0.610\n(0.445-0.758),0.737\n(0.488-0.909),0.704,0.734\n(0.608-0.861)
Mean_LEA,0.833\n(0.756-0.910),0.578\n(0.476-0.680),0.706\n(0.639-0.772),0.664\n(0.577-0.751),0.776\n(0.676-0.876),0.739,0.776\n(0.710-0.842)


In [119]:
# Persist the reader table (row labels included) as CSV.
results_all_df.to_csv(path_or_buf='./resources/results/results_all_radiologists.csv')

### 不同勾画者的混淆矩阵

In [9]:
# Row labels of the outline table: p1..p6 for SVM, then p1..p6 for ResNet.
indices = [
    f'outline_{model}_p{number}'
    for model in ('SVM', 'ResNet')
    for number in range(1, 7)
]

# Fresh accumulator: column label -> list of formatted cells.
results_all = {
    column: []
    for column in (
        'Sen_(95% CI)',
        'Spe_(95% CI)',
        'Acc_(95% CI)',
        'PPV_(95% CI)',
        'NPV_(95% CI)',
        'F1_score',
        'AUC_(95% CI)',
    )
}

In [10]:
# Start from empty columns so that re-running this cell never appends the
# same rows twice (which would break DataFrame construction with `indices`).
results_all = {column: [] for column in results_all}

for index in indices:
    # Every outline entry uses the normal-approximation interval.
    method = 'normal_approximation'
    res_path = pd.read_csv(results_path_all[index])

    (sensitivity, specificity, accuracy, ppv, npv,
     sensitivity_CI, specificity_CI, accuracy_CI, ppv_CI, npv_CI,
     f1Score, auc, auc_ci) = get_diagnosis_performance(res_path, method=method)

    # Metrics reported together with a (low, high) interval.
    metrics_with_interval = (
        ('Sen_(95% CI)', sensitivity, sensitivity_CI),
        ('Spe_(95% CI)', specificity, specificity_CI),
        ('Acc_(95% CI)', accuracy, accuracy_CI),
        ('PPV_(95% CI)', ppv, ppv_CI),
        ('NPV_(95% CI)', npv, npv_CI),
        ('AUC_(95% CI)', auc, auc_ci),
    )
    for column, value, interval in metrics_with_interval:
        results_all[column].append(get_printed_results(value, interval[0], interval[1]))

    # F1 is reported without an interval.
    results_all['F1_score'].append(get_printed_results(f1Score))

In [11]:
# Assemble the outline table (one row per label in `indices`) and show it as
# the cell output.
results_all_df = pd.DataFrame(data=results_all, index=indices)
results_all_df

Unnamed: 0,Sen_(95% CI),Spe_(95% CI),Acc_(95% CI),PPV_(95% CI),NPV_(95% CI),F1_score,AUC_(95% CI)
outline_SVM_p1,0.856\n(0.825-0.887),0.490\n(0.446-0.534),0.673\n(0.644-0.702),0.627\n(0.590-0.663),0.773\n(0.727-0.819),0.724,0.799\n(0.772-0.826)
outline_SVM_p2,0.854\n(0.823-0.885),0.484\n(0.440-0.528),0.669\n(0.640-0.698),0.623\n(0.587-0.660),0.768\n(0.722-0.815),0.721,0.793\n(0.766-0.821)
outline_SVM_p3,0.854\n(0.823-0.885),0.486\n(0.442-0.530),0.670\n(0.641-0.699),0.624\n(0.588-0.661),0.769\n(0.723-0.815),0.721,0.799\n(0.772-0.826)
outline_SVM_p4,0.880\n(0.852-0.908),0.506\n(0.462-0.550),0.693\n(0.664-0.722),0.640\n(0.605-0.676),0.808\n(0.765-0.852),0.741,0.814\n(0.788-0.840)
outline_SVM_p5,0.864\n(0.834-0.894),0.522\n(0.478-0.566),0.693\n(0.664-0.722),0.644\n(0.608-0.680),0.793\n(0.750-0.837),0.738,0.804\n(0.778-0.831)
outline_SVM_p6,0.852\n(0.821-0.883),0.492\n(0.448-0.536),0.672\n(0.643-0.701),0.626\n(0.590-0.663),0.769\n(0.723-0.815),0.722,0.808\n(0.781-0.834)
outline_ResNet_p1,0.930\n(0.908-0.952),0.660\n(0.618-0.702),0.795\n(0.770-0.820),0.732\n(0.698-0.767),0.904\n(0.874-0.934),0.819,0.894\n(0.875-0.913)
outline_ResNet_p2,0.928\n(0.905-0.951),0.648\n(0.606-0.690),0.788\n(0.763-0.813),0.725\n(0.690-0.760),0.900\n(0.869-0.931),0.814,0.897\n(0.878-0.916)
outline_ResNet_p3,0.914\n(0.889-0.939),0.646\n(0.604-0.688),0.780\n(0.754-0.806),0.721\n(0.686-0.756),0.883\n(0.850-0.916),0.806,0.887\n(0.867-0.906)
outline_ResNet_p4,0.950\n(0.931-0.969),0.678\n(0.637-0.719),0.814\n(0.790-0.838),0.747\n(0.713-0.781),0.931\n(0.905-0.957),0.836,0.913\n(0.896-0.930)


In [12]:
# Persist the outline table (row labels included) as CSV.
results_all_df.to_csv(path_or_buf='./resources/results/results_all_outliners.csv')