In [None]:
import pandas as pd
import numpy as np

from sklearn.metrics import cohen_kappa_score, roc_auc_score, recall_score, confusion_matrix, precision_score, accuracy_score
from sklearn.utils import resample

# Compare EHR and ENDO diagnoses

* Metrics include: agreement (accuracy), kappa, sensitivity, specificity, AUC, PPV, NPV

In [None]:
# NOTE(review): loading a pickle can execute arbitrary code; only read this file
# if it was produced by a trusted step of this project.
combined = pd.read_pickle('../data/combined_data.pkl')

# Whole months from the ENDO study date to the EHR diagnosis date:
# pos = dx with endo in EHR after study, neg = dx with endo in EHR prior to study.
# Rows whose date difference is missing are set to 0, and astype(int) drops the
# fractional part of the month count.
# The month length is written as an explicit Timedelta because pandas >= 2.0
# rejects the ambiguous np.timedelta64(1, 'M') unit; 2629746 s (365.2425 / 12
# days) is the average month that unit used to resolve to, so values are unchanged.
combined['months_between'] = ((combined['EHR_Dx_Date'] - combined['ENDO_study_date']) /
                              pd.Timedelta(seconds=2629746)).fillna(0).astype(int)

In [None]:
def get_diagnosis_dataframe(original, threshold, cohort):
    """Build the per-study diagnosis table for one cohort.

    Parameters
    ----------
    original : pandas.DataFrame
        Frame holding at least 'Masked_PersonID', 'EndoID', 'Cohort_final' and,
        when a threshold is given, 'months_between' plus the EHR flag columns.
    threshold : int or None
        When not None, the EHR flag columns are set to 0 on every row whose
        'months_between' is strictly greater than this value. None leaves the
        flags untouched.
    cohort : object
        Value of 'Cohort_final' to keep.

    Returns
    -------
    pandas.DataFrame
        Column-wise maximum per ('Masked_PersonID', 'EndoID') pair, restricted
        to the rows of the requested cohort. The input frame is not modified.
    """
    frame = original
    if threshold is not None:
        # Work on a copy so the caller's frame keeps its original flags.
        frame = original.copy()
        too_late = frame['months_between'] > threshold
        for flag in ('endo_dx_EHR', 'SE_EHR', 'OE_EHR', 'DE_EHR', 'other_EHR'):
            frame.loc[too_late, flag] = 0

    per_study = frame.groupby(['Masked_PersonID', 'EndoID']).max().reset_index()
    in_cohort = per_study['Cohort_final'] == cohort
    return per_study.loc[in_cohort]

def bootstrap_performance_metrics(prediction_data, random_state=None):
    """Score one bootstrap replicate of EHR diagnoses against ENDO diagnoses.

    The rows of ``prediction_data`` are resampled with replacement, stratified
    on 'endo_dx_ENDO', and 'endo_dx_EHR' is then scored against 'endo_dx_ENDO'
    on the resampled rows.

    Parameters
    ----------
    prediction_data : pandas.DataFrame
        Must contain the columns 'endo_dx_ENDO' (used as the reference label)
        and 'endo_dx_EHR' (used as the prediction).
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``sklearn.utils.resample``. Pass a value to make the
        replicate reproducible; the default None keeps the previous unseeded
        behaviour.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns AGREEMENT, AUC, PPV, NPV,
        SENSITIVITY, SPECIFICITY and KAPPA.
    """
    # Bootstrap the data
    boot_data = resample(prediction_data,
                         stratify=prediction_data['endo_dx_ENDO'],
                         random_state=random_state)
    y_true = boot_data['endo_dx_ENDO']
    y_pred = boot_data['endo_dx_EHR']

    # Performance metrics
    acc = accuracy_score(y_true, y_pred)
    # AUC is computed from the 'endo_dx_EHR' column itself, not from a separate score.
    auc = roc_auc_score(y_true, y_pred)
    ppv = precision_score(y_true, y_pred)
    sensitivity = recall_score(y_true, y_pred)
    # The four-way unpacking requires a 2x2 confusion matrix.
    tn, fp, fn, _tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp)
    npv = tn / (tn + fn)
    kappa = cohen_kappa_score(y_true, y_pred)

    # Collect metrics in dataframe
    return pd.DataFrame({'AGREEMENT': [acc],
                         'AUC': [auc],
                         'PPV': [ppv],
                         'NPV': [npv],
                         'SENSITIVITY': [sensitivity],
                         'SPECIFICITY': [specificity],
                         'KAPPA': [kappa]})

def summarize_bootstrap_results(bootstrap_results, confidence=95):
    """Summarise bootstrap replicates as a median and a percentile interval.

    Parameters
    ----------
    bootstrap_results : pandas.DataFrame
        One column per metric, one row per bootstrap replicate.
    confidence : float, optional
        Width of the percentile interval in percent, strictly between 0 and
        100. The default of 95 reports the 2.5th and 97.5th percentiles.

    Returns
    -------
    pandas.DataFrame
        One row per input column with the columns METRIC (column name),
        MEDIAN (50th percentile), CI_LOW and CI_HIGH.

    Raises
    ------
    ValueError
        If ``confidence`` is not strictly between 0 and 100.
    """
    if not 0 < confidence < 100:
        raise ValueError('confidence must be strictly between 0 and 100')

    alpha = 100 - confidence
    names = list(bootstrap_results.columns)

    def percentile_per_metric(q):
        # One percentile value per metric column, in column order.
        return [np.percentile(bootstrap_results[name], q) for name in names]

    return pd.DataFrame({'METRIC': names,
                         'MEDIAN': percentile_per_metric(50),
                         'CI_LOW': percentile_per_metric(alpha / 2),
                         'CI_HIGH': percentile_per_metric(100 - alpha / 2)})

In [None]:
# Number of bootstrap replicates drawn by the resampling loop below.
n = 1000

In [None]:
# EHR dx more than 1 month after study date are voided
threshold = 1
# Third argument selects cohort 1 (rows with Cohort_final == 1).
keep_dx = get_diagnosis_dataframe(combined, threshold, 1)

# Seed NumPy's global RNG, which sklearn's resample draws from when it is given
# no random_state, so that Restart & Run All reproduces the same intervals.
np.random.seed(42)

# Build every single-row replicate first and concatenate once; calling
# pd.concat inside the loop re-copies all accumulated rows on each iteration.
bootstrap_results = pd.concat([bootstrap_performance_metrics(keep_dx) for _ in range(n)])
metrics = summarize_bootstrap_results(bootstrap_results)


In [None]:
# Write the summary table built in the previous cell to CSV; index=False leaves
# the DataFrame index out of the file.
metrics.to_csv('../results/diagnosis/metrics.csv', index=False)