This notebook evaluates the selected final model on the held-out "test" dataset (300 patients). It requires feat_transformer.py, which contains the encoded FEAT model for resistant hypertension.

In [None]:
import numpy as np
import pandas as pd

# Load the test cohort (300 random patients) and the matching training cohort.
# `targets` maps each label column found in the CSVs to its short name; all of
# these columns (plus the patient id) are removed from the feature matrices.
targets = dict(
    htn_dx_ia='Htndx',
    res_htn_dx_ia='ResHtndx',
    htn_hypok_dx_ia='HtnHypoKdx',
    HTN_heuristic='HtnHeuri',
    res_HTN_heuristic='ResHtnHeuri',
    hypoK_heuristic_v4='HtnHypoKHeuri',
)
drop_cols = ['UNI_ID', *targets]

cohort_dir = '../Dataset' + str(101) + '/' + 'ResHtndx' + '/'

# ---- test data -------------------------------------------------------------
df_test = pd.read_csv(cohort_dir + 'ResHtndxATest.csv')
rand_df_test = pd.read_csv('../Dataset101/redcap_data_access_group.csv')

# Patients in the 'pc_200' access group form the random subset; every other
# patient in the test file is treated as heuristic-flagged.
in_pc200_test = rand_df_test['redcap_data_access_group'] == 'pc_200'
rand_ids_test = rand_df_test.loc[in_pc200_test, 'UNI_ID']
random_mask = df_test['UNI_ID'].isin(rand_ids_test)
flagged_mask = ~random_mask

# Split before `df_test` is narrowed to the random subset.
df_test_flagged = df_test.loc[flagged_mask]
df_test = df_test.loc[random_mask]

X_test = df_test.drop(columns=drop_cols)
y_test = df_test['res_htn_dx_ia'].values
y_heu = df_test['res_HTN_heuristic']

X_test_flagged = df_test_flagged.drop(columns=drop_cols)
y_test_flagged = df_test_flagged['res_htn_dx_ia'].values
y_heu_flagged = df_test_flagged['res_HTN_heuristic']

# ---- training data (restricted to the random subset as well) ---------------
df_train = pd.read_csv(cohort_dir + 'ResHtndxATrain.csv')
rand_df_train = pd.read_csv('../Dataset101/redcap_data_access_group_train.csv')

in_pc200_train = rand_df_train['redcap_data_access_group'] == 'pc_200'
rand_ids_train = rand_df_train.loc[in_pc200_train, 'UNI_ID']
df_train = df_train.loc[df_train['UNI_ID'].isin(rand_ids_train)]

X_train = df_train.drop(columns=drop_cols)
y_train = df_train['res_htn_dx_ia'].values


In [None]:
# Sanity check: list the distinct data-access groups present in the training roster.
rand_df_train['redcap_data_access_group'].unique()

In [None]:
def _cohort_summary(label, features, labels):
    """Print the number of samples and the fraction of positive labels."""
    prevalence = np.sum(labels == 1) / len(labels)
    print(label, len(features), 'disease prevalence:', prevalence)


_cohort_summary('training samples:', X_train, y_train)
_cohort_summary('test samples:', X_test, y_test)


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from feat_transformer import FeatTransformer

# Final model: the FeatTransformer step (from feat_transformer.py) feeding an
# L2-penalised logistic regression fitted with the liblinear solver.
ft_lr_estimator = Pipeline(steps=[
    ('prep', FeatTransformer()),
    ('est', LogisticRegression(penalty='l2', C=1.0, solver='liblinear',
                               intercept_scaling=1.0)),
])

# Fit on the random training subset; the fitted pipeline is the cell output.
ft_lr_estimator.fit(X_train, y_train)

In [None]:
#Evaluating everything on Training first:

from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
from scipy import interp

# Model score for every training sample: column 1 of predict_proba
# (the class-1 probability, assuming the labels are coded 0/1).
y_pred_proba = ft_lr_estimator.predict_proba(X_train)[:,1]

def prc_values(y,y_pred_proba):
    """Build a precision/recall table and an interpolated precision curve.

    Parameters
    ----------
    y : array-like
        True binary labels; the positive class is 1.
    y_pred_proba : array-like
        Predicted scores/probabilities for the positive class.

    Returns
    -------
    mean_recall : numpy.ndarray
        100 evenly spaced recall values in [0, 1].
    precision : numpy.ndarray
        Precision linearly interpolated onto ``mean_recall``.
    prc : pandas.DataFrame
        One row per threshold with columns ``precision``, ``recall`` and
        ``thresholds``, sorted by recall, plus one synthetic anchor row at
        recall 0 (precision = maximum observed precision, threshold 0).
    """
    precision, recall, prcthresholds = precision_recall_curve(y, y_pred_proba, pos_label=1)
    max_prec = np.max(precision)

    # precision/recall carry one more entry than thresholds (the closing point of
    # the curve, which has no threshold); keep only the points that have one.
    n_thresholds = len(prcthresholds)
    prc = pd.DataFrame({'precision': precision[:n_thresholds],
                        'recall': recall[:n_thresholds],
                        'thresholds': prcthresholds},
                       columns=['precision', 'recall', 'thresholds'])

    # Anchor the curve at recall 0 so the interpolation below covers [0, 1].
    first = pd.DataFrame({"precision":[max_prec],
                          "recall":[0],
                          'thresholds':[0]})

    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same frame
    # (row labels are intentionally left untouched, as append did).
    prc = pd.concat([prc, first], sort=False)
    prc = prc.sort_values(by='recall')

    mean_recall = np.linspace(0, 1, 100)
    # np.interp replaces the deprecated scipy.interp alias (same function).
    precision = np.interp(mean_recall, prc['recall'], prc['precision'])
    return mean_recall, precision, prc

mean_recall, mean_precisions, prc_df = prc_values(y_train, y_pred_proba)

# Rebuild the confusion-matrix counts at every threshold from precision/recall:
#   tp = recall * (number of positives)
#   fp follows from precision = tp / (tp + fp)
# and derive specificity from them.
pos = np.sum(y_train)
neg = len(y_train) - pos
prc_df = prc_df.assign(
    tp=lambda d: d['recall'] * pos,
    fn=lambda d: pos - d['tp'],
    fp=lambda d: (d['tp'] - (d['precision'] * d['tp'])) / (d['precision']),
    tn=lambda d: neg - d['fp'],
    specificity=lambda d: d['tn'] / (d['tn'] + d['fp']),
)


In [None]:
from sklearn.metrics import average_precision_score
# Average precision of the model scores on the training cohort.
average_precision_score(y_true=y_train, y_score=y_pred_proba)

In [None]:
# Re-express precision at the global prevalence of resistant hypertension (7.5%):
#   adjusted PPV = recall*prev / (recall*prev + (1 - specificity)*(1 - prev))
prevs = [0.075]
for prev in prevs:
    weighted_tpr = prc_df['recall'] * prev
    weighted_fpr = (1 - prc_df['specificity']) * (1 - prev)
    prc_df['adj_precision' + str(prev)] = weighted_tpr / (weighted_tpr + weighted_fpr)


## Choosing a threshold

We have a target precision of **0.7**. What probability threshold would we choose to hit this on our training set?


In [None]:
# Target positive predictive value (precision) used to pick the probability threshold.
TARGET_PPV = 0.7

In [None]:
# Operating point: among rows whose prevalence-adjusted precision meets
# TARGET_PPV, take the one with the smallest adjusted precision.
# (To select on raw precision instead, filter and sort on the 'precision' column.)
adj_col = 'adj_precision0.075'
meets_target = prc_df[adj_col] >= TARGET_PPV
chosen_prc = prc_df.loc[meets_target].sort_values(by=adj_col).iloc[0]


In [None]:
# Display the selected operating-point row (precision, recall, threshold and derived columns).
chosen_prc

In [None]:
# Probability cut-off taken from the selected training operating point.
# (A fixed cut-off such as 0.5 could be assigned here instead for comparison.)
Chosen_Threshold = chosen_prc.loc['thresholds']
print('Chosen Threshold:', Chosen_Threshold)

### We use target precision of 0.7; our probability threshold based on the training set is 0.595807. This gives a precision recall pair of (0.8,0.621). (0.715 adjusted PPV, 0.64  recall)

In [None]:
# Training-set precision/recall: unadjusted curve, prevalence-adjusted curve(s),
# the chosen operating point and the target-precision line.
plt.figure(figsize=(10, 6))
plt.title("Feat Final Model Adjusted Precision/Recall Resistant Hypertension (random subset)",
          loc='left', fontsize=14)
plt.plot(mean_recall, mean_precisions, ':b', alpha=0.75, label="Unadjusted")

for prev in prevs:
    adj_col = 'adj_precision' + str(prev)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.plot(prc_df['recall'], prc_df[adj_col], 'b', label='Prevalence = ' + str(prev))

    # Label every 10th point of the curve with its probability threshold.
    every_tenth = zip(prc_df['recall'].iloc[::10],
                      prc_df[adj_col].iloc[::10],
                      prc_df['thresholds'].iloc[::10])
    for r, ap, thresh in every_tenth:
        plt.text(r, ap + 0.03, str(round(thresh, 3)), rotation=30)

    # Row whose threshold lies closest to the chosen one.
    nearest_pos = (prc_df['thresholds'] - Chosen_Threshold).abs().argsort().values[0]
    chosen_prc = prc_df.iloc[nearest_pos]
    op_recall = chosen_prc['recall']
    op_ppv = chosen_prc[adj_col]

    # Vertical marker from the x-axis up to the curve at the operating point.
    plt.plot([op_recall, op_recall], [0, op_ppv], ':k', alpha=1)
    annotation = ("Chosen Threshold = " + str(round(Chosen_Threshold, 3))
                  + "\nRecall = " + str(round(op_recall, 3)))
    plt.text(x=op_recall, y=0.5 * op_ppv, s=annotation)

plt.plot([0, 1], [TARGET_PPV, TARGET_PPV], '--r', label='Target precision')
plt.legend()
plt.savefig('figs/adjusted_AUPRC_training.pdf', dpi=400)

# Repeat everything on the testing data set. 

In [None]:

# Score the random test cohort and rebuild its precision/recall table.
y_pred_proba = ft_lr_estimator.predict_proba(X_test)[:, 1]
mean_recall, mean_precisions, prc_df = prc_values(y_test, y_pred_proba)

# Rebuild the confusion-matrix counts at every threshold from precision/recall
# (tp = recall * number of positives) and derive specificity from them.
pos = np.sum(y_test)
neg = len(y_test) - pos
prc_df = prc_df.assign(
    tp=lambda d: d['recall'] * pos,
    fn=lambda d: pos - d['tp'],
    fp=lambda d: (d['tp'] - (d['precision'] * d['tp'])) / (d['precision']),
    tn=lambda d: neg - d['fp'],
    specificity=lambda d: d['tn'] / (d['tn'] + d['fp']),
)


## now for the heuristic-flagged patients

In [None]:

# Score the heuristic-flagged test cohort and rebuild its precision/recall table.
y_pred_proba_flagged = ft_lr_estimator.predict_proba(X_test_flagged)[:,1]

mean_recall_flagged, mean_precisions_flagged, prc_df_flagged = prc_values(y_test_flagged,y_pred_proba_flagged)

# Back-calculate the confusion matrix at every threshold to get specificity.
# tp = recall * total positives, so the class counts MUST come from the flagged
# cohort itself; using the random cohort's counts (y_test) would distort
# tp/fp/tn and therefore specificity and the prevalence-adjusted PPV.
pos_flagged = np.sum(y_test_flagged)
neg_flagged = len(y_test_flagged)-pos_flagged
prc_df_flagged['tp'] = prc_df_flagged['recall']*pos_flagged
prc_df_flagged['fn'] = pos_flagged-prc_df_flagged['tp']
# fp follows from precision = tp / (tp + fp)
prc_df_flagged['fp'] = (prc_df_flagged['tp']-(prc_df_flagged['precision']*prc_df_flagged['tp']))/(prc_df_flagged['precision'])
prc_df_flagged['tn'] = neg_flagged-prc_df_flagged['fp']
prc_df_flagged['specificity'] = prc_df_flagged['tn']/(prc_df_flagged['tn']+prc_df_flagged['fp'])


In [None]:
# Apply the prevalence adjustment to both test tables (random and flagged):
#   adjusted PPV = recall*prev / (recall*prev + (1 - specificity)*(1 - prev))
for prev in prevs:
    adj_col = 'adj_precision' + str(prev)
    for table in (prc_df, prc_df_flagged):
        weighted_tpr = table['recall'] * prev
        weighted_fpr = (1 - table['specificity']) * (1 - prev)
        table[adj_col] = weighted_tpr / (weighted_tpr + weighted_fpr)


In [None]:
# Examine the adjusted precision on the test set at the threshold closest to
# the one chosen on the training set. The absolute difference is required:
# sorting the signed difference would simply return the smallest threshold.
closest_pos = (prc_df['thresholds']-Chosen_Threshold).abs().argsort().values[0]
display(prc_df.iloc[closest_pos])

### A model with a probability threshold as determined in the training set would have an adjusted PPV of 0.535 on the general population at 7.5% prevalence. Compare to training (adjusted PPV 0.696, recall 0.64)

## repeat adjustment for heuristic


In [None]:
from sklearn.metrics import average_precision_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
        
# Heuristic performance on the random test cohort, treating the heuristic
# column as a 0/1 prediction of the resistant-hypertension label.
heu_says_yes = (y_heu == 1)
heu_says_no = (y_heu == 0)
truly_pos = (y_test == 1)
truly_neg = (y_test == 0)
heu_true_pos = np.sum(heu_says_yes & truly_pos)

heu = {
    'average_precision_score_test': average_precision_score(y_test, y_heu),
    'precision': heu_true_pos / np.sum(heu_says_yes),
    'recall': heu_true_pos / np.sum(truly_pos),
    'specificity': np.sum(heu_says_no & truly_neg) / np.sum(truly_neg),
    'roc_auc_score_test': roc_auc_score(y_test, y_heu),
}
heu_recall, heu_spec = heu['recall'], heu['specificity']

# Same prevalence adjustment as applied to the model's precision/recall table.
for prev in prevs:
    heu_ppv_adj = heu_recall * prev / (heu_recall * prev + (1 - heu_spec) * (1 - prev))
    print('Heuristic Adjusted PPV for prevalence=', prev * 100, '%:', heu_ppv_adj)

print('Heuristc performance:', heu)
print('Heuristic confusion matrix')
ConfusionMatrixDisplay(confusion_matrix(y_test, y_heu)).plot()

## Compare test set FEAT model to heuristic

In [None]:
# Random test cohort: adjusted precision/recall of the model, the operating
# point implied by the training threshold, and the heuristic for comparison.
plt.figure(figsize=(10, 6))
plt.title("Feat Final Model Adjusted Precision/Recall Resistant Hypertension ",
          loc='left', fontsize=14)
plt.plot(mean_recall, mean_precisions, ':b', alpha=0.75, label="Unadjusted")

for prev in prevs:
    adj_col = 'adj_precision' + str(prev)
    # Replaces the generic title set above with a prevalence-specific one.
    plt.title("Feat Final Model, Random Test Patients, Resistant Hypertension (Prevalence ="
              + str(prev) + ")", loc='left', fontsize=14)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.plot(prc_df['recall'], prc_df[adj_col], '-bx',
             label='Prevalence = ' + str(prev))

    # Operating point: smallest curve threshold at or above the chosen threshold.
    at_or_above = prc_df.loc[prc_df['thresholds'] >= Chosen_Threshold]
    chosen_prc = at_or_above.sort_values(by='thresholds').iloc[0]
    print(chosen_prc)
    op_recall = chosen_prc['recall']
    op_ppv = chosen_prc[adj_col]

    plt.plot([op_recall, op_recall], [0, 1], ':k', alpha=1)
    annotation = ("Chosen Threshold = " + str(round(Chosen_Threshold, 3))
                  + "\nPPV = " + str(round(op_ppv, 4))
                  + "\nRecall = " + str(round(op_recall, 4)))
    plt.text(x=op_recall + 0.01, y=0.3 * op_ppv, s=annotation)
    plt.plot(op_recall, op_ppv, 'Xk')

# Heuristic operating point (prevalence-adjusted PPV) with its label.
plt.plot(heu_recall, heu_ppv_adj, 'sr', label='Heuristic')
heu_annotation = ('PPV = ' + str(round(heu_ppv_adj, 2))
                  + '\nRecall = ' + str(round(heu_recall, 2)))
plt.text(heu_recall - 0.01, heu_ppv_adj, s=heu_annotation,
         label='Heuristic', horizontalalignment='right')

plt.legend()
plt.savefig('figs/adjusted_AUPRC_test_random_pts_thresh_random.pdf', dpi=400)

## comparison on flagged patients 

## adjusted heuristic

In [None]:
from sklearn.metrics import average_precision_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
        
# Heuristic performance on the flagged test cohort, treating the heuristic
# column as a 0/1 prediction of the resistant-hypertension label.
heu_says_yes_flagged = (y_heu_flagged == 1)
heu_says_no_flagged = (y_heu_flagged == 0)
truly_pos_flagged = (y_test_flagged == 1)
truly_neg_flagged = (y_test_flagged == 0)
heu_true_pos_flagged = np.sum(heu_says_yes_flagged & truly_pos_flagged)

heu_flagged = {
    'average_precision_score_test': average_precision_score(y_test_flagged, y_heu_flagged),
    'precision': heu_true_pos_flagged / np.sum(heu_says_yes_flagged),
    'recall': heu_true_pos_flagged / np.sum(truly_pos_flagged),
    'specificity': np.sum(heu_says_no_flagged & truly_neg_flagged) / np.sum(truly_neg_flagged),
    'roc_auc_score_test': roc_auc_score(y_test_flagged, y_heu_flagged),
}
heu_recall_flagged, heu_spec_flagged = heu_flagged['recall'], heu_flagged['specificity']

# Same prevalence adjustment as applied to the model's precision/recall table.
for prev in prevs:
    heu_ppv_adj_flagged = heu_recall_flagged * prev / (
        heu_recall_flagged * prev + (1 - heu_spec_flagged) * (1 - prev))
    print('Heuristic Adjusted PPV for prevalence=', prev * 100, '%:', heu_ppv_adj_flagged)

print('Heuristc performance:', heu_flagged)
print('Heuristic confusion matrix')
ConfusionMatrixDisplay(confusion_matrix(y_test_flagged, y_heu_flagged)).plot()

In [None]:
# Flagged test cohort: adjusted precision/recall of the model, the operating
# point implied by the training threshold, and the heuristic for comparison.
plt.figure(figsize=(10, 6))
plt.title("Feat Final Model Adjusted Precision/Recall Resistant Hypertension ",
          loc='left', fontsize=14)
for prev in prevs:
    adj_col = 'adj_precision'+str(prev)
    # Replaces the generic title set above with a prevalence-specific one.
    plt.title("Feat Final Model, Flagged Test Patients, Resistant Hypertension (Prevalence ="
              +str(prev)+")", loc='left', fontsize=14)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.plot(prc_df_flagged['recall'], prc_df_flagged[adj_col], '-bx',
             label = 'Prevalence = '+str(prev))

    # Operating point: the smallest curve threshold at or above the chosen one,
    # i.e. the point actually reached when predicting positive for
    # score >= Chosen_Threshold (same rule as the random-cohort figure).
    # Falls back to the nearest threshold if none lies at or above it.
    at_or_above = prc_df_flagged.loc[prc_df_flagged['thresholds']>=Chosen_Threshold]
    if len(at_or_above) > 0:
        chosen_prc = at_or_above.sort_values(by='thresholds').iloc[0]
    else:
        nearest_pos = (prc_df_flagged['thresholds']-Chosen_Threshold).abs().argsort().values[0]
        chosen_prc = prc_df_flagged.iloc[nearest_pos]

    print(chosen_prc)
    plt.plot([chosen_prc['recall'],chosen_prc['recall']],
             [0, 1], ':k', alpha=1)
    plt.text(x= chosen_prc['recall']+0.01,
             y=0.3*chosen_prc[adj_col],
             s="Chosen Threshold = "+str(round(Chosen_Threshold,3))
               +"\nPPV = "+str(round(chosen_prc[adj_col],2))
               +"\nRecall = "+str(round(chosen_prc['recall'],2)))
    plt.plot(chosen_prc['recall'], chosen_prc[adj_col], 'Xk')

# NOTE(review): the heuristic marker uses the raw (unadjusted) precision of the
# flagged cohort, whereas the model curve here is prevalence-adjusted and the
# random-cohort figure plots the adjusted heuristic PPV - confirm this is intended.
plt.plot(heu_flagged['recall'], heu_flagged['precision'], 'sr', label='Heuristic')

plt.text(heu_flagged['recall']-0.01, heu_flagged['precision'],
         s= 'PPV = '+str(round(heu_flagged['precision'],2))
         +'\nRecall = '+str(round(heu_flagged['recall'],2)),
        label='Heuristic',horizontalalignment='right')

plt.legend(loc='lower left')
plt.savefig('figs/adjusted_AUPRC_test_flagged_pts_thresh_random.pdf',dpi=400)