# PCE Calculations of Testing Dataset

In [1]:
import ASCVD_Calc_PCE
import pandas as pd
import numpy as np
from importlib import reload
reload(ASCVD_Calc_PCE)
from sklearn.metrics import roc_auc_score, roc_curve, RocCurveDisplay, accuracy_score

## Import Data

In [3]:
# Directory that holds the LRPP extracts. Kept in one place so the notebook can be
# pointed at a different mount by editing a single line.
DATA_DIR = '/Volumes/fsmresfiles/PrevMed/Projects/MESA_RiskPred/LRPP data'

df = pd.read_csv(f'{DATA_DIR}/LRPP_select.csv')
indecies_test = pd.read_csv(f'{DATA_DIR}/random_index_select.csv')

# Columns are relabelled by position, so the index file must have exactly three columns.
indecies_test.columns = ['index', 'label', 'study_index']

# Keep the rows whose study_index flag is False, then restrict df to those ids.
indecies_test = indecies_test.loc[indecies_test['study_index'] == False, :]
df = df.loc[df['id'].isin(indecies_test['index']), :]

In [4]:
# get labels and tte
# label and tte are deduplicated together so both arrays describe the same rows in the
# same order; deduplicating them separately can yield arrays of different lengths.
te_outcomes = df.loc[:, ['id', 'label', 'tte']].drop_duplicates()
if not te_outcomes['id'].is_unique:
    # More than one (label, tte) pair for an id would misalign labels against predictions.
    raise ValueError('each id must map to exactly one (label, tte) pair')
te_label = np.array(te_outcomes['label'])
te_time = np.array(te_outcomes['tte'])

## PCE Calculation

In [5]:
# get risk measurements from PCE data set (Multiple)
# Returns two objects; only pce_pred_df is used by the cells below.
# NOTE(review): the meaning of the positional arguments 8 and 10 is defined in
# ASCVD_Calc_PCE.pce_pred_df_tab — presumably the prediction time and a horizon/index,
# mirroring the (8, 10) passed to prediction_auc_PCE further down; confirm.
pce_pred_df, pce_df = ASCVD_Calc_PCE.pce_pred_df_tab(df, 8, 10)

In [6]:
# calculate AUC 
def prediction_auc_PCE_df(pred_time, pred_time_index, pce_pred_df, cluster):
    """
    Attach the binary outcome to the PCE predictions and return the scorable rows.

    pred_time: prediction time; the outcome window ends at pred_time + 10
    pred_time_index: not used by this function; kept so existing calls still work
    pce_pred_df: df with a 'risk' column (and a 'labels' column when cluster is used)
    cluster: 'none' to keep every row, otherwise the value of 'labels' to keep

    Side effects: writes a 'true_label' column onto the pce_pred_df that was passed in,
    and sets the module-level names `true` and `time_horizon`. Reads the module-level
    arrays te_time and te_label, which must line up row-for-row with pce_pred_df.

    Returns the rows with a non-null risk, optionally restricted to one cluster.
    """
    global true, time_horizon

    time_horizon = pred_time + 10
    # 1 only when the event is flagged and occurred no later than the horizon
    event_within_horizon = te_time <= time_horizon
    event_flag = (te_label == 1).astype(int)
    true = event_within_horizon * event_flag

    pce_pred_df['true_label'] = true

    has_risk = pce_pred_df['risk'].notnull()
    scored = pce_pred_df.loc[has_risk, :]

    if cluster != 'none':
        scored = scored.loc[scored['labels'] == cluster, :]

    return scored

def prediction_auc_PCE(pred_time, pred_time_index, pce_pred_df, cluster):
    """
    ROC AUC of the PCE risk against the outcome built by prediction_auc_PCE_df.

    Arguments are forwarded unchanged to prediction_auc_PCE_df, so the passed
    pce_pred_df gains a 'true_label' column as a side effect.
    Returns the value produced by roc_auc_score.
    """
    scored = prediction_auc_PCE_df(pred_time, pred_time_index, pce_pred_df, cluster)
    y_true = scored['true_label'].tolist()
    y_score = scored['risk'].tolist()
    return roc_auc_score(y_true, y_score)

def prediction_ROC(pred_time, pred_time_index, pce_pred_df, cluster):
    """
    ROC curve points of the PCE risk against the outcome built by prediction_auc_PCE_df.

    Arguments are forwarded unchanged to prediction_auc_PCE_df, so the passed
    pce_pred_df gains a 'true_label' column as a side effect.
    Returns (fpr, tpr, thresh) as produced by roc_curve.
    """
    scored = prediction_auc_PCE_df(pred_time, pred_time_index, pce_pred_df, cluster)
    y_true = scored['true_label'].tolist()
    y_score = scored['risk'].tolist()
    false_pos_rate, true_pos_rate, cutoffs = roc_curve(y_true, y_score)
    return false_pos_rate, true_pos_rate, cutoffs

In [7]:
# Report missing PCE risks, then the overall PCE AUC (pred_time = 8, all clusters)
n_missing_risk = pce_pred_df.risk.isna().sum()
print('NULL Risk Values from PCE: ', n_missing_risk)

# This call also adds the 'true_label' column to pce_pred_df, which later cells rely on
overall_pce_auc = prediction_auc_PCE(8, 10, pce_pred_df, 'none')
print('PCE AUC:', np.round(overall_pce_auc, 3))

NULL Risk Values from PCE:  0
PCE AUC: 0.801


## Get Risk Categories

#### Create Risk Categories
1. Low: < 5%
2. Borderline: 5% to < 7.5%
3. Intermediate: 7.5% to < 20%
4. High: ≥ 20%

In [9]:
# Bucket each PCE risk into its category; anything matching no band (including a
# missing risk) falls through to 'Low Risk'.
pce_risk_values = pce_pred_df['risk']
risk_bands = [
    ((pce_risk_values >= 0.05) & (pce_risk_values < 0.075)).to_numpy(),
    ((pce_risk_values >= 0.075) & (pce_risk_values < 0.2)).to_numpy(),
    (pce_risk_values >= 0.2).to_numpy(),
]
risk_band_names = ['Borderline Risk', 'Intermediate Risk', 'High Risk']
pce_pred_df['risk_cat'] = np.select(risk_bands, risk_band_names, default='Low Risk')

#### DDH Risk Breakdown by PCE Standards

In [10]:
# Percentage of rows in each risk category
risk_cat_share = pce_pred_df['risk_cat'].value_counts() / len(pce_pred_df)
risk_cat_share.round(3) * 100

Low Risk             47.7
Intermediate Risk    28.6
Borderline Risk      13.9
High Risk             9.9
Name: risk_cat, dtype: float64

In [11]:
# Outcome counts per (risk category, true label), with each category's total and the
# share of the category that each label represents.
# groupby().size() is used instead of value_counts() because the column names that
# value_counts() produces differ between pandas versions, which broke the renames
# and the implicit merge key.
test = (
    pce_pred_df.groupby(['risk_cat', 'true_label'])
    .size()
    .rename('counts')
    .reset_index()
)
test2 = (
    pce_pred_df.groupby('risk_cat')
    .size()
    .rename('total_counts')
    .reset_index()
)
test = pd.merge(test, test2, on='risk_cat')
test['percentage'] = round(test['counts'] / test['total_counts'],3) * 100
test

Unnamed: 0,risk_cat,true_label,counts,total_counts,percentage
0,Borderline Risk,0,1659,1727,96.1
1,Borderline Risk,1,68,1727,3.9
2,High Risk,0,980,1228,79.8
3,High Risk,1,248,1228,20.2
4,Intermediate Risk,0,3216,3557,90.4
5,Intermediate Risk,1,341,3557,9.6
6,Low Risk,0,5874,5940,98.9
7,Low Risk,1,66,5940,1.1


In [12]:
# Write the scored frame (including the true_label and risk_cat columns added above)
# to the current working directory.
# NOTE(review): the file name says "training" while the notebook title says "Testing
# Dataset" — confirm which cohort this is before anyone consumes the file.
# NOTE(review): index = False drops the index, which the comparison cell below turns
# into pt_id; confirm the CSV does not need that identifier.
pce_pred_df.to_csv('pce_pred_training_df.csv', index = False)





-----------
## Compare Risk Categories

In [None]:
ddh_pred_df = pd.read_csv('/Users/excenity/Dropbox/HSIP/Research/MESA/Output/DDH_pred_df.csv')

ddh_pred_df = ddh_pred_df.loc[:, ['pt_id', 'value']].rename(columns = {'value' : 'ddh_risk'})

# Expose the PCE frame's index as pt_id. rename_axis makes this work whether or not
# the index already carries a name (reset_index only emits 'index' for an unnamed one).
pce_side = (
    pce_pred_df.loc[:, ['risk', 'true_label', 'risk_cat']]
    .rename_axis('pt_id')
    .reset_index()
    .rename(columns = {'risk' : 'pce_risk'})
)

# Join on the patient id explicitly instead of relying on whichever columns happen to
# be shared between the two frames.
pred_df = pd.merge(ddh_pred_df, pce_side, on = 'pt_id')

In [None]:
# Mean PCE risk within each PCE risk category
pred_df.groupby('risk_cat')['pce_risk'].mean().round(3)

In [None]:
# Mean DDH risk within each PCE risk category
pred_df.groupby('risk_cat')['ddh_risk'].mean().round(3)

### PCE AUC by Risk Category

In [None]:
# ROC AUC of the PCE risk inside each risk category (printed Low -> High)
for risk_category in ['Low Risk', 'Borderline Risk', 'Intermediate Risk', 'High Risk']:
    in_category = pred_df['risk_cat'] == risk_category
    category_auc = roc_auc_score(pred_df.loc[in_category, 'true_label'], pred_df.loc[in_category, 'pce_risk'])
    print(round(category_auc, 3))

### DDH AUC by Risk Category

In [None]:
# ROC AUC of the DDH risk inside each risk category (printed Low -> High)
for risk_category in ['Low Risk', 'Borderline Risk', 'Intermediate Risk', 'High Risk']:
    in_category = pred_df['risk_cat'] == risk_category
    category_auc = roc_auc_score(pred_df.loc[in_category, 'true_label'], pred_df.loc[in_category, 'ddh_risk'])
    print(round(category_auc, 3))

In [None]:
# from_predictions() needs the true labels and the scores; called with no arguments it
# raises TypeError. Draw the PCE curve first, then overlay the DDH curve on the same axes.
# NOTE(review): the choice of pred_df columns is inferred from the comparison cells
# above — confirm this is the intended plot.
roc_display = RocCurveDisplay.from_predictions(pred_df['true_label'], pred_df['pce_risk'], name='PCE')
RocCurveDisplay.from_predictions(pred_df['true_label'], pred_df['ddh_risk'], name='DDH', ax=roc_display.ax_)