In [58]:
import pandas as pd
from sklearn.metrics import confusion_matrix

In [59]:
# Load the historic appointment records for the "first prediction" extract.
data = pd.read_csv(filepath_or_buffer="../historic_appts_first_pred_trans.csv")
data

Unnamed: 0,appointment_id,age_deid,bmi_deid,ethnicity,ethnic_category_black,ethnic_category_unknown,gender_male,imd19_quintile,main_spoken_language_other,current_smoker,...,prediction_type,case_control,use_case,appt_date,appt_month_trans,hospital_service_display_trans,service_mapping_trans,reason_display_trans,appointment_type_trans,did_not_attend_risk_group_trans
0,10079786,52,27.45,,0,1,1,2,0,0,...,First prediction,1.0,3,2024-03-20,3,15,2,76,0,3
1,10164094,52,27.45,,1,0,1,2,0,1,...,First prediction,1.0,3,2023-11-24,11,71,2,376,0,3
2,10211058,32,39.45,,0,1,0,2,0,0,...,First prediction,1.0,3,2024-01-19,1,71,2,376,0,1
3,10550310,72,39.45,,1,0,1,4,0,0,...,First prediction,1.0,3,2024-01-19,1,71,2,376,0,1
4,10670198,32,39.45,,1,0,0,2,0,1,...,First prediction,1.0,3,2023-12-01,12,71,2,376,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139239,13147714,2,27.45,,0,0,0,1,0,0,...,First prediction,1.0,3,2023-12-29,12,95,8,499,0,2
139240,13257461,67,40.00,,0,0,0,1,0,0,...,First prediction,1.0,3,2024-02-01,2,125,2,694,0,2
139241,13390142,17,27.45,,1,0,1,3,0,0,...,First prediction,1.0,3,2024-03-11,3,95,8,452,0,3
139242,13440971,77,21.70,,0,1,1,2,0,0,...,First prediction,1.0,3,2024-03-20,3,36,2,215,1,2


In [60]:
# Number of rows in the loaded frame; serves as the denominator for
# appointment-level rates computed in the scenario cells.
total_appts = len(data)
total_appts

139244

### Risk category scenario 1
- Predicted no show: very high risk
- Predicted show: high, moderate, low risk

In [61]:
# Scenario 1 threshold: only risk group 4 is treated as a predicted no-show;
# groups 1-3 are treated as predicted attendance.
risk_mapping = {1: 0, 2: 0, 3: 0, 4: 1}

# Build the binary prediction column on a new frame so `data` is left unmodified.
trans_data1 = data.assign(
    pred_no_show=data["did_not_attend_risk_group_trans"].map(risk_mapping)
)

# Show the actual outcome next to the risk group and the derived prediction.
trans_data1[["no_show", "did_not_attend_risk_group_trans", "pred_no_show"]]

Unnamed: 0,no_show,did_not_attend_risk_group_trans,pred_no_show
0,0.0,3,0
1,1.0,3,0
2,1.0,1,0
3,0.0,1,0
4,1.0,3,0
...,...,...,...
139239,1.0,2,0
139240,0.0,2,0
139241,1.0,3,0
139242,1.0,2,0


In [62]:
# Confusion matrix for scenario 1: rows are actual outcomes, columns are predictions.
cm1 = confusion_matrix(
    y_true=trans_data1["no_show"],
    y_pred=trans_data1["pred_no_show"],
)
cm1

array([[122990,    716],
       [ 14883,    655]])

In [63]:
# Positive predictive value (precision) = TP / (TP + FP).
# The matrix rows are actual classes and the columns are predicted classes,
# so unpacking row by row yields (TN, FP) followed by (FN, TP).
(TN1, FP1), (FN1, TP1) = cm1

PPV1 = TP1 / (FP1 + TP1)
PPV1

0.4777534646243618

In [64]:
# Share of all appointments that were flagged as no-shows and truly did not
# attend (true positives). The value is a proportion, not a percentage.
true_no_show1 = TP1 / total_appts
true_no_show1

0.004703972882134957

### Risk category scenario 2
- Predicted no show: very high, high risk
- Predicted show: moderate, low risk

In [65]:
# Scenario 2 threshold: risk groups 3 and 4 are treated as predicted no-shows;
# groups 1 and 2 are treated as predicted attendance.
risk_mapping = {1: 0, 2: 0, 3: 1, 4: 1}

# Build the binary prediction column on a new frame so `data` is left unmodified.
trans_data2 = data.assign(
    pred_no_show=data["did_not_attend_risk_group_trans"].map(risk_mapping)
)

# Show the actual outcome next to the risk group and the derived prediction.
trans_data2[["no_show", "did_not_attend_risk_group_trans", "pred_no_show"]]

Unnamed: 0,no_show,did_not_attend_risk_group_trans,pred_no_show
0,0.0,3,1
1,1.0,3,1
2,1.0,1,0
3,0.0,1,0
4,1.0,3,1
...,...,...,...
139239,1.0,2,0
139240,0.0,2,0
139241,1.0,3,1
139242,1.0,2,0


In [66]:
# Confusion matrix for scenario 2: rows are actual outcomes, columns are predictions.
cm2 = confusion_matrix(
    y_true=trans_data2["no_show"],
    y_pred=trans_data2["pred_no_show"],
)
cm2

array([[110706,  13000],
       [ 10937,   4601]])

In [67]:
# Positive predictive value (precision) = TP / (TP + FP).
# The matrix rows are actual classes and the columns are predicted classes,
# so unpacking row by row yields (TN, FP) followed by (FN, TP).
(TN2, FP2), (FN2, TP2) = cm2

PPV2 = TP2 / (FP2 + TP2)
PPV2

0.2614056019544344

In [68]:
# Share of all appointments that were flagged as no-shows and truly did not
# attend (true positives). The value is a proportion, not a percentage.
true_no_show2 = TP2 / total_appts
true_no_show2

0.03304271638275258

### Risk category scenario 3
- Predicted no show: very high, high, moderate risk
- Predicted show: low risk

In [69]:
# Scenario 3 threshold: risk groups 2, 3 and 4 are treated as predicted
# no-shows; only group 1 is treated as predicted attendance.
risk_mapping = {1: 0, 2: 1, 3: 1, 4: 1}

# Build the binary prediction column on a new frame so `data` is left unmodified.
trans_data3 = data.assign(
    pred_no_show=data["did_not_attend_risk_group_trans"].map(risk_mapping)
)

# Show the actual outcome next to the risk group and the derived prediction.
trans_data3[["no_show", "did_not_attend_risk_group_trans", "pred_no_show"]]

Unnamed: 0,no_show,did_not_attend_risk_group_trans,pred_no_show
0,0.0,3,1
1,1.0,3,1
2,1.0,1,0
3,0.0,1,0
4,1.0,3,1
...,...,...,...
139239,1.0,2,1
139240,0.0,2,1
139241,1.0,3,1
139242,1.0,2,1


In [70]:
# Confusion matrix for scenario 3: rows are actual outcomes, columns are predictions.
cm3 = confusion_matrix(
    y_true=trans_data3["no_show"],
    y_pred=trans_data3["pred_no_show"],
)
cm3

array([[80161, 43545],
       [ 6033,  9505]])

In [71]:
# Positive predictive value (precision) = TP / (TP + FP).
# The matrix rows are actual classes and the columns are predicted classes,
# so unpacking row by row yields (TN, FP) followed by (FN, TP).
(TN3, FP3), (FN3, TP3) = cm3

PPV3 = TP3 / (FP3 + TP3)
PPV3

0.17917059377945335

In [72]:
# Share of all appointments that were flagged as no-shows and truly did not
# attend (true positives). The value is a proportion, not a percentage.
true_no_show3 = TP3 / total_appts
true_no_show3

0.06826146907586682