In [1]:
import pandas as pd
from sklearn.metrics import confusion_matrix

In [2]:
# Load the historic first-prediction extract from its relative CSV path
historic_csv = "../historic_first_pred.csv"
data = pd.read_csv(historic_csv)

# Show the loaded frame
data

Unnamed: 0,appointment_id,age_deid,bmi_deid,ethnicity,ethnic_category_black,ethnic_category_unknown,gender_male,imd19_quintile,main_spoken_language_other,current_smoker,...,service_mapping_paeds,service_mapping_priority,service_mapping_surgical,weekswaiting,no_show,did_not_attend_risk,did_not_attend_risk_group,prediction_type,case_control,use_case
0,10079786,52,27.45,,0,1,1,2,0,0,...,0,0,0,,0.0,0.264963,High Risk,First prediction,1.0,3
1,10164094,52,27.45,,1,0,1,2,0,1,...,0,0,0,,1.0,0.266140,High Risk,First prediction,1.0,3
2,10193130,57,21.70,,0,0,1,3,0,1,...,0,0,0,,0.0,0.202084,High Risk,First prediction,1.0,3
3,10211058,32,39.45,,0,1,0,2,0,0,...,0,0,0,,1.0,0.062844,Low Risk,First prediction,1.0,3
4,10550310,72,39.45,,1,0,1,4,0,0,...,0,0,0,,0.0,0.040898,Low Risk,First prediction,1.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139396,6850436,57,39.45,,0,0,1,4,0,0,...,0,0,0,,0.0,0.236635,High Risk,First prediction,1.0,3
139397,8776815,27,27.45,,1,0,1,2,0,0,...,0,0,0,,0.0,0.561522,Very High Risk,First prediction,1.0,3
139398,9143144,32,27.45,,1,0,1,2,0,0,...,0,0,0,,0.0,0.393163,High Risk,First prediction,1.0,3
139399,9421052,62,39.45,,1,0,0,4,0,0,...,0,0,0,,0.0,0.250568,High Risk,First prediction,1.0,3


In [3]:
# Row count of the historic extract, used below as the total number of appointments
total_appts = len(data)
total_appts

139401

### Risk category scenario 1
- Predicted no show: very high risk
- Predicted show: high, moderate, low risk

In [4]:
# Transform risk categories to binary:
# only "Very High Risk" is a predicted no-show (1); every other group is a predicted show (0)
risk_mapping = {
    "Low Risk": 0,
    "Moderate Risk": 0,
    "High Risk": 0,
    "Very High Risk": 1,
}

# Work on a copy so `data` is left untouched, then map each risk group to its binary label
trans_data1 = data.copy()
trans_data1["pred_no_show"] = trans_data1["did_not_attend_risk_group"].map(risk_mapping)

# Series.map() silently yields NaN for any value missing from the dict; fail loudly instead
assert trans_data1["pred_no_show"].notna().all(), (
    "did_not_attend_risk_group contains values missing from risk_mapping"
)

trans_data1[["no_show", "did_not_attend_risk_group", "pred_no_show"]]

Unnamed: 0,no_show,did_not_attend_risk_group,pred_no_show
0,0.0,High Risk,0
1,1.0,High Risk,0
2,0.0,High Risk,0
3,1.0,Low Risk,0
4,0.0,Low Risk,0
...,...,...,...
139396,0.0,High Risk,0
139397,0.0,Very High Risk,1
139398,0.0,High Risk,0
139399,0.0,High Risk,0


In [5]:
# Calculate confusion matrix (rows = actual no_show, columns = predicted no-show).
# labels=[0, 1] pins the result to the 2x2 layout [[TN, FP], [FN, TP]] even if one
# class happens to be absent from both the actual and the predicted values.
cm1 = confusion_matrix(
    trans_data1["no_show"],
    trans_data1["pred_no_show"],
    labels=[0, 1],
)
cm1

array([[123079,    717],
       [ 14947,    658]])

In [6]:
# Unpack the 2x2 confusion matrix row by row: first row is (TN, FP), second row is (FN, TP)
(TN1, FP1), (FN1, TP1) = cm1

# PPV (precision) = TP / (TP + FP): share of predicted no-shows that were actual no-shows
PPV1 = TP1 / (FP1 + TP1)
PPV1

0.47854545454545455

In [7]:
# True positives as a share of all appointments (TP / total).
# Note: this is a fraction in [0, 1], not a percentage.
true_no_show1 = TP1 / total_appts
true_no_show1

0.004720195694435478

### Risk category scenario 2
- Predicted no show: very high, high risk
- Predicted show: moderate, low risk

In [8]:
# Transform risk categories to binary:
# "High Risk" and "Very High Risk" are predicted no-shows (1); the rest are predicted shows (0)
risk_mapping = {
    "Low Risk": 0,
    "Moderate Risk": 0,
    "High Risk": 1,
    "Very High Risk": 1,
}

# Work on a copy so `data` is left untouched, then map each risk group to its binary label
trans_data2 = data.copy()
trans_data2["pred_no_show"] = trans_data2["did_not_attend_risk_group"].map(risk_mapping)

# Series.map() silently yields NaN for any value missing from the dict; fail loudly instead
assert trans_data2["pred_no_show"].notna().all(), (
    "did_not_attend_risk_group contains values missing from risk_mapping"
)

trans_data2[["no_show", "did_not_attend_risk_group", "pred_no_show"]]

Unnamed: 0,no_show,did_not_attend_risk_group,pred_no_show
0,0.0,High Risk,1
1,1.0,High Risk,1
2,0.0,High Risk,1
3,1.0,Low Risk,0
4,0.0,Low Risk,0
...,...,...,...
139396,0.0,High Risk,1
139397,0.0,Very High Risk,1
139398,0.0,High Risk,1
139399,0.0,High Risk,1


In [9]:
# Calculate confusion matrix (rows = actual no_show, columns = predicted no-show).
# labels=[0, 1] pins the result to the 2x2 layout [[TN, FP], [FN, TP]] even if one
# class happens to be absent from both the actual and the predicted values.
cm2 = confusion_matrix(
    trans_data2["no_show"],
    trans_data2["pred_no_show"],
    labels=[0, 1],
)
cm2

array([[110783,  13013],
       [ 10986,   4619]])

In [10]:
# Unpack the 2x2 confusion matrix row by row: first row is (TN, FP), second row is (FN, TP)
(TN2, FP2), (FN2, TP2) = cm2

# PPV (precision) = TP / (TP + FP): share of predicted no-shows that were actual no-shows
PPV2 = TP2 / (FP2 + TP2)
PPV2

0.26196687840290384

In [11]:
# True positives as a share of all appointments (TP / total).
# Note: this is a fraction in [0, 1], not a percentage.
true_no_show2 = TP2 / total_appts
true_no_show2

0.033134626006987036

### Risk category scenario 3
- Predicted no show: very high, high, moderate risk
- Predicted show: low risk

In [12]:
# Transform risk categories to binary:
# only "Low Risk" is a predicted show (0); every other group is a predicted no-show (1)
risk_mapping = {
    "Low Risk": 0,
    "Moderate Risk": 1,
    "High Risk": 1,
    "Very High Risk": 1,
}

# Work on a copy so `data` is left untouched, then map each risk group to its binary label
trans_data3 = data.copy()
trans_data3["pred_no_show"] = trans_data3["did_not_attend_risk_group"].map(risk_mapping)

# Series.map() silently yields NaN for any value missing from the dict; fail loudly instead
assert trans_data3["pred_no_show"].notna().all(), (
    "did_not_attend_risk_group contains values missing from risk_mapping"
)

trans_data3[["no_show", "did_not_attend_risk_group", "pred_no_show"]]

Unnamed: 0,no_show,did_not_attend_risk_group,pred_no_show
0,0.0,High Risk,1
1,1.0,High Risk,1
2,0.0,High Risk,1
3,1.0,Low Risk,0
4,0.0,Low Risk,0
...,...,...,...
139396,0.0,High Risk,1
139397,0.0,Very High Risk,1
139398,0.0,High Risk,1
139399,0.0,High Risk,1


In [13]:
# Calculate confusion matrix (rows = actual no_show, columns = predicted no-show).
# labels=[0, 1] pins the result to the 2x2 layout [[TN, FP], [FN, TP]] even if one
# class happens to be absent from both the actual and the predicted values.
cm3 = confusion_matrix(
    trans_data3["no_show"],
    trans_data3["pred_no_show"],
    labels=[0, 1],
)
cm3

array([[80215, 43581],
       [ 6061,  9544]])

In [14]:
# Unpack the 2x2 confusion matrix row by row: first row is (TN, FP), second row is (FN, TP)
(TN3, FP3), (FN3, TP3) = cm3

# PPV (precision) = TP / (TP + FP): share of predicted no-shows that were actual no-shows
PPV3 = TP3 / (FP3 + TP3)
PPV3

0.17965176470588234

In [15]:
# True positives as a share of all appointments (TP / total).
# Note: this is a fraction in [0, 1], not a percentage.
true_no_show3 = TP3 / total_appts
true_no_show3

0.0684643582183772