In [11]:
# run pre-trained SOFA-LR model
# Load model (2 stages)
import joblib
import pandas as pd
import numpy as np
from scipy.stats import zscore

# Paths of the two serialized model stages: the base classifier and the
# calibration model that is applied to features derived from its output.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so only load
# model files that come from a trusted source.
BASE_MODEL_PATH = 'LR_model.pkl'
CALIBRATION_MODEL_PATH = 'LR_model_cal.pkl'

clf0 = joblib.load(BASE_MODEL_PATH)
clf_cal = joblib.load(CALIBRATION_MODEL_PATH)

In [12]:
# Load the raw feature table and turn it into the normalized matrix expected
# by the stage-1 model.

# Model input features, in the column order used to build X.
FEATURE_COLS = [
    'Age', 'SexDSC', 'GCS', 'PaO2_FiO2',
    'mech_vent_cpap', 'Total_Bilirubin_mg_per_dL', 'Platelets_10_3_per_ml',
    'Creatinine_mg_per_dL', 'Urine_output_mL', 'MAP', 'Dobutamine_any_dose',
    'Dopamine_μg_kg_min', 'Epinephrine_μg_kg_min',
    'Norepinephrine_μg_kg_min',
]
# Features that get an extra "value was missing" indicator column appended.
MISSING_FLAG_COLS = [
    "GCS", "PaO2_FiO2", "Total_Bilirubin_mg_per_dL", "Platelets_10_3_per_ml",
    "Creatinine_mg_per_dL", "Urine_output_mL", "MAP",
]

df = pd.read_csv("Xdata_non.csv")
# Drop the first CSV column (presumably a saved row index -- TODO confirm).
df = df.iloc[:, 1:]
df = df.loc[:, FEATURE_COLS]

# Boolean frame marking missing entries; isna() also copes with non-numeric
# columns, where np.isnan would raise.
isnan_df = df.isna()

# Concatenate the feature values with the missing-value indicators so each
# flagged feature is paired with a column showing where it was missing.
# Built from plain arrays, so X keeps positional integer column labels.
X = pd.DataFrame(
    np.concatenate(
        (df.to_numpy(), isnan_df[MISSING_FLAG_COLS].to_numpy()),
        axis=1,
    )
)
X = X.fillna(0)
x = X.iloc[:, 0]

# Normalize columns of X.
# NOTE(review): the z-score uses the mean/std of THIS data set, not stored
# training statistics -- confirm that matches how the model was trained.
# A zero-variance column (e.g. an indicator where nothing is missing) makes
# zscore return NaN (0/0); map those to 0, i.e. "exactly at the column mean",
# so no NaN reaches the model.
X_norm = X.apply(zscore).fillna(0)
X_norm = np.array(X_norm, dtype='float')

In [13]:
# function that prepares the data for calibration of the model
def prepareDataForCalibration(pred, m_train=None, sd_train=None):
    """Convert predicted probabilities into inputs for the calibration model.

    The probabilities are mapped to logits, standardized, and then spread over
    five columns according to which probability bin each prediction falls in
    ([0, 0.2), [0.2, 0.4), [0.4, 0.6), [0.6, 0.8), [0.8, 1]). Each row of the
    result holds the standardized logit in the column of its bin and zeros
    elsewhere.

    Parameters
    ----------
    pred : array-like, 1-D
        Predicted probabilities. Values must lie strictly between 0 and 1,
        otherwise the logit is not finite.
    m_train, sd_train : float, optional
        Mean and standard deviation used to standardize the logits. When
        omitted they are computed from ``pred`` itself, which is the original
        behavior.
        NOTE(review): the names suggest these were meant to be training-set
        statistics; pass them explicitly if the calibration model was fitted
        that way -- confirm with the model authors.

    Returns
    -------
    logits : numpy.ndarray
        Standardized logits, same shape as ``pred``.
    z : numpy.ndarray
        Array of shape ``(len(pred), 5)`` with the binned standardized logits.
    """
    pred = np.asarray(pred)
    if not np.issubdtype(pred.dtype, np.floating):
        pred = pred.astype(float)

    logits = np.log(pred / (1 - pred))
    if m_train is None:
        m_train = np.mean(logits)
    if sd_train is None:
        sd_train = np.std(logits)
    # A single sample or identical predictions give a zero spread; dividing by
    # it would turn every logit into NaN, so fall back to centering only.
    if sd_train == 0:
        sd_train = 1.0
    logits = (logits - m_train) / sd_train

    # Half-open bins: a prediction exactly on an edge (0.2, 0.4, 0.6, 0.8)
    # belongs to the bin above it instead of falling into no bin at all.
    bin_edges = [0.2, 0.4, 0.6, 0.8]
    bin_index = np.digitize(pred, bin_edges)

    z = np.stack(
        [np.where(bin_index == k, logits, 0.0) for k in range(len(bin_edges) + 1)],
        axis=1,
    )

    return logits, z

In [14]:
# Two-stage inference: the base classifier scores the normalized features,
# its second probability column is converted into calibration features, and
# the calibration model produces the final probabilities.
stage1_proba = clf0.predict_proba(X_norm)
pred1 = stage1_proba[:, 1]
logits, z = prepareDataForCalibration(pred1)
pred2 = clf_cal.predict_proba(z)
# pred2 has 2 columns. Original author's note: col1 is prob(death), col2 is
# prob(survive).
# NOTE(review): column order follows the fitted model's class ordering --
# verify against clf_cal.classes_ before relying on this.