In [1]:
# tools
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from matplotlib import pyplot as plt
from numpy import sqrt
from numpy import argmax
import joblib

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
import sklearn.metrics as metrics
from sklearn.metrics import plot_roc_curve
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import confusion_matrix
from sklearn.utils import resample
from pickle import load

# version check
# Prints the interpreter and library versions for this run so the environment
# is recorded next to the results.
# NOTE(review): the xgboost version is not printed here even though
# XGBClassifier is imported above and the loaded model is described as an xgb
# model — consider reporting it as well.
import sys
print("python version")
print(sys.version)
import sklearn
print("sklearn version = ", sklearn.__version__)
print("numpy version = ", np.__version__)
print("pandas version =", pd.__version__)

python version
3.8.5 (default, Sep  4 2020, 07:30:14) 
[GCC 7.3.0]
sklearn version =  0.23.2
numpy version =  1.19.2
pandas version = 1.2.3


In [2]:
# load UA5(60)model and scaler, sample data
# All paths are relative to the notebook's working directory.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load model/scaler files from a trusted source.

## xgb models
# Used below as the `model` argument of model_prediction (predict_proba is called on it).
model60 = joblib.load('ua60_model')

## scaler
# Used below as the `scaler` argument of model_prediction (transform is called on it).
scaler60 = joblib.load('scaler60.pkl')

## sample data
# The preview of sample60_non shows an "Unnamed: 0" column, i.e. the CSV
# carries an unnamed leading column (presumably a saved index — TODO confirm).
sample60_non = pd.read_csv('sample60_non.csv')
sample60_pro = pd.read_csv('sample60_pro.csv')

In [3]:
# sample data preview
sample60_non.head()

Unnamed: 0,eGFR_ab,eGFR_ckd,male,age,he_uph,he_unitr,he_usg,he_upro,he_uglu,he_uket,he_ubil,he_ubld,he_uro
0,0,90.4929,1,45,6.5,0,1.025,0,0,0,0,1,0
1,0,89.95331,0,44,7.5,0,1.01,0,0,0,0,3,0
2,0,100.4339,1,63,6.5,0,1.02,0,0,0,0,0,0
3,0,73.21807,1,30,6.5,0,1.025,1,0,0,0,0,0
4,0,107.2089,0,42,5.5,0,1.03,1,0,1,0,1,0


In [4]:
# Pick individual rows from the two sample tables as one-row DataFrames.
# A list indexer ([[i]]) keeps the result two-dimensional, so every column
# stays addressable by name in model_prediction.
sample_case0, sample_case1 = (sample60_non.iloc[[row]] for row in (2, 4))
sample_case2, sample_case3, sample_case4 = (
    sample60_pro.iloc[[row]] for row in (5, 6, 12)
)

In [5]:
# custom def : standardization and prediction
def model_prediction(
    sample_case,
    scaler,
    model,
    threshold_nonproteinuria=0.44,
    threshold_proteinuria=0.77,
):
    """
    Print one case's urinalysis values and predict whether its eGFR is below 60.

    UA5 type model
    he_usg = Urine specific gravity
    he_uph = Urine pH
    he_ubld = Urine blood
    he_uglu = Urine glucose
    he_upro = Urine protein

    Parameters
    ----------
    sample_case : pandas.DataFrame
        Exactly one row, with columns 'male', 'age', 'he_usg', 'he_uph',
        'he_ubld', 'he_uglu', 'he_upro' and 'eGFR_ckd'. It is not modified.
    scaler : object with ``transform``
        Applied to the columns ['age', 'he_uph', 'he_usg'], in that order.
    model : object with ``predict_proba``
        Column 1 of its output is used as the probability that is compared
        against the threshold (reported as "eGFR<60, abnormal" when exceeded).
    threshold_nonproteinuria : float, default 0.44
        Decision threshold used when he_upro <= 1.
    threshold_proteinuria : float, default 0.77
        Decision threshold used when he_upro > 1.

    Returns
    -------
    numpy.ndarray
        One-element array holding the predicted probability.

    Raises
    ------
    ValueError
        If ``sample_case`` does not contain exactly one row (from ``.item()``).
    """

    # print UA5 and eGFR from input case
    print("Urine specific gravity :", sample_case['he_usg'].item())
    print("Urine pH :", sample_case['he_uph'].item())
    print("Urine blood :", sample_case['he_ubld'].item())
    print("Urine glucose :", sample_case['he_uglu'].item())
    print("Urine protein :", sample_case['he_upro'].item())
    print("real eGFR :", sample_case['eGFR_ckd'].item())

    # standardization columns
    std_cols = ['age', 'he_uph', 'he_usg']
    # model input columns, in the order passed to predict_proba
    feature_cols = ['male', 'he_usg', 'he_uph', 'he_ubld', 'he_uglu', 'he_upro', 'age']

    # Explicit copy: the scaled values are written into this frame, and the
    # caller's data must stay untouched (also avoids SettingWithCopyWarning).
    sample_case_features = sample_case.loc[:, feature_cols].copy()
    sample_case_features[std_cols] = scaler.transform(sample_case_features[std_cols])

    # predict probability by model
    prob = model.predict_proba(sample_case_features)[:, 1]

    # Apply a different threshold according to proteinuria. he_upro > 1 is the
    # proteinuria group, so it must use threshold_proteinuria; all other cases
    # use threshold_nonproteinuria.
    has_proteinuria = sample_case_features['he_upro'].item() > 1
    threshold = threshold_proteinuria if has_proteinuria else threshold_nonproteinuria

    if prob[0] > threshold:
        print("prediction : eGFR<60, abnormal")
    else:
        print("prediction : eGFR>=60, normal")

    return prob

In [6]:
# sample0 test
model_prediction(sample_case0, scaler60, model60)

Urine specific gravity : 1.02
Urine pH : 6.5
Urine blood : 0
Urine glucose : 0
Urine protein : 0
real eGFR : 100.4339
prediction : eGFR>=60, normal


array([0.43377733], dtype=float32)

In [7]:
# sample1 test
model_prediction(sample_case1, scaler60, model60)

Urine specific gravity : 1.03
Urine pH : 5.5
Urine blood : 1
Urine glucose : 0
Urine protein : 1
real eGFR : 107.2089
prediction : eGFR>=60, normal


array([0.10492791], dtype=float32)

In [8]:
# sample2 test
model_prediction(sample_case2, scaler60, model60)

Urine specific gravity : 1.03
Urine pH : 6.5
Urine blood : 0
Urine glucose : 0
Urine protein : 2
real eGFR : 124.442
prediction : eGFR>=60, normal


array([0.07031757], dtype=float32)

In [9]:
# sample3 test
model_prediction(sample_case3, scaler60, model60)

Urine specific gravity : 1.01
Urine pH : 6.0
Urine blood : 3
Urine glucose : 3
Urine protein : 2
real eGFR : 7.545275
prediction : eGFR<60, abnormal


array([0.95431], dtype=float32)

In [10]:
# sample4 test
model_prediction(sample_case4, scaler60, model60)

Urine specific gravity : 1.01
Urine pH : 6.0
Urine blood : 2
Urine glucose : 2
Urine protein : 3
real eGFR : 34.98301
prediction : eGFR<60, abnormal


array([0.94983214], dtype=float32)