In [1]:
import numpy as np
# import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import joblib

In [2]:
# ---------------------------------------------------------------------------
# User input: this is the only cell that needs to be edited.
# ---------------------------------------------------------------------------

# Replace the value inside each pair of brackets with the patient's own
# measurement. Keep the keys, their order and the surrounding brackets as they
# are: the feature table is built directly from this dictionary.
user_indicators = {
    # Patient's biological sex: 0 for female, 1 for male
    'Sex': [0],
    # Waist circumference (cm)
    'WC': [80],
    # Patient's age (years)
    'Age': [45],
    # Fasting blood glucose level (mmol/L)
    'FBS': [5],
    # Triglycerides (mmol/L)
    'TG': [1.0],
    # Total cholesterol (mmol/L)
    'TCH': [4.5],
    # Low-density lipoprotein cholesterol (mmol/L)
    'LDL': [2.7],
    # High-density lipoprotein cholesterol (mmol/L)
    'HDL': [1.6],
    # Uric acid (umol/L)
    'UA': [300],
    # Alanine aminotransferase (U/L)
    'ALT': [28],
    # Aspartate aminotransferase (U/L)
    'AST': [16],
    # Gamma-glutamyl transpeptidase (U/L)
    'GGT': [20],
    # Creatinine (umol/L) -- the original note read "Creatine"; presumably a
    # typo for serum creatinine, TODO confirm with the model authors
    'Cr': [66],
    # Body mass index (kg/m2) = body weight (kg) / body height (m) / body height (m)
    'BMI': [20.8],
    # Mean arterial pressure (mmHg) = [systolic BP + (2 x diastolic BP)] / 3
    'MAP': [93.3],
}

# Model used for the prediction.
#   Options: XGB, LGB, LR, RF
#   Default: XGB
model_name = 'XGB'

In [3]:
# Everything from here on can be run as-is; no edits are required.

# Decision cut-off for each selectable model: a predicted risk at or above the
# model's cut-off leads to an ultrasound recommendation further down.
# (How these values were chosen is not documented in this notebook.)
thresholds = dict(
    LGB=0.5565,
    XGB=0.579,
    RF=0.550,
    LR=0.49,
)

In [4]:
# Build the single-row feature table from the user's input. Every column name
# gets a '_bsl' suffix (presumably "baseline" -- the naming the saved models
# were trained with; TODO confirm).
df = pd.DataFrame.from_dict(user_indicators).add_suffix('_bsl')

# Indicators that are fed to the model on a log10 scale instead of raw values.
log_trans = ['FBS_bsl', 'TG_bsl', 'TCH_bsl', 'LDL_bsl', 'HDL_bsl', 'UA_bsl', 'ALT_bsl', 'AST_bsl', 'GGT_bsl', 'Cr_bsl', 'BMI_bsl', 'MAP_bsl']

# np.log10 maps 0 to -inf and negative numbers to NaN while only emitting a
# RuntimeWarning, so a mistyped value would still reach the model and yield a
# meaningless risk. Stop here with a clear message instead.
non_positive = [col for col in log_trans if (df[col] <= 0).any()]
if non_positive:
    raise ValueError(
        'These indicators must be greater than 0 to be log-transformed: {}'.format(
            ', '.join(non_positive)
        )
    )

# Append the '<name>_log' columns in the order of `log_trans`, then discard the
# raw columns. The untransformed columns stay first, followed by the log
# columns, exactly as before.
for col in log_trans:
    df[col + '_log'] = np.log10(df[col])
df = df.drop(columns=log_trans)


In [5]:
# Reject an unknown model name up front so that a typo produces a clear message
# rather than a missing-file error from the loader.
if model_name not in thresholds:
    raise ValueError(
        'Unknown model_name {!r}; choose one of: {}'.format(
            model_name, ', '.join(sorted(thresholds))
        )
    )

# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading; only use model files obtained from a trusted source.
# The path uses a forward slash, which is accepted on Windows, Linux and macOS;
# a literal backslash is only treated as a separator on Windows.
clf = joblib.load('Pre-trained models/{}.pkl'.format(model_name))
threshold = thresholds[model_name]

# First (only) row, second column of the probability matrix -- presumably the
# probability of the positive class; verify against clf.classes_.
risk = clf.predict_proba(df)[0, 1]

# A risk at or above the model-specific cut-off triggers the recommendation.
if risk >= threshold:
    advice = 'Recommendation of thyroid ultrasound for this patient.'
else:
    advice = 'Thyroid ultrasound may not be necessary at the moment.'

print('The 3-year risk of thyroid nodule is estimated to be: {:.1%}\n{}'.format(risk, advice))

The 3-year risk of thyroid nodule is estimated to be: 65.3%
Recommendation of thyroid ultrasound for this patient.
