In [1]:
import logging

logging.basicConfig(level = logging.INFO)

from pycaret.classification import setup, create_model, tune_model
import pandas as pd

## 载入数据

In [2]:
data = pd.read_csv('../data/raw/patient_data.csv')
data.drop('INPATIENT_ID', axis = 1, inplace = True)

numeric_cols = [
    'AGE', 'ISS', 'CAPRINI_SCORE', 'T', 'P', 'R', 'MBP', 'SHOCK_INDEX', 'HEIGHT', 'WEIGHT',
    'BMI', 'RBC', 'HGB', 'PLT', 'WBC', 'ALB', 'CRE', 'UA', 'AST', 'ALT', 'GLU', 'TG', 'CHO',
    'CA', 'MG', 'LDL', 'NA', 'K', 'CL', 'GFR', 'PT', 'FIB', 'DD', 'CK', 'INR'
]

In [5]:
# 创建模型和训练调参.
# ID          Name
# --------    ----------
# 'lr'        Logistic Regression
# 'knn'       K Nearest Neighbour
# 'nb'        Naive Bayes
# 'dt'        Decision Tree Classifier
# 'svm'       SVM - Linear Kernel
# 'rbfsvm'    SVM - Radial Kernel
# 'gpc'       Gaussian Process Classifier
# 'mlp'       Multi Level Perceptron
# 'ridge'     Ridge Classifier
# 'rf'        Random Forest Classifier
# 'qda'       Quadratic Discriminant Analysis
# 'ada'       Ada Boost Classifier
# 'gbc'       Gradient Boosting Classifier
# 'lda'       Linear Discriminant Analysis
# 'et'        Extra Trees Classifier
# 'xgboost'   Extreme Gradient Boosting
# 'lightgbm'  Light Gradient Boosting
# 'catboost'  CatBoost Classifier
setup(data, target = 'VTE', numeric_features = numeric_cols)
clf = create_model('rf')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7656,0.6864,0.0714,0.3333,0.1176,0.0438,0.0615
1,0.7969,0.7264,0.2143,0.6,0.3158,0.2268,0.2685
2,0.8254,0.7715,0.2308,0.75,0.3529,0.2834,0.3498
3,0.7778,0.6931,0.1538,0.4,0.2222,0.1215,0.1405
4,0.8571,0.8438,0.3077,1.0,0.4706,0.4137,0.5106
5,0.7619,0.7485,0.1538,0.3333,0.2105,0.0922,0.1018
6,0.7937,0.6223,0.1538,0.5,0.2353,0.1531,0.1889
7,0.7937,0.6844,0.0714,1.0,0.1333,0.1069,0.2376
8,0.7778,0.6538,0.0714,0.5,0.125,0.0735,0.121
9,0.7619,0.7267,0.0,0.0,0.0,-0.0305,-0.0679


INFO:logs:create_model_container: 1
INFO:logs:master_model_container: 1
INFO:logs:display_container: 1
INFO:logs:RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
                       oob_score=False, random_state=1627, verbose=0,
                       warm_start=False)
INFO:logs:create_model() succesfully completed......................................


In [6]:
clf_tuned = tune_model(clf, optimize = 'F1')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8281,0.6857,0.2857,0.8,0.4211,0.3457,0.4093
1,0.75,0.6671,0.1429,0.3333,0.2,0.0791,0.0891
2,0.8413,0.8985,0.2308,1.0,0.375,0.3226,0.4385
3,0.8095,0.7854,0.2308,0.6,0.3333,0.247,0.2856
4,0.7937,0.85,0.2308,0.5,0.3158,0.2133,0.2354
5,0.7778,0.7962,0.0769,0.3333,0.125,0.0516,0.0702
6,0.8095,0.6438,0.2308,0.6,0.3333,0.247,0.2856
7,0.8254,0.7274,0.2857,0.8,0.4211,0.3444,0.408
8,0.8095,0.7478,0.1429,1.0,0.25,0.2059,0.3388
9,0.8413,0.6706,0.2857,1.0,0.4444,0.3836,0.4871


INFO:logs:create_model_container: 2
INFO:logs:master_model_container: 2
INFO:logs:display_container: 2
INFO:logs:RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=30, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=-1,
                       oob_score=False, random_state=1627, verbose=0,
                       warm_start=False)
INFO:logs:tune_model() succesfully completed......................................
