In [22]:
# basis package
import os
import pandas as pd
import numpy as np
# model package
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

## 函数
### 预处理

In [23]:
def Data_preprocessing(df):
        # Move label to first column
        cols = list(df)
        cols.insert(0, cols.pop(cols.index('label')))
        df = df.loc[:, cols]

        #split train data and test data
        X = df.iloc[:, 1:]
        y = df.iloc[:, 0]
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,test_size=0.2)

        return X,y,X_train, X_test, y_train, y_test

### auto_adjustment

In [24]:
# cv select best num_boost_round(xg)
def get_num_rounds(dtrain, params, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    if useTrainCV:
        cvresult = xgb.cv(params, dtrain, num_boost_round=params['n_estimators'], nfold=5,
                          metrics='auc', early_stopping_rounds=early_stopping_rounds, verbose_eval=True)
        params['n_estimators'] = cvresult.shape[0]
    num_rounds = params['n_estimators']
    print(num_rounds)
    return params,num_rounds

In [25]:
# Set default parameters or convert parameters from xgb format to xgb_sk format
def set_params(gsearch, X_train, y_train, n_estimators=100):   
    # Set default parameters
    if gsearch == None:
        value_label = pd.value_counts(y_train)
        # false/ture
        F = list(value_label)[0]
        T = list(value_label)[1]
        F_T = F/T
        params = {
            'objective': 'binary:logistic',  # 学习目标：二元分类的逻辑回归，输出概率
            'colsample_bytree': 0.8,  # 子采样率
            'eta': 0.3,  # 学习速率
            'max_depth': 9,  # 最大深度9
            'n_estimators': n_estimators,  # 最大迭代次数
            'scale_pos_weight': F_T,  # 正负权重平衡
            'max_delta_step': 0,  # 子叶输出最大步长
            'subsample': 0.8,  # 训练实例的子样本比率
            'gamma': 0.0,  # 节点分裂所需的最小损失函数下降值
            'min_child_weight': 3,  # 决定最小叶子节点样本权重和
            'nthread': 4,  # 线程数
            'alpha': 1e-05,  # L1正则化速率
            'lambda': 1e-05  # L2正则化速率

        }
    # convert parameters from xgb format to xgb_sk format
    else:
            # convert parameters from xgb format to xgb_sk format function
        def get_sk_params(gsearch):
            params = gsearch.best_estimator_.get_params()
            params_sk = {
                'objective': 'binary:logistic',  # 学习目标：二元分类的逻辑回归，输出概率
                'colsample_bytree': 0.55,  # 子采样率
                'eta': 0.3,  # 学习速率
                'max_depth': 9,  # 最大深度9
                'n_estimators': 1000,  # 最大迭代次数
                'scale_pos_weight': 1,  # 正负权重平衡
                'max_delta_step': 0,  # 子叶输出最大步长
                'subsample': 0.74,  # 训练实例的子样本比率
                'gamma': 0.0,  # 节点分裂所需的最小损失函数下降值
                'min_child_weight': 3,  # 决定最小叶子节点样本权重和
                'nthread': 4,  # 线程数
                'alpha': 1e-05,  # L1正则化速率
                'lambda': 1e-05  # L2正则化速率
            }
            params["alpha"] = params.pop("reg_alpha")
            params["lambda"] = params.pop("reg_lambda")
            params_list = list(params)
            for param in params_list:
                if param not in params_sk:
                    params.pop(param)
            return params
        params = get_sk_params(gsearch)
    # set model
    dtrain = xgb.DMatrix(X_train, y_train)
    # dtest = xgb.DMatrix(X_test, y_test)
    num_rounds = params['n_estimators']
    return params, dtrain, num_rounds

In [26]:
# xgb_gsearch
def xgb_gsearch(X_train, y_train, num_rounds):
    #get scale_pos_weight
    value_label = pd.value_counts(y_train)
    # false/ture
    F = list (value_label)[0]
    T = list (value_label)[1]
    F_T = F/T
    #max_depth and min_child_weight
    param_test = {'max_depth': range(5, 7, 1),  # 最大深度
                  'min_child_weight': range(0, 10, 1)  # 决定最小叶子节点样本权重和
                  }
    estimator = xgb.XGBClassifier(
        objective='binary:logistic',  # 学习目标：二元分类的逻辑回归，输出概率
        colsample_bytree=0.9,  # 子采样率
        eta=0.3,  # 学习速率
        n_estimators=num_rounds,  # 最大迭代次数
        scale_pos_weight=F_T,  # 正负权重平衡
        max_delta_step=0,  # 子叶输出最大步长
        subsample=0.9,  # 训练实例的子样本比率
        gamma=1.6,  # 节点分裂所需的最小损失函数下降值
        nthread=4,  # 线程速率
        reg_lambda=0.1,  # L2正则化速率
        reg_alpha=1e-5,  # L1正则化速率
        max_depth=6,  # 最大深度
        min_child_weight=3  # 决定最小叶子节点样本权重和
    )
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=5)
    gsearch.fit(X_train, y_train)

    # gamma
    param_test = {'gamma': [i/10 for i in range(0, 30, 1)],  # 节点分裂所需的最小损失函数下降值
                  }
    estimator = gsearch.best_estimator_
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=3)
    gsearch.fit(X_train, y_train)

    #colsample_bytree and subsample
    param_test = {'colsample_bytree': [i/10.0 for i in range(6, 10, 1)],  # 子采样率
                  'subsample': [i/10.0 for i in range(6, 10, 1)]  # 训练实例的子样本比率
                  }
    estimator = gsearch.best_estimator_
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=3)
    gsearch.fit(X_train, y_train)
    best_parameters = gsearch.best_estimator_.get_params()

    best_param = []
    for param_name in sorted(param_test.keys()):
        best_param.append([param_name, best_parameters[param_name]])
    p1 = best_param[0][1]
    p2 = best_param[1][1]
    max_edge1 = int(p1*100+5)
    min_edge1 = int(p1*100-5)
    max_edge2 = int(p2*100+5)
    min_edge2 = int(p2*100-5)
    param_test = {'colsample_bytree': [i/100.0 for i in range(min_edge1, max_edge1, 1)],  # 子采样率
                  # 训练实例的子样本比率
                  'subsample': [i/100.0 for i in range(min_edge2, max_edge2, 1)]
                  }
    estimator = gsearch.best_estimator_
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=3)
    gsearch.fit(X_train, y_train)

    #reg_alpha and reg_lambda
    param_test = {'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100],  # L1正则化速率
                  'reg_lambda': [1e-5, 1e-2, 0.1, 1, 100],  # L2正则化速率
                  }
    estimator = gsearch.best_estimator_
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=3)
    gsearch.fit(X_train, y_train)

    # max_delta_step
    param_test = {'max_delta_step': range(0, 15)  # 子叶输出最大步长
                  }
    estimator = gsearch.best_estimator_
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=3)
    gsearch.fit(X_train, y_train)

    # eta
    param_test = {'eta': [i/10.0 for i in range(1, 10)],  # 学习速率
                  }
    estimator = gsearch.best_estimator_
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           n_jobs=4, scoring='roc_auc', cv=3)
    gsearch.fit(X_train, y_train)
    return gsearch

### score

In [27]:
# accuracy and auc
def score(model,X_data,y_data):
    dtest = xgb.DMatrix(X_data)
    preds =  model.predict(dtest)
    predictions = [round(value) for value in preds]
    accuracy = accuracy_score(y_data, predictions)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    roc_auc = roc_auc_score(y_data, predictions)
    print("auc: %.2f%%" % (roc_auc * 100.0))

## example

In [28]:
from sklearn.datasets import load_breast_cancer

In [29]:
breast_cancer = load_breast_cancer()

In [30]:
df_label = pd.DataFrame({'label': breast_cancer.target})

df_feature = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)

df = pd.concat([df_label, df_feature], axis=1)
X, y, X_train, X_test, y_train, y_test = Data_preprocessing(df)

### cv

In [31]:
#Automatically set default parameters
params,dtrain,num_rounds = set_params(None,X_train,y_train,200)

In [32]:
#get best num_rounds
params,num_rounds = get_num_rounds(dtrain,params,useTrainCV = True,early_stopping_rounds = 50)

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=5
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=5
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=5
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=4
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=3
[0]	train-auc:0.989137+0.00254124	test-auc:0.953947+0.0170559
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=8
[17:59:47] C:\Users\Administrato

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=3
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=3
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
[11]	train-auc:0.999217+0.00034382	test-auc:0.988226+0.00823166
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 0 pruned nodes, max_depth=2
[17:59:47] C:\Users\Administrator\De

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[22]	train-auc:0.99964+0.000229263	test-auc:0.987813+0.00873966
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\De

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[33]	train-auc:0.999826+0.000177355	test-auc:0.98792+0.00886879
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\De

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[44]	train-auc:0.999826+0.000175067	test-auc:0.988215+0.00836196
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\D

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[55]	train-auc:0.999871+0.000198687	test-auc:0.989165+0.00741112
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\D

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[66]	train-auc:0.999884+0.00017038	test-auc:0.988853+0.00709856
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\De

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[77]	train-auc:0.999878+0.000182908	test-auc:0.989456+0.00710968
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\D

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[88]	train-auc:0.999884+0.00017038	test-auc:0.989258+0.00703519
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\De

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[98]	train-auc:0.999884+0.00017038	test-auc:0.989152+0.00719395
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[99]	train-auc:0.999884+0.00017038	t

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[109]	train-auc:0.99989+0.000173978	test-auc:0.989049+0.0072557
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\De

[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 0 pruned nodes, max_depth=1
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[120]	train-auc:0.99989+0.000173978	test-auc:0.989049+0.0072557
[17:59:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[17:59:47] C:\Users\Administrator\De

In [33]:
print(num_rounds)

72


In [34]:
gsearch = xgb_gsearch(X_train,y_train,num_rounds)

In [35]:
params,dtrain,num_rounds = set_params(gsearch,X_train,y_train,num_rounds)

In [36]:
params

{'colsample_bytree': 0.67,
 'gamma': 0.2,
 'max_delta_step': 0,
 'max_depth': 5,
 'min_child_weight': 1,
 'n_estimators': 72,
 'nthread': 4,
 'objective': 'binary:logistic',
 'scale_pos_weight': 1.6764705882352942,
 'subsample': 0.56,
 'eta': 0.3,
 'alpha': 0.1,
 'lambda': 1e-05}

### train

In [37]:
model = xgb.train(params, dtrain, num_rounds)

[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 0 pruned nodes, max_depth=5
[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 0 pruned nodes, max_depth=5
[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=4
[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=4
[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=4
[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5
[18:00:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning e

In [38]:
# score train
score(model,X_train,y_train)

Accuracy: 100.00%
auc: 100.00%


In [39]:
# score test
score(model,X_test,y_test)

Accuracy: 96.49%
auc: 96.23%
