In [1]:
# Import the packages used throughout the notebook
import xgboost as xgb
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import SelectFromModel

In [2]:
# Load the breast-cancer dataset from sklearn.datasets and pull out the
# feature array (X) and the target array (Y).
cancer = datasets.load_breast_cancer()
X, Y = cancer.data, cancer.target

In [3]:
# Dataset overview -- uncomment one of the lines below to inspect shapes or raw values
# X.shape
# Y.shape
# X, Y

In [4]:
# Split into training and test sets: one fifth of the samples is held out
# for testing, and the fixed random_state keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=8,
)

In [5]:
# Wrap each split in an xgboost DMatrix with its labels attached.
xgb_train = xgb.DMatrix(data=X_train, label=Y_train)
xgb_test = xgb.DMatrix(data=X_test, label=Y_test)

In [6]:
# Booster settings for the first model, which uses the built-in
# binary logistic objective.
params = dict(
    objective="binary:logistic",
    booster="gbtree",
    eta=0.1,
    max_depth=2,
)

In [7]:
# Number of boosting rounds handed to xgb.train for both models below.
num_round = 30

In [8]:
# Datasets scored after every boosting round; the second element of each pair
# is the name that prefixes the metric in the training log ('eval-...', 'train-...').
watchlist = [(xgb_test, 'eval'), (xgb_train, 'train')]

In [9]:
# Train the baseline model with the built-in objective configured in `params`.
# The round count and the watchlist are passed by keyword: per the xgboost.train
# signature in recent releases, arguments after `num_boost_round` are keyword-only
# and positional use triggers a warning. The keyword form works on old and new
# versions alike and does not change the training result.
bst1 = xgb.train(
    params,
    xgb_train,
    num_boost_round=num_round,
    evals=watchlist,
)

[0]	eval-error:0.10526	train-error:0.04835
[1]	eval-error:0.08772	train-error:0.04835
[2]	eval-error:0.08772	train-error:0.04615
[3]	eval-error:0.07895	train-error:0.04176
[4]	eval-error:0.04386	train-error:0.03956
[5]	eval-error:0.04386	train-error:0.03517
[6]	eval-error:0.03509	train-error:0.03077
[7]	eval-error:0.03509	train-error:0.02857
[8]	eval-error:0.03509	train-error:0.03077
[9]	eval-error:0.03509	train-error:0.02857
[10]	eval-error:0.03509	train-error:0.02857
[11]	eval-error:0.03509	train-error:0.02857
[12]	eval-error:0.03509	train-error:0.02637
[13]	eval-error:0.03509	train-error:0.02418
[14]	eval-error:0.03509	train-error:0.02418
[15]	eval-error:0.03509	train-error:0.02418
[16]	eval-error:0.02632	train-error:0.02198
[17]	eval-error:0.04386	train-error:0.02418
[18]	eval-error:0.03509	train-error:0.01978
[19]	eval-error:0.03509	train-error:0.01978
[20]	eval-error:0.03509	train-error:0.01758
[21]	eval-error:0.03509	train-error:0.01758
[22]	eval-error:0.03509	train-error:0.0109

In [10]:
def logregobj(preds, dtrain):
    """Custom logistic-loss objective returning per-sample gradient and hessian.

    Args:
        preds: Array of raw scores; a sigmoid is applied to them here.
        dtrain: Object whose ``get_label()`` returns the target values.

    Returns:
        Tuple ``(grad, hess)`` with ``grad = p - y`` and ``hess = p * (1 - p)``,
        where ``p`` is the sigmoid of ``preds`` and ``y`` the labels.
    """
    target = dtrain.get_label()
    prob = 1.0 / (1.0 + np.exp(-preds))
    return prob - target, prob * (1.0 - prob)

In [11]:
def evalerror(preds, dtrain):
    """Custom evaluation metric: fraction of misclassified samples.

    Args:
        preds: Array of raw scores; a sigmoid is applied to them here and the
            result is thresholded at 0.5 to obtain the predicted class.
        dtrain: Object whose ``get_label()`` returns the target values.

    Returns:
        Tuple ``('error', rate)`` where ``rate`` is a Python float equal to the
        number of samples whose thresholded prediction differs from the label,
        divided by the number of labels.

    Raises:
        ZeroDivisionError: If there are no labels.
    """
    labels = dtrain.get_label()
    probs = 1.0 / (1.0 + np.exp(-preds))
    # Count mismatches with a vectorised NumPy call; the builtin sum() would
    # walk the boolean array one element at a time in Python.
    n_wrong = np.count_nonzero(labels != (probs > 0.5))
    return 'error', float(n_wrong) / len(labels)

In [12]:
# Settings for the second model, trained with the hand-written objective.
# This rebinds `params`, replacing the configuration used for bst1; only the
# `objective` entry differs from it.
# NOTE(review): the objective is presumably kept as 'reg:logistic' so that
# predict() still returns values on the probability scale -- confirm against
# the XGBoost custom-objective documentation.
params = dict(
    objective="reg:logistic",
    booster="gbtree",
    eta=0.1,
    max_depth=2,
)

In [13]:
# Train the second model with the custom objective (logregobj) and the custom
# metric (evalerror). Everything after the training DMatrix is passed by
# keyword: per the xgboost.train signature in recent releases, arguments after
# `num_boost_round` are keyword-only and positional use triggers a warning.
# NOTE(review): newer XGBoost documentation lists `custom_metric` as the
# replacement for `feval`; `feval` is kept here so the cell still runs on
# older installations -- confirm the minimum supported version before switching.
bst2 = xgb.train(
    params,
    xgb_train,
    num_boost_round=num_round,
    evals=watchlist,
    obj=logregobj,
    feval=evalerror,
)

[0]	eval-rmse:0.46382	train-rmse:0.45993	eval-error:0.10526	train-error:0.04835
[1]	eval-rmse:0.42722	train-rmse:0.42529	eval-error:0.08772	train-error:0.04835
[2]	eval-rmse:0.39949	train-rmse:0.39349	eval-error:0.08772	train-error:0.04615
[3]	eval-rmse:0.37009	train-rmse:0.36511	eval-error:0.07895	train-error:0.04176
[4]	eval-rmse:0.34606	train-rmse:0.33943	eval-error:0.04386	train-error:0.03956
[5]	eval-rmse:0.32340	train-rmse:0.31664	eval-error:0.04386	train-error:0.03517
[6]	eval-rmse:0.30335	train-rmse:0.29625	eval-error:0.03509	train-error:0.03077
[7]	eval-rmse:0.28509	train-rmse:0.27919	eval-error:0.03509	train-error:0.02857
[8]	eval-rmse:0.27465	train-rmse:0.26329	eval-error:0.03509	train-error:0.03077
[9]	eval-rmse:0.26197	train-rmse:0.24898	eval-error:0.03509	train-error:0.02857
[10]	eval-rmse:0.25492	train-rmse:0.23679	eval-error:0.03509	train-error:0.02857
[11]	eval-rmse:0.24715	train-rmse:0.22515	eval-error:0.03509	train-error:0.02857
[12]	eval-rmse:0.23789	train-rmse:0.21

In [14]:
# Predictions of the baseline model on the test DMatrix.
# (The author left a variant with `output_margin = True` commented out.)
bst1.predict(xgb_test)

array([0.9644884 , 0.9376393 , 0.9619146 , 0.95229685, 0.96051735,
       0.9644884 , 0.64606786, 0.47605127, 0.958065  , 0.9644884 ,
       0.95229685, 0.9562215 , 0.9619146 , 0.03260273, 0.04494493,
       0.9591457 , 0.91186637, 0.03621832, 0.69647205, 0.03260273,
       0.79840696, 0.1766003 , 0.73952323, 0.9619146 , 0.9608883 ,
       0.33771825, 0.05597691, 0.03260273, 0.95637906, 0.9404159 ,
       0.9644884 , 0.9494759 , 0.06781618, 0.03260273, 0.04011195,
       0.94670725, 0.04509089, 0.04494493, 0.74305564, 0.03260273,
       0.9644884 , 0.03260273, 0.08348227, 0.95637906, 0.6900638 ,
       0.9619146 , 0.8981643 , 0.06372928, 0.03621832, 0.03260273,
       0.7227807 , 0.07643224, 0.9431629 , 0.10546214, 0.9431629 ,
       0.03260273, 0.03260273, 0.9644884 , 0.9644884 , 0.9644884 ,
       0.03621832, 0.05699695, 0.9608883 , 0.8236815 , 0.9532413 ,
       0.9591457 , 0.9644884 , 0.10198193, 0.95229685, 0.9061715 ,
       0.08006211, 0.9187825 , 0.03260273, 0.9644884 , 0.03260

In [15]:
# Predictions of the custom-objective model on the same test DMatrix; in the
# visible (truncated) output the values match those printed for bst1.
bst2.predict(xgb_test)

array([0.9644884 , 0.9376393 , 0.9619146 , 0.95229685, 0.96051735,
       0.9644884 , 0.64606786, 0.47605127, 0.958065  , 0.9644884 ,
       0.95229685, 0.9562215 , 0.9619146 , 0.03260273, 0.04494493,
       0.9591457 , 0.91186637, 0.03621832, 0.69647205, 0.03260273,
       0.79840696, 0.1766003 , 0.73952323, 0.9619146 , 0.9608883 ,
       0.33771825, 0.05597691, 0.03260273, 0.95637906, 0.9404159 ,
       0.9644884 , 0.9494759 , 0.06781618, 0.03260273, 0.04011195,
       0.94670725, 0.04509089, 0.04494493, 0.74305564, 0.03260273,
       0.9644884 , 0.03260273, 0.08348227, 0.95637906, 0.6900638 ,
       0.9619146 , 0.8981643 , 0.06372928, 0.03621832, 0.03260273,
       0.7227807 , 0.07643224, 0.9431629 , 0.10546214, 0.9431629 ,
       0.03260273, 0.03260273, 0.9644884 , 0.9644884 , 0.9644884 ,
       0.03621832, 0.05699695, 0.9608883 , 0.8236815 , 0.9532413 ,
       0.9591457 , 0.9644884 , 0.10198193, 0.95229685, 0.9061715 ,
       0.08006211, 0.9187825 , 0.03260273, 0.9644884 , 0.03260