In [1]:
# 导入常用包
import xgboost as xgb
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import SelectFromModel

In [2]:
# Load the breast-cancer dataset from sklearn.datasets and split it into
# the feature matrix X and the target vector Y.
cancer = datasets.load_breast_cancer()
X, Y = cancer.data, cancer.target

In [3]:
# Quick look at the dataset (uncomment one of the lines below to display it)
# X.shape
# Y.shape
# X, Y

In [4]:
# Split into training and test sets: one fifth of the samples is held out
# for testing, and the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=8,
)


In [5]:
# Wrap each split (features + labels) in an xgb.DMatrix; the training
# matrix is built first, then the test matrix.
xgb_train, xgb_test = (
    xgb.DMatrix(features, label=labels)
    for features, labels in ((X_train, Y_train), (X_test, Y_test))
)

In [6]:
# Booster configuration passed to xgb.train: logistic objective for the
# binary target, tree booster, learning rate (eta) 0.1, trees up to depth 5.
params = dict(
    objective="binary:logistic",
    booster="gbtree",
    eta=0.1,
    max_depth=5,
)

In [7]:
num_round = 50

In [8]:
watchlist = [(xgb_test, 'eval'), (xgb_train, 'train')]

In [9]:
bst = xgb.train(params, xgb_train, num_boost_round = 20, evals = watchlist)

[0]	eval-error:0.03509	train-error:0.02418
[1]	eval-error:0.05263	train-error:0.01538
[2]	eval-error:0.07018	train-error:0.01319
[3]	eval-error:0.07895	train-error:0.00440
[4]	eval-error:0.07895	train-error:0.00440
[5]	eval-error:0.04386	train-error:0.00440
[6]	eval-error:0.05263	train-error:0.00440
[7]	eval-error:0.06140	train-error:0.00440
[8]	eval-error:0.05263	train-error:0.00440
[9]	eval-error:0.04386	train-error:0.00440
[10]	eval-error:0.04386	train-error:0.00440
[11]	eval-error:0.04386	train-error:0.00440
[12]	eval-error:0.04386	train-error:0.00440
[13]	eval-error:0.04386	train-error:0.00440
[14]	eval-error:0.03509	train-error:0.00440
[15]	eval-error:0.03509	train-error:0.00440
[16]	eval-error:0.03509	train-error:0.00440
[17]	eval-error:0.04386	train-error:0.00440
[18]	eval-error:0.04386	train-error:0.00440
[19]	eval-error:0.04386	train-error:0.00440


In [10]:
# Default predict() call: predictions after the sigmoid transformation.
pred_test_sigmoid = bst.predict(xgb_test)
# With output_margin=True the returned predictions are the raw values,
# i.e. before the sigmoid transformation is applied.
pred_test = bst.predict(xgb_test, output_margin=True)

In [11]:
# Apply the sigmoid transformation by hand to the first 30 raw values.
raw_head = pred_test[:30]
1.0 / (1.0 + np.exp(-raw_head))

array([0.9303397 , 0.9119321 , 0.9303397 , 0.8942841 , 0.92795146,
       0.9303397 , 0.4654373 , 0.46111533, 0.9303397 , 0.9303397 ,
       0.91980803, 0.9303397 , 0.9303397 , 0.07041781, 0.07041781,
       0.9303397 , 0.89680326, 0.07595936, 0.5600603 , 0.07041781,
       0.72333413, 0.19820794, 0.72285205, 0.9303397 , 0.9303397 ,
       0.3644025 , 0.07041781, 0.07041781, 0.9303397 , 0.88139117],
      dtype=float32)

In [12]:
# Compare the sigmoid-transformed raw values with the predictions returned by
# the default predict() call. Instead of relying only on an element-by-element
# visual check of 30 values, verify the match over the whole test set; this
# raises AssertionError if the two disagree beyond float32 rounding.
np.testing.assert_allclose(
    1.0 / (1.0 + np.exp(-pred_test)),
    pred_test_sigmoid,
    rtol=1e-5,
    atol=1e-6,
)
# Still display the first 30 predictions so they can be read against the
# manually transformed values.
pred_test_sigmoid[:30]

array([0.9303397 , 0.9119321 , 0.9303397 , 0.8942841 , 0.92795146,
       0.9303397 , 0.4654373 , 0.46111533, 0.9303397 , 0.9303397 ,
       0.91980803, 0.9303397 , 0.9303397 , 0.07041781, 0.07041781,
       0.9303397 , 0.89680326, 0.07595936, 0.5600603 , 0.07041781,
       0.72333413, 0.19820794, 0.72285205, 0.9303397 , 0.9303397 ,
       0.3644025 , 0.07041781, 0.07041781, 0.9303397 , 0.88139117],
      dtype=float32)