## xgboost主要调节参数

1. 其他参数
    * booster
    * n_jobs
    * objective
    * verbosity

2. 树调节参数
    * n_estimators
    * max_depth(重要程度高)
    * min_child_weight(重要程度高,与max_depth一起进行网格搜索)

2. 防止过拟合参数
    * gamma
    * learning_rate(Learning rate shrinks the contribution of each tree by learning_rate)
    * subsample
    * colsample_bytree
    * reg_alpha
    * reg_lambda

In [1]:
from xgboost import XGBClassifier  # XGBRegressor 同理
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
X = datasets.fetch_covtype().data[:3000]
y = datasets.fetch_covtype().target[:3000]
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [3]:
X_train.shape  # 数据集有54个特征

(2250, 54)

In [4]:
np.unique(y_train)  # 7分类

array([1, 2, 3, 4, 5, 6, 7])

In [5]:
from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder()
y_train_new = enc.fit_transform(y_train.reshape(-1, 1))
np.unique(y_train_new)

array([0., 1., 2., 3., 4., 5., 6.])

In [6]:
y_test_new = enc.transform(y_test.reshape(-1, 1))

In [7]:
# gbtree and dart use tree based models while gblinear uses linear functions.
booster = ['gbtree', 'gblinear', 'dart']

for i in booster:
    xg = XGBClassifier(n_jobs=-1,  # xgboost虽属于boosting算法,但也可进行并行计算
                       booster=i, use_label_encoder=False)  #  设置use_label_encoder=Fasle,和对标签进行标签编码避免警告

    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    if i == 'gblinear':  # 此时没有feature_importances_属性
        print("coef_", xg.coef_)  # Coefficients property(当前仅当booster='gblinear')
        print("intercept_", xg.intercept_)  # Intercept (bias) property(当前仅当booster='gblinear')
    print('booster=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

booster=gbtree,  score= 0.8333333333333334
coef_ [[ 3.28349e-04 -1.06196e-04 -8.55410e-06 -1.95018e-04 -1.29448e-04
  -2.13075e-04  4.81626e-04 -2.03699e-03 -1.46347e-03  7.26749e-03
   5.12693e-03 -1.00380e-04  2.08064e-03 -5.39129e-04 -5.03684e-02
  -2.83966e-02  5.56726e-02 -6.23052e-02  1.17344e-02  5.47597e-02
  -6.96490e-02  1.67561e-03  2.12958e-03 -2.52775e-03 -3.34843e-02
  -4.34513e-03 -1.75680e-04 -2.24105e-03 -7.32984e-03 -8.69964e-04
   7.61959e-03  3.12086e-02  2.95582e-03  2.48401e-03  3.27052e-04
   2.97445e-04  1.32954e-04 -1.44724e-03  4.58231e-03 -5.42429e-04
  -7.81192e-04 -2.77627e-04 -2.93538e-03  5.19595e-05 -7.57307e-04
   8.70424e-03  4.81848e-03 -4.12414e-03  3.15295e-03 -1.58512e-03
  -8.39636e-05  2.97580e-03  2.66030e-03  2.01438e-03]
 [-3.29196e-03  4.62823e-03  1.22340e-03  3.17006e-03  3.04714e-03
  -2.17249e-02 -3.35930e-03  2.97621e-03 -1.95565e-03  9.86783e-05
   1.35306e-04 -4.28894e-03 -3.07738e-03 -1.52381e-04 -8.51760e-04
   2.04544e-04 -3.67355e-

In [8]:
# Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3 (debug)
xg = XGBClassifier(n_jobs=-1, use_label_encoder=False)  # 默认verbosity=1
xg.fit(X_train, y_train_new, eval_metric='mlogloss')
print(xg.score(X_test, y_test_new))

  return f(*args, **kwargs)


0.8333333333333334


In [9]:
# Specify the learning task and the corresponding learning objective
# 可自定义
objective = ["reg:squarederror",  # L2损失
             "binary:logistic",  # logistic regression for binary classification, output probability
             "multi:softmax"]  #  set XGBoost to do multiclass classification using the softmax objective

for i in objective:
    '''
    objective : string, callable or None, optional (default=None)
            Specify the learning task and the corresponding learning objective or
            a custom objective function to be used (see note below).
            Default: 'regression' for LGBMRegressor, 'binary' or 'multiclass' for LGBMClassifier, 'lambdarank' for LGBMRanker.
    '''
    xgb = XGBClassifier(n_jobs=-1, use_label_encoder=False, objective=i)
    xgb.fit(X_train, y_train_new, eval_metric='mlogloss')
    print("objective=" + str(i) + ', score=', xgb.score(X_test, y_test_new))

objective=reg:squarederror, score= 0.8333333333333334
objective=binary:logistic, score= 0.8333333333333334
objective=multi:softmax, score= 0.8333333333333334


In [10]:
n_estimators = [10, 20, 50, 100, 200, 500]  # 默认n_estimators=100

for i in n_estimators:
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False,
                       n_estimators=i)  # Number of gradient boosted trees
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('n_estimators=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

n_estimators=10,  score= 0.812
n_estimators=20,  score= 0.8146666666666667
n_estimators=50,  score= 0.8346666666666667
n_estimators=100,  score= 0.8333333333333334
n_estimators=200,  score= 0.8413333333333334
n_estimators=500,  score= 0.84


In [11]:
max_depth = [1, 3, 6, 9, 12, 15, 18, 21]  # 默认max_depth=6
for i in max_depth:
    # Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, max_depth=i)  #
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('max_depth=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

max_depth=1,  score= 0.7746666666666666
max_depth=3,  score= 0.8133333333333334
max_depth=6,  score= 0.8333333333333334
max_depth=9,  score= 0.8413333333333334
max_depth=12,  score= 0.832
max_depth=15,  score= 0.8466666666666667
max_depth=18,  score= 0.8413333333333334
max_depth=21,  score= 0.8373333333333334


In [12]:
min_child_weight = [0, 5, 10, 50, 100]  # 默认min_child_weight=1
for i in min_child_weight:
    """
    Minimum sum of instance weight (hessian) needed in a child. 
    If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, 
    then the building process will give up further partitioning. 
    In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. 
    The larger min_child_weight is, the more conservative the algorithm will be.
    """
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, min_child_weight=i)
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('min_child_weight=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

min_child_weight=0,  score= 0.8333333333333334
min_child_weight=5,  score= 0.8266666666666667
min_child_weight=10,  score= 0.816
min_child_weight=50,  score= 0.7866666666666666
min_child_weight=100,  score= 0.7506666666666667


In [13]:
gamma = [0, 0.3, 0.9, 2.7, 8.1, 27.3, 81.9]  # 默认gamma=0
for i in gamma:
    # Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger gamma is, the more conservative the algorithm will be.
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, gamma=i)
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('gamma=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

gamma=0,  score= 0.8333333333333334
gamma=0.3,  score= 0.836
gamma=0.9,  score= 0.8186666666666667
gamma=2.7,  score= 0.8066666666666666
gamma=8.1,  score= 0.7706666666666667
gamma=27.3,  score= 0.716
gamma=81.9,  score= 0.6213333333333333


In [14]:
learning_rate = [0.01, 0.02, 0.05, 0.1, 0.15, 0.3, 0.5, 0.7, 0.9]  # 默认learning_rate=0.3
for i in learning_rate:
    # Step size shrinkage used in update to prevents overfitting
    # This is learning_rate parameters in the GBDT
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, learning_rate=i)  #
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('learing_rate=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

learing_rate=0.01,  score= 0.7786666666666666
learing_rate=0.02,  score= 0.7973333333333333
learing_rate=0.05,  score= 0.8213333333333334
learing_rate=0.1,  score= 0.828
learing_rate=0.15,  score= 0.832
learing_rate=0.3,  score= 0.8333333333333334
learing_rate=0.5,  score= 0.8386666666666667
learing_rate=0.7,  score= 0.836
learing_rate=0.9,  score= 0.8426666666666667


In [15]:
subsample = [0.1, 0.3, 0.4, 0.6, 0.7, 0.8, 0.85, 0.95, 1]  # 默认subsample=1
for i in subsample:
    # Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting.
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, subsample=i)
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('subsample=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

subsample=0.1,  score= 0.8066666666666666
subsample=0.3,  score= 0.8253333333333334
subsample=0.4,  score= 0.8346666666666667
subsample=0.6,  score= 0.832
subsample=0.7,  score= 0.8293333333333334
subsample=0.8,  score= 0.8373333333333334
subsample=0.85,  score= 0.8426666666666667
subsample=0.95,  score= 0.8346666666666667
subsample=1,  score= 0.8333333333333334


In [16]:
colsample_bytree = [0.1, 0.3, 0.4, 0.6, 0.7, 0.8, 0.85, 0.95, 1]  # 默认colsample_bytree=1
for i in subsample:
    # colsample_bytree is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, colsample_bytree=i)  # column (feature) subsampling,即列采样比率
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('colsample_bytree=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

colsample_bytree=0.1,  score= 0.772
colsample_bytree=0.3,  score= 0.8333333333333334
colsample_bytree=0.4,  score= 0.8373333333333334
colsample_bytree=0.6,  score= 0.8386666666666667
colsample_bytree=0.7,  score= 0.8346666666666667
colsample_bytree=0.8,  score= 0.8466666666666667
colsample_bytree=0.85,  score= 0.832
colsample_bytree=0.95,  score= 0.844
colsample_bytree=1,  score= 0.8333333333333334


In [17]:
reg_alpha = [0, 0.25, 0.5, 0.75, 1, 3, 9]  # 默认reg_alpha=0
for i in reg_alpha:
    # L1 regularization term on weights. Increasing this value will make model more conservative.
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, reg_alpha=i)
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('reg_alpha=' + str(i) + ',  score=', xg.score(X_test, y_test_new))

reg_alpha=0,  score= 0.8333333333333334
reg_alpha=0.25,  score= 0.8346666666666667
reg_alpha=0.5,  score= 0.8493333333333334
reg_alpha=0.75,  score= 0.832
reg_alpha=1,  score= 0.8333333333333334
reg_alpha=3,  score= 0.8293333333333334
reg_alpha=9,  score= 0.7986666666666666


In [18]:
reg_lambda = [0, 1, 3, 9, 27, 81]  # 默认reg_lambda=1
for i in reg_lambda:
    # L2 regularization term on weights. Increasing this value will make model more conservative.
    xg = XGBClassifier(n_jobs=-1, use_label_encoder=False, reg_lambda=i)
    xg.fit(X_train, y_train_new, eval_metric='mlogloss')
    print('reg_lambda=' + str(i) + ',  score=', xg.score(X_test, y_test_new))


reg_lambda=0,  score= 0.8293333333333334
reg_lambda=1,  score= 0.8333333333333334
reg_lambda=3,  score= 0.836
reg_lambda=9,  score= 0.8453333333333334
reg_lambda=27,  score= 0.8226666666666667
reg_lambda=81,  score= 0.8213333333333334
