1. 基本参数
    * booster
    * n_estimators
    * learning_rate(学习速度调节)
    * max_depth(重要程度高)
    * min_child_weight(重要程度高,与max_depth一起进行网格搜索)

2. 正则化调节
    * gamma
    * subsample
    * colsample_bytree
    * reg_alpha
    * reg_lambda

In [16]:
from xgboost import XGBClassifier # XGBRegressor 相同
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

In [17]:
X = datasets.fetch_covtype().data[:3000]
y = datasets.fetch_covtype().target[:3000] 
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [18]:
X_train.shape # 数据集有54个特征

(2250, 54)

In [19]:
np.unique(y_train) # 7分类

array([1, 2, 3, 4, 5, 6, 7])

In [20]:
booster = ['gbtree',
           'gblinear', # 适用于使用线性模型
           'dart'] # 默认booster='gbtree'

for i in booster:
    xg = XGBClassifier(n_jobs=-1, # xgboost虽属于boosting算法,但也可进行并行计算
                       booster=i)
    xg.fit(X_train, y_train)
    if i == 'gblinear': # 此时没有feature_importances_属性
        print("coef_", xg.coef_) # Coefficients property(当前仅当booster='gblinear')
        print("intercept_", xg.intercept_) # Intercept (bias) property(当前仅当booster='gblinear')
    print('booster=' + str(i) + ',  score=', xg.score(X_test, y_test))



booster=gbtree,  score= 0.8586666666666667
coef_ [[ 3.29300e-04 -1.09874e-04 -6.31986e-05 -7.37975e-05 -1.41921e-04
  -2.31961e-04  4.80456e-04 -2.24765e-03 -1.13677e-03  6.60738e-03
   2.62490e-03  7.81994e-04  2.50138e-03  2.12308e-04 -4.87367e-02
  -2.85628e-02  5.62898e-02 -3.24191e-02  1.02828e-02  4.67273e-02
  -4.42519e-02  1.68427e-03  2.04239e-03 -1.64989e-03 -3.77806e-02
  -4.69981e-03  6.00104e-04 -2.41974e-03 -5.54757e-03 -1.84201e-03
   8.11982e-03  3.42190e-02  3.03332e-03  1.89655e-03 -2.57700e-03
   3.04212e-04  1.37974e-04 -1.73538e-03  6.14070e-03 -5.94326e-04
  -4.84939e-04 -2.97662e-04 -2.68267e-03 -2.96830e-04 -9.93050e-04
   7.42322e-03  5.32352e-03 -4.81310e-03  3.35191e-03 -1.33151e-03
   2.03652e-04  3.59882e-03  7.79603e-04  1.69515e-03]
 [-4.22352e-03  4.57456e-03  6.23238e-04  3.01581e-03  3.78790e-03
  -2.14013e-02 -3.30455e-03  3.01852e-03 -2.93318e-03  9.64448e-05
   1.64246e-04 -3.72833e-03 -3.32592e-03 -1.05801e-04 -8.13578e-04
   1.97348e-04 -1.46521e-

In [21]:
n_estimators = [10, 20, 50, 100, 200, 500] # 默认n_estimators=100

for i in n_estimators:
    xg = XGBClassifier(n_jobs=-1, 
                        n_estimators=i) # Number of gradient boosted trees
    xg.fit(X_train, y_train)
    print('n_estimators=' + str(i) + ',  score=', xg.score(X_test, y_test))

n_estimators=10,  score= 0.8106666666666666
n_estimators=20,  score= 0.8293333333333334
n_estimators=50,  score= 0.852
n_estimators=100,  score= 0.8586666666666667
n_estimators=200,  score= 0.86
n_estimators=500,  score= 0.8626666666666667




In [22]:
learning_rate = [0.01, 0.02, 0.05, 0.1, 0.15, 0.3, 0.5, 0.7, 0.9] # 默认learning_rate=0.3
for i in learning_rate:
    xg = XGBClassifier(n_jobs=-1, learning_rate=i) # Step size shrinkage used in update to prevents overfitting
    xg.fit(X_train, y_train)
    print('learing_rate=' + str(i) + ',  score=', xg.score(X_test, y_test))

learing_rate=0.01,  score= 0.7866666666666666
learing_rate=0.02,  score= 0.8053333333333333
learing_rate=0.05,  score= 0.8266666666666667
learing_rate=0.1,  score= 0.8533333333333334
learing_rate=0.15,  score= 0.8666666666666667
learing_rate=0.3,  score= 0.8586666666666667
learing_rate=0.5,  score= 0.8466666666666667
learing_rate=0.7,  score= 0.8626666666666667
learing_rate=0.9,  score= 0.8466666666666667




In [23]:
max_depth = [1, 3, 6, 9, 12, 15, 18, 21] # 默认max_depth=6
for i in max_depth:
    # Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
    xg = XGBClassifier(n_jobs=-1, max_depth=i, verbose=True)
    xg.fit(X_train, y_train)
    print('max_depth=' + str(i) + ',  score=', xg.score(X_test, y_test))

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


max_depth=1,  score= 0.7826666666666666
Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


max_depth=3,  score= 0.8453333333333334
Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


max_depth=6,  score= 0.8586666666666667
Parameters: { verbose } might n



In [24]:
min_child_weight = [0, 5, 10, 50, 100] # 默认min_child_weight=1
for i in min_child_weight:
    """
    Minimum sum of instance weight (hessian) needed in a child. 
    If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, 
    then the building process will give up further partitioning. 
    In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. 
    The larger min_child_weight is, the more conservative the algorithm will be.
    """
    xg = XGBClassifier(n_jobs=-1, min_child_weight=i) 
    xg.fit(X_train, y_train)
    print('min_child_weight=' + str(i) + ',  score=', xg.score(X_test, y_test))

min_child_weight=0,  score= 0.8613333333333333
min_child_weight=5,  score= 0.8453333333333334
min_child_weight=10,  score= 0.8373333333333334
min_child_weight=50,  score= 0.792
min_child_weight=100,  score= 0.748




In [25]:
gamma = [0, 0.3, 0.9, 2.7, 8.1, 27.3, 81.9] # 默认gamma=0
for i in gamma:
    # Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger gamma is, the more conservative the algorithm will be.
    xg = XGBClassifier(n_jobs=-1, gamma=i)
    xg.fit(X_train, y_train)
    print('gamma=' + str(i) + ',  score=', xg.score(X_test, y_test))

gamma=0,  score= 0.8586666666666667
gamma=0.3,  score= 0.8573333333333333
gamma=0.9,  score= 0.84
gamma=2.7,  score= 0.8173333333333334
gamma=8.1,  score= 0.7853333333333333
gamma=27.3,  score= 0.7253333333333334
gamma=81.9,  score= 0.6306666666666667




In [26]:
subsample = [0.1, 0.3, 0.4, 0.6, 0.7, 0.8, 0.85, 0.95, 1] # 默认subsample=1
for i in subsample:
    # Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting.
    xg = XGBClassifier(n_jobs=-1, subsample=i)
    xg.fit(X_train, y_train)
    print('subsample=' + str(i) + ',  score=', xg.score(X_test, y_test))

subsample=0.1,  score= 0.8
subsample=0.3,  score= 0.8373333333333334
subsample=0.4,  score= 0.836
subsample=0.6,  score= 0.8453333333333334
subsample=0.7,  score= 0.8466666666666667
subsample=0.8,  score= 0.8626666666666667
subsample=0.85,  score= 0.864
subsample=0.95,  score= 0.8613333333333333
subsample=1,  score= 0.8586666666666667




In [27]:
colsample_bytree = [0.1, 0.3, 0.4, 0.6, 0.7, 0.8, 0.85, 0.95, 1] # 默认colsample_bytree=1
for i in subsample:
    # colsample_bytree is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
    xg = XGBClassifier(n_jobs=-1, colsample_bytree=i)
    xg.fit(X_train, y_train)
    print('colsample_bytree=' + str(i) + ',  score=', xg.score(X_test, y_test))

colsample_bytree=0.1,  score= 0.8066666666666666
colsample_bytree=0.3,  score= 0.8493333333333334
colsample_bytree=0.4,  score= 0.856
colsample_bytree=0.6,  score= 0.8666666666666667
colsample_bytree=0.7,  score= 0.8693333333333333
colsample_bytree=0.8,  score= 0.8733333333333333
colsample_bytree=0.85,  score= 0.8626666666666667
colsample_bytree=0.95,  score= 0.8653333333333333
colsample_bytree=1,  score= 0.8586666666666667




In [28]:
reg_alpha = [0, 0.25, 0.5, 0.75, 1, 3, 9] # 默认reg_alpha=0
for i in reg_alpha:
    # L1 regularization term on weights. Increasing this value will make model more conservative.
    xg = XGBClassifier(n_jobs=-1, reg_alpha=i)
    xg.fit(X_train, y_train)
    print('reg_alpha=' + str(i) + ',  score=', xg.score(X_test, y_test))

reg_alpha=0,  score= 0.8586666666666667
reg_alpha=0.25,  score= 0.8693333333333333
reg_alpha=0.5,  score= 0.86
reg_alpha=0.75,  score= 0.86
reg_alpha=1,  score= 0.8493333333333334
reg_alpha=3,  score= 0.8506666666666667
reg_alpha=9,  score= 0.828




In [29]:
reg_lambda = [0, 1, 3, 9, 27, 81] # 默认reg_lambda=1
for i in reg_lambda:
    # L2 regularization term on weights. Increasing this value will make model more conservative.
    xg = XGBClassifier(n_jobs=-1, reg_lambda=i)
    xg.fit(X_train, y_train)
    print('reg_lambda=' + str(i) + ',  score=', xg.score(X_test, y_test))

reg_lambda=0,  score= 0.8706666666666667
reg_lambda=1,  score= 0.8586666666666667
reg_lambda=3,  score= 0.868
reg_lambda=9,  score= 0.8586666666666667
reg_lambda=27,  score= 0.852
reg_lambda=81,  score= 0.8426666666666667


