Build your first XGBoost example: https://xgboost.readthedocs.io/en/latest/

In [1]:
# boost from prediction: continue training from an initial base margin

import os
import xgboost as xgb

# Training data, from the XGBoost repository's demo folder:
# https://github.com/dmlc/xgboost/blob/master/demo/data/agaricus.txt.train
dtrain = xgb.DMatrix('agaricus.txt.train')
# Matching test data:
# https://github.com/dmlc/xgboost/blob/master/demo/data/agaricus.txt.test
dtest = xgb.DMatrix('agaricus.txt.test')

# Datasets whose metrics are reported after every boosting round.
watchlist = [(dtest, 'eval'), (dtrain, 'train')]

###
# Advanced: start boosting from an initial base prediction.
#
print('start running example to start from a initial prediction')

# Parameters are passed as a dict; their definitions are the same as in the
# C++ version.
param = {
    'max_depth': 2,
    'eta': 1,
    'objective': 'binary:logistic',
}

# Stage 1: train a model for a single boosting round.
bst = xgb.train(param, dtrain, num_boost_round=1, evals=watchlist)

# set_base_margin expects raw margin values, not transformed predictions.
# Predicting with output_margin=True always returns the margin, i.e. the
# value before the logistic transformation.
ptrain = bst.predict(dtrain, output_margin=True)
ptest = bst.predict(dtest, output_margin=True)

# Attach the stage-1 margins to both matrices so the next training run
# starts from them.
dtrain.set_base_margin(ptrain)
dtest.set_base_margin(ptest)

# Stage 2: train one more round on top of the base margins set above.
print('this is result of running from initial prediction')
bst = xgb.train(param, dtrain, num_boost_round=1, evals=watchlist)

[15:51:08] 6513x127 matrix with 143286 entries loaded from agaricus.txt.train
[15:51:08] 1611x127 matrix with 35442 entries loaded from agaricus.txt.test
start running example to start from a initial prediction
[0]	eval-error:0.042831	train-error:0.046522
this is result of running from initial prediction
[0]	eval-error:0.021726	train-error:0.022263


In [2]:
import os
import xgboost as xgb
##
#  This cell demonstrates how to fit a generalized linear model in XGBoost:
#  the boosters are linear models instead of trees.

dtrain = xgb.DMatrix('agaricus.txt.train')
dtest = xgb.DMatrix('agaricus.txt.test')

# Switch the booster to gblinear so that a linear model is fitted.
#   alpha  -- L1 regularizer
#   lambda -- L2 regularizer
# lambda_bias (L2 regularizer on the bias term) can be set as well.
param = {
    'objective': 'binary:logistic',
    'booster': 'gblinear',
    'alpha': 0.0001,
    'lambda': 1,
}

# Normally eta (the step size) does not need to be set.
# XGBoost uses a parallel coordinate descent algorithm (shotgun), and in
# certain cases the parallelization can affect convergence. Setting eta to a
# smaller value, e.g. 0.5, can make the optimization more stable:
# param['eta'] = 0.5

##
# The remaining settings are the same as for tree boosters.
##
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_boost_round=num_round, evals=watchlist)

preds = bst.predict(dtest)
labels = dtest.get_label()

# Count test rows whose thresholded prediction (cut-off 0.5) differs from
# the label, then report that count as a fraction of all predictions.
num_wrong = sum(
    1
    for idx, score in enumerate(preds)
    if int(score > 0.5) != labels[idx]
)
print('error=%f' % (num_wrong / float(len(preds))))

[15:51:09] 6513x127 matrix with 143286 entries loaded from agaricus.txt.train
[15:51:09] 1611x127 matrix with 35442 entries loaded from agaricus.txt.test
[0]	eval-error:0.114836	train-error:0.10456
[1]	eval-error:0.117939	train-error:0.105481
[2]	eval-error:0.11856	train-error:0.106249
[3]	eval-error:0.120422	train-error:0.106863
error=0.120422
