Skip to content

Commit

Permalink
rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes committed Mar 9, 2024
1 parent 2c12b95 commit 003302d
Show file tree
Hide file tree
Showing 29 changed files with 218 additions and 221 deletions.
98 changes: 49 additions & 49 deletions R-package/R/callbacks.R

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions R-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,20 +193,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(bst, watchlist, iter, feval) {
xgb.iter.eval <- function(bst, evals, iter, feval) {
handle <- xgb.get.handle(bst)

if (length(watchlist) == 0)
if (length(evals) == 0)
return(NULL)

evnames <- names(watchlist)
evnames <- names(evals)
if (is.null(feval)) {
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
} else {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
res <- sapply(seq_along(evals), function(j) {
w <- evals[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
Expand Down
1 change: 0 additions & 1 deletion R-package/R/xgb.create.features.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
#' new.dtest <- xgb.DMatrix(
#' data = new.features.test, label = agaricus.test$label, nthread = 2
#' )
#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
#'
#' # Model accuracy with new features
Expand Down
4 changes: 2 additions & 2 deletions R-package/R/xgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
modelfile = NULL
)
bst <- bst$bst
list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
})

# extract parameters that can affect the relationship b/w #trees and #iterations
Expand Down Expand Up @@ -254,7 +254,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
)
xgb.iter.eval(
bst = fd$bst,
watchlist = fd$watchlist,
evals = fd$evals,
iter = iteration - 1,
feval = feval
)
Expand Down
56 changes: 28 additions & 28 deletions R-package/R/xgb.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,13 @@
#' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
#' @param nrounds max number of boosting iterations.
#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
#' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
#' of these datasets during each boosting iteration, and stored in the end as a field named
#' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
#' \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
#' printed out during the training.
#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
#' E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
#' the performance of each round's model on mat1 and mat2.
#' @param obj customized objective function. Returns gradient and second order
#' gradient with given prediction and dtrain.
Expand Down Expand Up @@ -171,7 +171,7 @@
#' @details
#' These are the training functions for \code{xgboost}.
#'
#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
#' The \code{xgb.train} interface supports advanced features such as \code{evals},
#' customized objective and evaluation metric functions, therefore it is more flexible
#' than the \code{xgboost} interface.
#'
Expand Down Expand Up @@ -209,7 +209,7 @@
#' \itemize{
#' \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
#' and the \code{print_every_n} parameter is passed to it.
#' \item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
#' \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
#' \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
#' \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
#' }
Expand Down Expand Up @@ -254,12 +254,12 @@
#' dtest <- with(
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' watchlist <- list(train = dtrain, eval = dtest)
#' evals <- list(train = dtrain, eval = dtest)
#'
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' ## An xgb.train example where custom objective and evaluation metric are
#' ## used:
Expand All @@ -280,27 +280,27 @@
#' # as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' # or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' objective = logregobj, eval_metric = evalerror)
#'
#' # or as dedicated 'obj' and 'feval' parameters of xgb.train:
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
#' obj = logregobj, feval = evalerror)
#'
#'
#' ## An xgb.train example of using variable learning rates at each iteration:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' callbacks = list(xgb.cb.reset.parameters(my_etas)))
#'
#' ## Early stopping:
#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
#' early_stopping_rounds = 3)
#'
#' ## An 'xgboost' interface example:
Expand All @@ -311,7 +311,7 @@
#'
#' @rdname xgb.train
#' @export
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
xgb.train <- function(params = list(), data, nrounds, evals = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL,
save_period = NULL, save_name = "xgboost.model",
Expand All @@ -324,17 +324,17 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
check.custom.obj()
check.custom.eval()

# data & watchlist checks
# data & evals checks
dtrain <- data
if (!inherits(dtrain, "xgb.DMatrix"))
stop("second argument dtrain must be xgb.DMatrix")
if (length(watchlist) > 0) {
if (typeof(watchlist) != "list" ||
!all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
stop("watchlist must be a list of xgb.DMatrix elements")
evnames <- names(watchlist)
if (length(evals) > 0) {
if (typeof(evals) != "list" ||
!all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
stop("'evals' must be a list of xgb.DMatrix elements")
evnames <- names(evals)
if (is.null(evnames) || any(evnames == ""))
stop("each element of the watchlist must have a name tag")
stop("each element of 'evals' must have a name tag")
}
# Handle multiple evaluation metrics given as a list
for (m in params$eval_metric) {
Expand Down Expand Up @@ -370,8 +370,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
if (verbose && !("print_evaluation" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
}
# evaluation log callback: it is automatically enabled when watchlist is provided
if (length(watchlist) && !("evaluation_log" %in% cb_names)) {
# evaluation log callback: it is automatically enabled when 'evals' is provided
if (length(evals) && !("evaluation_log" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
}
# Model saving callback
Expand All @@ -385,7 +385,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
# Construct a booster (either a new one or load from xgb_model)
bst <- xgb.Booster(
params = params,
cachelist = append(watchlist, dtrain),
cachelist = append(evals, dtrain),
modelfile = xgb_model
)
niter_init <- bst$niter
Expand All @@ -407,7 +407,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
begin_iteration,
end_iteration
)
Expand All @@ -419,7 +419,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
iteration
)

Expand All @@ -431,10 +431,10 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
)

bst_evaluation <- NULL
if (length(watchlist) > 0) {
if (length(evals) > 0) {
bst_evaluation <- xgb.iter.eval(
bst = bst,
watchlist = watchlist,
evals = evals,
iter = iteration - 1,
feval = feval
)
Expand All @@ -444,7 +444,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
iteration,
bst_evaluation
)
Expand All @@ -456,7 +456,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
iteration,
bst_evaluation
)
Expand Down
4 changes: 2 additions & 2 deletions R-package/R/xgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
nthread = merged$nthread
)

watchlist <- list(train = dtrain)
evals <- list(train = dtrain)

bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
early_stopping_rounds = early_stopping_rounds, maximize = maximize,
save_period = save_period, save_name = save_name,
xgb_model = xgb_model, callbacks = callbacks, ...)
Expand Down
20 changes: 10 additions & 10 deletions R-package/demo/basic_walkthrough.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,25 +74,25 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
# to use advanced features, we need to put data in xgb.DMatrix
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
#---------------Using watchlist----------------
# watchlist is a list of xgb.DMatrix, each of them is tagged with name
watchlist <- list(train = dtrain, test = dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list
print("Train xgboost using xgb.train with watchlist")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
#---------------Using an evaluation set----------------
# 'evals' is a list of xgb.DMatrix, each of them is tagged with name
evals <- list(train = dtrain, test = dtest)
# to train with an evaluation set, use xgb.train, which contains more advanced features
# 'evals' argument allows us to monitor the evaluation result on all data in the list
print("Train xgboost using xgb.train with evaluation data")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print("train xgboost using xgb.train with watchlist, watch logloss and error")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
print("train xgboost using xgb.train with evaluation data, watch logloss and error")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
eval_metric = "error", eval_metric = "logloss",
nthread = 2, objective = "binary:logistic")

# xgb.DMatrix can also be saved using xgb.DMatrix.save
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label <- getinfo(dtest, "label")
Expand Down
6 changes: 3 additions & 3 deletions R-package/demo/boost_from_prediction.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
###
# advanced: start from a initial base prediction
#
print('start running example to start from a initial prediction')
# train xgboost for 1 round
param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, 1, watchlist)
bst <- xgb.train(param, dtrain, 1, evals)
# Note: we need the margin value instead of transformed prediction in set_base_margin
# do predict with output_margin=TRUE, will always give you margin values before logistic transformation
ptrain <- predict(bst, dtrain, outputmargin = TRUE)
Expand All @@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
setinfo(dtest, "base_margin", ptest)

print('this is result of boost from initial prediction')
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)
6 changes: 3 additions & 3 deletions R-package/demo/custom_objective.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
num_round <- 2

# user define objective function, given prediction, return gradient and second order gradient
Expand Down Expand Up @@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)

#
# there can be cases where you want additional information
Expand All @@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
4 changes: 2 additions & 2 deletions R-package/demo/early_stopping.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
watchlist <- list(eval = dtest)
evals <- list(eval = dtest)
num_round <- 20
# user define objective function, given prediction, return gradient and second order gradient
# this is log likelihood loss
Expand All @@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
}
print('start training with early Stopping setting')

bst <- xgb.train(param, dtrain, num_round, watchlist,
bst <- xgb.train(param, dtrain, num_round, evals,
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
early_stopping_round = 3)
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
Expand Down
4 changes: 2 additions & 2 deletions R-package/demo/generalized_linear_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
##
# the rest of settings are the same
##
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
num_round <- 2
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
ypred <- predict(bst, dtest)
labels <- getinfo(dtest, 'label')
cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')
6 changes: 3 additions & 3 deletions R-package/demo/gpu_accelerated.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
tr <- sample.int(N, N * 0.75)
dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
wl <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)

# An example of running 'gpu_hist' algorithm
# which is
Expand All @@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
max_bin = 64, tree_method = 'gpu_hist')
pt <- proc.time()
bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt

# Compare to the 'hist' algorithm:
param$tree_method <- 'hist'
pt <- proc.time()
bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt
4 changes: 2 additions & 2 deletions R-package/demo/predict_first_ntree.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
nrounds <- 2

# training the model for two rounds
bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest, 'label')

Expand Down
Loading

0 comments on commit 003302d

Please sign in to comment.