Skip to content

Commit

Permalink
budgets, doc and assertions
Browse files Browse the repository at this point in the history
  • Loading branch information
ja-thomas committed Apr 12, 2018
1 parent 98e3914 commit 0d95210
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 35 deletions.
6 changes: 4 additions & 2 deletions R/RLearner_classif_autoxgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ makeRLearner.classif.autoxgboost = function() {
par.set = makeParamSet(
makeUntypedLearnerParam(id = "measure", default = mse),
makeUntypedLearnerParam(id = "control"),
makeIntegerLearnerParam(id = "iterations", lower = 1, default = 160),
makeIntegerLearnerParam(id = "time.budget", lower = 1, default = 3600),
makeUntypedLearnerParam(id = "par.set", default = autoxgbparset),
makeIntegerLearnerParam(id = "max.nrounds", lower = 1L, default = 10L^6),
makeIntegerLearnerParam(id = "early.stopping.rounds", lower = 1, default = 10L),
Expand All @@ -25,12 +27,12 @@ makeRLearner.classif.autoxgboost = function() {

#' @export
trainLearner.classif.autoxgboost = function(.learner, .task, .subset, .weights = NULL,
measure = mmce, control = NULL, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
measure = mmce, control = NULL, iterations = 160, time.budget = 3600, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
early.stopping.fraction = 4/5, build.final.model = TRUE, design.size = 15L,
impact.encoding.boundary = 10L, mbo.learner = NULL, nthread = NULL, tune.threshold = TRUE, ...) {

.task = subsetTask(.task, .subset)
autoxgboost(.task, measure, control, par.set, max.nrounds, early.stopping.rounds,
autoxgboost(.task, measure, control, iterations, time.budget, par.set, max.nrounds, early.stopping.rounds,
early.stopping.fraction, build.final.model, design.size,
impact.encoding.boundary, mbo.learner, nthread, tune.threshold)

Expand Down
6 changes: 4 additions & 2 deletions R/RLearner_regr_autoxgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ makeRLearner.regr.autoxgboost = function() {
par.set = makeParamSet(
makeUntypedLearnerParam(id = "measure", default = mse),
makeUntypedLearnerParam(id = "control"),
makeIntegerLearnerParam(id = "iterations", lower = 1, default = 160),
makeIntegerLearnerParam(id = "time.budget", lower = 1, default = 3600),
makeUntypedLearnerParam(id = "par.set", default = autoxgbparset),
makeIntegerLearnerParam(id = "max.nrounds", lower = 1L, default = 10L^6),
makeIntegerLearnerParam(id = "early.stopping.rounds", lower = 1, default = 10L),
Expand All @@ -25,12 +27,12 @@ makeRLearner.regr.autoxgboost = function() {

#' @export
trainLearner.regr.autoxgboost = function(.learner, .task, .subset, .weights = NULL,
measure = mse, control = NULL, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
measure = mse, control = NULL, iterations = 160, time.budget = 3600, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
early.stopping.fraction = 4/5, build.final.model = TRUE, design.size = 15L,
impact.encoding.boundary = 10L, mbo.learner = NULL, nthread = NULL, tune.threshold = TRUE, ...) {

.task = subsetTask(.task, .subset)
autoxgboost(.task, measure, control, par.set, max.nrounds, early.stopping.rounds,
autoxgboost(.task, measure, control, iterations, time.budget, par.set, max.nrounds, early.stopping.rounds,
early.stopping.fraction, build.final.model, design.size,
impact.encoding.boundary, mbo.learner, nthread, tune.threshold)

Expand Down
52 changes: 34 additions & 18 deletions R/autoxgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,24 @@
#' @param measure [\code{\link[mlr]{Measure}}]\cr
#' Performance measure. If \code{NULL} \code{\link[mlr]{getDefaultMeasure}} is used.
#' @param control [\code{\link[mlrMBO]{MBOControl}}]\cr
#' Control object for mbo. Specifies runtime behaviour.
#' Default is to run for 160 iterations or 1 hour, whatever happens first.
#' Control object for optimizer.
#' If not specified, the default \code{\link[mlrMBO]{makeMBOControl}} object will be used with
#' \code{iterations} maximum iterations and a maximum runtime of \code{time.budget} seconds.
#' @param iterations [\code{integer(1)}]\cr
#' Number of MBO iterations to do. Will be ignored if custom \code{control} is used.
#' Default is \code{160}.
#' @param time.budget [\code{integer(1)}]\cr
#' Time that can be used for tuning (in seconds). Will be ignored if custom \code{control} is used.
#' Default is \code{3600}, i.e., one hour.
#' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr
#' Parameter set. Default is \code{\link{autoxgbparset}}.
#' Parameter set to tune over. Default is \code{\link{autoxgbparset}}.
#' @param max.nrounds [\code{integer(1)}]\cr
#' Maximum number of allowed iterations. Default is \code{10^6}.
#' Maximum number of allowed boosting iterations. Default is \code{10^6}.
#' @param early.stopping.rounds [\code{integer(1)}]\cr
#' After how many iterations without an improvement in the OOB error should be stopped?
#' Default is 10.
#' After how many boosting iterations without an improvement in the OOB error should training be stopped?
#' Default is \code{10}.
#' @param build.final.model [\code{logical(1)}]\cr
#' Should the best found model be fitted on the complete dataset?
#' Should the model with the best found configuration be refitted on the complete dataset?
#' Default is \code{TRUE}.
#' @param early.stopping.fraction [\code{numeric(1)}]\cr
#' What fraction of the data should be used for early stopping (i.e. as a validation set).
Expand All @@ -33,7 +40,7 @@
#' @param impact.encoding.boundary [\code{integer(1)}]\cr
#' Defines the threshold on how factor variables are handled. Factors with more levels than the \code{"impact.encoding.boundary"} get impact encoded while factor variables with less or equal levels than the \code{"impact.encoding.boundary"} get dummy encoded.
#' For \code{impact.encoding.boundary = 0L}, all factor variables get impact encoded while for \code{impact.encoding.boundary = Inf}, all of them get dummy encoded.
#' Default is \code{10L}.
#' Default is \code{10}.
#' @param mbo.learner [\code{\link[mlr]{Learner}}]\cr
#' Regression learner from mlr, which is used as a surrogate to model our fitness function.
#' If \code{NULL} (default), the default learner is determined as described here: \link[mlrMBO]{mbo_default_learner}.
Expand All @@ -53,28 +60,39 @@
#' res = autoxgboost(iris.task, control = ctrl, tune.threshold = FALSE)
#' res
#' }
autoxgboost = function(task, measure = NULL, control = NULL, par.set = NULL, max.nrounds = 10^6,
autoxgboost = function(task, measure = NULL, control = NULL, iterations = 160L, time.budget = 3600L, par.set = NULL, max.nrounds = 10^6,
early.stopping.rounds = 10L, early.stopping.fraction = 4/5, build.final.model = TRUE,
design.size = 15L, impact.encoding.boundary = 10L, mbo.learner = NULL,
nthread = NULL, tune.threshold = TRUE) {


# check inputs
assertClass(task, "SupervisedTask", null.ok = FALSE)
assertClass(measure, "Measure", null.ok = TRUE)
assertClass(control, "MBOControl", null.ok = TRUE)
assertIntegerish(iterations, null.ok = FALSE)
assertIntegerish(time.budget, null.ok = FALSE)
assertClass(par.set, "ParamSet", null.ok = TRUE)
assertIntegerish(max.nrounds, lower = 1L, len = 1L)
assertIntegerish(early.stopping.rounds, lower = 1L, len = 1L)
assertNumeric(early.stopping.fraction, lower = 0, upper = 1, len = 1L)
assertFlag(build.final.model)
assertIntegerish(design.size, lower = 1, null.ok = FALSE)
assertIntegerish(design.size, lower = 1L, len = 1L)
if (is.infinite(impact.encoding.boundary))
impact.encoding.boundary = .Machine$integer.max
assertIntegerish(impact.encoding.boundary, lower = 0, upper = Inf, len = 1L)
assertIntegerish(nthread, lower = 1, null.ok = TRUE)
assertIntegerish(nthread, lower = 1, len = 1L, null.ok = TRUE)
assertFlag(tune.threshold)

# set defaults
measure = coalesce(measure, getDefaultMeasure(task))
if (is.null(control)) {
control = makeMBOControl()
control = setMBOControlTermination(control, iters = 160L, time.budget = 3600L)
control = setMBOControlTermination(control, iters = iterations, time.budget = time.budget)
}

par.set = coalesce(par.set, autoxgboost::autoxgbparset)


tt = getTaskType(task)
td = getTaskDesc(task)
has.cat.feats = sum(td$n.feat[c("factors", "ordered")]) > 0
Expand Down Expand Up @@ -166,11 +184,9 @@ autoxgboost = function(task, measure = NULL, control = NULL, par.set = NULL, max
lrn = buildFinalLearner(optim.result, objective, predict.type, par.set = par.set,
dummy.cols = dummy.cols, impact.cols = impact.cols, preproc.pipeline = preproc.pipeline)

mod = if(build.final.model) {
train(lrn, task)
} else {
NULL
}
mod = NULL
if(build.final.model)
mod = train(lrn, task)

makeS3Obj("AutoxgbResult",
optim.result = optim.result,
Expand Down
36 changes: 23 additions & 13 deletions man/autoxgboost.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0d95210

Please sign in to comment.