Skip to content

Commit

Permalink
budgets, doc and assertions
Browse files Browse the repository at this point in the history
  • Loading branch information
ja-thomas committed Apr 12, 2018
1 parent 98e3914 commit 0d95210
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 35 deletions.
6 changes: 4 additions & 2 deletions R/RLearner_classif_autoxgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ makeRLearner.classif.autoxgboost = function() {
par.set = makeParamSet(
makeUntypedLearnerParam(id = "measure", default = mse),
makeUntypedLearnerParam(id = "control"),
makeIntegerLearnerParam(id = "iterations", lower = 1, default = 160),
makeIntegerLearnerParam(id = "time.budget", lower = 1, default = 3600),
makeUntypedLearnerParam(id = "par.set", default = autoxgbparset),
makeIntegerLearnerParam(id = "max.nrounds", lower = 1L, default = 10L^6),
makeIntegerLearnerParam(id = "early.stopping.rounds", lower = 1, default = 10L),
Expand All @@ -25,12 +27,12 @@ makeRLearner.classif.autoxgboost = function() {

#' @export
trainLearner.classif.autoxgboost = function(.learner, .task, .subset, .weights = NULL,
measure = mmce, control = NULL, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
measure = mmce, control = NULL, iterations = 160, time.budget = 3600, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
early.stopping.fraction = 4/5, build.final.model = TRUE, design.size = 15L,
impact.encoding.boundary = 10L, mbo.learner = NULL, nthread = NULL, tune.threshold = TRUE, ...) {

.task = subsetTask(.task, .subset)
autoxgboost(.task, measure, control, par.set, max.nrounds, early.stopping.rounds,
autoxgboost(.task, measure, control, iterations, time.budget, par.set, max.nrounds, early.stopping.rounds,
early.stopping.fraction, build.final.model, design.size,
impact.encoding.boundary, mbo.learner, nthread, tune.threshold)

Expand Down
6 changes: 4 additions & 2 deletions R/RLearner_regr_autoxgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ makeRLearner.regr.autoxgboost = function() {
par.set = makeParamSet(
makeUntypedLearnerParam(id = "measure", default = mse),
makeUntypedLearnerParam(id = "control"),
makeIntegerLearnerParam(id = "iterations", lower = 1, default = 160),
makeIntegerLearnerParam(id = "time.budget", lower = 1, default = 3600),
makeUntypedLearnerParam(id = "par.set", default = autoxgbparset),
makeIntegerLearnerParam(id = "max.nrounds", lower = 1L, default = 10L^6),
makeIntegerLearnerParam(id = "early.stopping.rounds", lower = 1, default = 10L),
Expand All @@ -25,12 +27,12 @@ makeRLearner.regr.autoxgboost = function() {

#' @export
trainLearner.regr.autoxgboost = function(.learner, .task, .subset, .weights = NULL,
measure = mse, control = NULL, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
measure = mse, control = NULL, iterations = 160, time.budget = 3600, par.set = autoxgbparset, max.nrounds = 10^6, early.stopping.rounds = 10L,
early.stopping.fraction = 4/5, build.final.model = TRUE, design.size = 15L,
impact.encoding.boundary = 10L, mbo.learner = NULL, nthread = NULL, tune.threshold = TRUE, ...) {

.task = subsetTask(.task, .subset)
autoxgboost(.task, measure, control, par.set, max.nrounds, early.stopping.rounds,
autoxgboost(.task, measure, control, iterations, time.budget, par.set, max.nrounds, early.stopping.rounds,
early.stopping.fraction, build.final.model, design.size,
impact.encoding.boundary, mbo.learner, nthread, tune.threshold)

Expand Down
52 changes: 34 additions & 18 deletions R/autoxgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,24 @@
#' @param measure [\code{\link[mlr]{Measure}}]\cr
#' Performance measure. If \code{NULL} \code{\link[mlr]{getDefaultMeasure}} is used.
#' @param control [\code{\link[mlrMBO]{MBOControl}}]\cr
#' Control object for mbo. Specifies runtime behaviour.
#' Default is to run for 160 iterations or 1 hour, whatever happens first.
#' Control object for optimizer.
#' If not specified, the default \code{\link[mlrMBO]{makeMBOControl}} object will be used with
#' \code{iterations} maximum iterations and a maximum runtime of \code{time.budget} seconds.
#' @param iterations [\code{integer(1)}]\cr
#' Number of MBO iterations to do. Will be ignored if custom \code{control} is used.
#' Default is \code{160}.
#' @param time.budget [\code{integer(1)}]\cr
#' Time that can be used for tuning (in seconds). Will be ignored if custom \code{control} is used.
#' Default is \code{3600}, i.e., one hour.
#' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr
#' Parameter set. Default is \code{\link{autoxgbparset}}.
#' Parameter set to tune over. Default is \code{\link{autoxgbparset}}.
#' @param max.nrounds [\code{integer(1)}]\cr
#' Maximum number of allowed iterations. Default is \code{10^6}.
#' Maximum number of allowed boosting iterations. Default is \code{10^6}.
#' @param early.stopping.rounds [\code{integer(1)}]\cr
#' After how many iterations without an improvement in the OOB error should be stopped?
#' Default is 10.
#' After how many boosting iterations without an improvement in the OOB error should training be stopped?
#' Default is \code{10}.
#' @param build.final.model [\code{logical(1)}]\cr
#' Should the best found model be fitted on the complete dataset?
#' Should the model with the best found configuration be refitted on the complete dataset?
#' Default is \code{TRUE}.
#' @param early.stopping.fraction [\code{numeric(1)}]\cr
#' What fraction of the data should be used for early stopping (i.e. as a validation set).
Expand All @@ -33,7 +40,7 @@
#' @param impact.encoding.boundary [\code{integer(1)}]\cr
#' Defines the threshold on how factor variables are handled. Factors with more levels than the \code{"impact.encoding.boundary"} get impact encoded while factor variables with less or equal levels than the \code{"impact.encoding.boundary"} get dummy encoded.
#' For \code{impact.encoding.boundary = 0L}, all factor variables get impact encoded while for \code{impact.encoding.boundary = Inf}, all of them get dummy encoded.
#' Default is \code{10L}.
#' Default is \code{10}.
#' @param mbo.learner [\code{\link[mlr]{Learner}}]\cr
#' Regression learner from mlr, which is used as a surrogate to model our fitness function.
#' If \code{NULL} (default), the default learner is determined as described here: \link[mlrMBO]{mbo_default_learner}.
Expand All @@ -53,28 +60,39 @@
#' res = autoxgboost(iris.task, control = ctrl, tune.threshold = FALSE)
#' res
#' }
autoxgboost = function(task, measure = NULL, control = NULL, par.set = NULL, max.nrounds = 10^6,
autoxgboost = function(task, measure = NULL, control = NULL, iterations = 160L, time.budget = 3600L, par.set = NULL, max.nrounds = 10^6,
early.stopping.rounds = 10L, early.stopping.fraction = 4/5, build.final.model = TRUE,
design.size = 15L, impact.encoding.boundary = 10L, mbo.learner = NULL,
nthread = NULL, tune.threshold = TRUE) {


# check inputs
assertClass(task, "SupervisedTask", null.ok = FALSE)
assertClass(measure, "Measure", null.ok = TRUE)
assertClass(control, "MBOControl", null.ok = TRUE)
assertIntegerish(iterations, null.ok = FALSE)
assertIntegerish(time.budget, null.ok = FALSE)
assertClass(par.set, "ParamSet", null.ok = TRUE)
assertIntegerish(max.nrounds, lower = 1L, len = 1L)
assertIntegerish(early.stopping.rounds, lower = 1L, len = 1L)
assertNumeric(early.stopping.fraction, lower = 0, upper = 1, len = 1L)
assertFlag(build.final.model)
assertIntegerish(design.size, lower = 1, null.ok = FALSE)
assertIntegerish(design.size, lower = 1L, len = 1L)
if (is.infinite(impact.encoding.boundary))
impact.encoding.boundary = .Machine$integer.max
assertIntegerish(impact.encoding.boundary, lower = 0, upper = Inf, len = 1L)
assertIntegerish(nthread, lower = 1, null.ok = TRUE)
assertIntegerish(nthread, lower = 1, len = 1L, null.ok = TRUE)
assertFlag(tune.threshold)

# set defaults
measure = coalesce(measure, getDefaultMeasure(task))
if (is.null(control)) {
control = makeMBOControl()
control = setMBOControlTermination(control, iters = 160L, time.budget = 3600L)
control = setMBOControlTermination(control, iters = iterations, time.budget = time.budget)
}

par.set = coalesce(par.set, autoxgboost::autoxgbparset)


tt = getTaskType(task)
td = getTaskDesc(task)
has.cat.feats = sum(td$n.feat[c("factors", "ordered")]) > 0
Expand Down Expand Up @@ -166,11 +184,9 @@ autoxgboost = function(task, measure = NULL, control = NULL, par.set = NULL, max
lrn = buildFinalLearner(optim.result, objective, predict.type, par.set = par.set,
dummy.cols = dummy.cols, impact.cols = impact.cols, preproc.pipeline = preproc.pipeline)

mod = if(build.final.model) {
train(lrn, task)
} else {
NULL
}
mod = NULL
if(build.final.model)
mod = train(lrn, task)

makeS3Obj("AutoxgbResult",
optim.result = optim.result,
Expand Down
36 changes: 23 additions & 13 deletions man/autoxgboost.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0d95210

Please sign in to comment.