working end

jakob-r · Mar 30, 2017 · f6726b8 · f6726b8
1 parent 0396bb8
commit f6726b8
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 5 deletions.
diff --git a/R/getDefaultParSetValues.R b/R/getDefaultParSetValues.R
@@ -54,13 +54,13 @@ getDefaultParSetValues = function() {
     ## not compared to caret
     # random forest (only mtry in caret)
     classif.randomForest = makeParamSet(
-      makeNumericParam("ntree", lower = 0, upper = 7, trafo = function(x) round(2^x * 10), default = log2(500/10)),
+      makeNumericParam("ntree", lower = log2(10/10), upper = log2(1000/10), trafo = function(x) round(2^x * 10), default = log2(500/10)),
       makeIntegerParam("nodesize", lower = 1, upper = 10, default = 1),
       makeIntegerParam("mtry", lower = 1L, upper = expression(p), default = expression(floor(sqrt(p)))),
       keys = "p"
     ),
     regr.randomForest = makeParamSet(
-      makeNumericParam("ntree", lower = 0, upper = 7, trafo = function(x) round(2^x * 10), default = log2(500/10)),
+      makeNumericParam("ntree", lower = log2(10/10), upper = log2(1000/10), trafo = function(x) round(2^x * 10), default = log2(500/10)),
       makeIntegerParam("nodesize", lower = 1, upper = 10, default = 1),
       makeIntegerParam(id = "mtry", lower = 1L, upper = expression(p), default = expression(max(floor(p/3), 1))),
       keys = "p"

diff --git a/meta/benchmark_caret_mlrHyperopt.R b/meta/benchmark_caret_mlrHyperopt.R
@@ -124,14 +124,39 @@ res = merge(res, lrns2, all.x = TRUE, by = "learner")
 res[!is.na(mlr), learner := mlr, ]  # overwrite learner with the mlr column wherever mlr is not NA
 # Visualizing Results
 library(ggplot2)
+res$time = as.numeric(res$time, units = "secs")  # presumably a difftime; turn it into plain seconds -- TODO confirm
 g = ggplot(data = res, aes(x = paste(algorithm, search, budget), y = measure, fill = paste(algorithm,search)))  # measure per algorithm/search/budget combination
 g + geom_boxplot() + facet_grid(problem~learner, scales = "free")  # one panel per problem (rows) and learner (columns)
-g = ggplot(data = res, aes(x = measure, y = time, color = algorithm))
-g + geom_point() + facet_grid(problem~learner)
+g = ggplot(data = res, aes(x = measure, y = time, color = algorithm, size = as.factor(budget)))  # time against measure; point size encodes budget
+g + geom_point(alpha = 0.1) + facet_grid(learner~problem, scales = "free") + scale_y_log10()  # log10 time axis, semi-transparent points
+# extract the good parameter settings
 
 
 # Detailed Analysis
-res.list = reduceResultsList()
+res.list = reduceResultsList(ids = res[algorithm == "mlrHyperopt", job.id[1:10]])  # restrict to the first 10 job ids of mlrHyperopt runs
+res.x = reduceResultsDataTable(fun = function(job, res) if(!is.null(res$model$bestTune)) res$model$bestTune else res$model$hyperopt.res$x, fill = TRUE)  # per job: res$model$bestTune when non-NULL, else res$model$hyperopt.res$x
+res.x = merge(res.x, getJobPars(res), all.y = FALSE)  # attach the job parameters to the extracted settings
+res.x.b = res.x  # second name for the merged table; NOTE(review): plain assignment does not copy a data.table -- confirm whether copy() was intended
+# Summarise a vector by two representative values (always length 2, so it can
+# be used in a grouped lapply(.SD, hifu)):
+#   factor / character: the two most frequent values as character, NA-padded
+#   integer / numeric:  c(min, max) with NAs ignored
+#   all-NA or any other type: the first two elements of x
+hifu = function(x) {
+  if (is.factor(x) || is.character(x)) {
+    if (all(is.na(x))) {
+      # stay character so the result type does not depend on a group's content
+      return(rep(NA_character_, 2L))
+    }
+    return(names(sort(table(x), decreasing = TRUE))[1:2])
+  }
+  if (all(is.na(x))) {
+    x[1:2]
+  } else if (is.integer(x)) {
+    as.integer(range(x, na.rm = TRUE))
+  } else if (is.numeric(x)) {
+    range(x, na.rm = TRUE)
+  } else {
+    x[1:2]
+  }
+}
+res.x[budget > 10 & algorithm == "caret", lapply(.SD, hifu), by = .(learner)]  # hifu summary of every column per learner, caret rows with budget > 10 only
+id.vars = c("algorithm", "fold", "learner", "budget", "search", "problem", "job.id")  # columns excluded from col.numeric and used as melt ids
+col.numeric = setdiff(names(which(sapply(res.x, is.numeric))), id.vars)  # numeric columns that are not id columns
+m.res.x = melt(res.x[,c(id.vars, col.numeric),with = FALSE], id.vars = id.vars)  # long format: one row per id combination and numeric column
+m.res.x[variable %in% c("C", "sigma"), value := log2(value)]  # put C and sigma on a log2 scale
+g = ggplot(m.res.x, mapping = aes(y = value, x = algorithm, color = learner))
+g + geom_violin() + geom_point(position = position_jitter(width = 0.2, height = 0)) + facet_wrap(~variable, scales = "free")  # one free-scaled panel per variable; jitter is horizontal only
 good.caret = res.list[[10]]  # NOTE(review): res.list is now built only from mlrHyperopt job ids, so this is presumably not a caret result -- confirm
 good.mlrHyper = res.list[[20]]  # NOTE(review): res.list is requested for 10 ids only, so index 20 looks out of range -- confirm
 good.caret$model$results

diff --git a/meta/popular_learners_mlr.R b/meta/popular_learners_mlr.R
@@ -0,0 +1,28 @@
+# find most popular learners in mlr
+# install_github("metacran/cranlogs")
+library(mlr)
+library(stringi)
+library(cranlogs)
+library(data.table)
+
+# obtain used packages for all learners (package column is a comma separated list)
+lrns = as.data.table(listLearners())
+all.pkgs = stri_split(lrns$package, fixed = ",")
+
+# get download numbers for all packages, summed over the last month
+all.downloads = cran_downloads(packages = unique(unlist(all.pkgs)), when = "last-month")
+all.downloads = as.data.table(all.downloads)
+monthly.downloads = all.downloads[, list(monthly = sum(count)), by = package]
+
+# represent each learner by the smallest monthly download count among its packages
+lrn.downloads = vapply(all.pkgs, function(pkgs) {
+  hits = monthly.downloads[package %in% pkgs, monthly]
+  # no download record at all: NA instead of min()'s Inf (plus warning), which
+  # would otherwise rank the learner first in the decreasing order below
+  if (length(hits) == 0L) NA_real_ else as.numeric(min(hits))
+}, numeric(1L))
+
+lrns$downloads = lrn.downloads
+
+lrns[order(downloads, decreasing = TRUE), .(class, name, package, downloads)]
+
+# Take only one representative per name and package
+lrns.small = lrns[order(downloads, decreasing = TRUE), .SD[1,], by = .(name, package)]
+# head() rather than [1:20]: no all-NA padding rows when fewer than 20 rows exist
+head(lrns.small[, .(class, name, package, downloads)], 20L)