Skip to content
This repository has been archived by the owner on Jan 6, 2022. It is now read-only.

Commit

Permalink
working end
Browse files Browse the repository at this point in the history
  • Loading branch information
jakob-r committed Mar 30, 2017
1 parent 0396bb8 commit f6726b8
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 5 deletions.
4 changes: 2 additions & 2 deletions R/getDefaultParSetValues.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ getDefaultParSetValues = function() {
## not compared to caret
# random forest (only mtry in caret)
classif.randomForest = makeParamSet(
makeNumericParam("ntree", lower = 0, upper = 7, trafo = function(x) round(2^x * 10), default = log2(500/10)),
makeNumericParam("ntree", lower = log2(10/10), upper = log2(1000/10), trafo = function(x) round(2^x * 10), default = log2(500/10)),
makeIntegerParam("nodesize", lower = 1, upper = 10, default = 1),
makeIntegerParam("mtry", lower = 1L, upper = expression(p), default = expression(floor(sqrt(p)))),
keys = "p"
),
regr.randomForest = makeParamSet(
makeNumericParam("ntree", lower = 0, upper = 7, trafo = function(x) round(2^x * 10), default = log2(500/10)),
makeNumericParam("ntree", lower = log2(10/10), upper = log2(1000/10), trafo = function(x) round(2^x * 10), default = log2(500/10)),
makeIntegerParam("nodesize", lower = 1, upper = 10, default = 1),
makeIntegerParam(id = "mtry", lower = 1L, upper = expression(p), default = expression(max(floor(p/3), 1))),
keys = "p"
Expand Down
31 changes: 28 additions & 3 deletions meta/benchmark_caret_mlrHyperopt.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,39 @@ res = merge(res, lrns2, all.x = TRUE, by = "learner")
# For rows whose `mlr` column is not NA, overwrite `learner` with that value.
res[!is.na(mlr), learner := mlr, ]
# Visualizing Results
library(ggplot2)
# Convert `time` to a plain number expressed in seconds
# (presumably a difftime beforehand -- TODO confirm).
res$time = as.numeric(res$time, units = "secs")
# Box plots of `measure` for every algorithm/search/budget combination,
# one panel per problem (rows) and learner (columns), with free axis scales.
g = ggplot(data = res, aes(x = paste(algorithm, search, budget), y = measure, fill = paste(algorithm,search)))
g + geom_boxplot() + facet_grid(problem~learner, scales = "free")
# Scatter plot of `time` against `measure`, colored by algorithm.
g = ggplot(data = res, aes(x = measure, y = time, color = algorithm))
g + geom_point() + facet_grid(problem~learner)
# Same relation, with point size mapped to the budget, strongly transparent
# points, learner/problem swapped in the facets and a log10 time axis.
g = ggplot(data = res, aes(x = measure, y = time, color = algorithm, size = as.factor(budget)))
g + geom_point(alpha = 0.1) + facet_grid(learner~problem, scales = "free") + scale_y_log10()
# extract the good parameter settings


# Detailed Analysis
# NOTE(review): the result of this unrestricted call is overwritten by the
# very next line, so it only costs time -- confirm whether it is still wanted.
res.list = reduceResultsList()
# Load the results of the first 10 job ids that belong to mlrHyperopt runs.
res.list = reduceResultsList(ids = res[algorithm == "mlrHyperopt", job.id[1:10]])
# One row per job holding the selected parameter values: use
# `res$model$bestTune` when it exists, otherwise `res$model$hyperopt.res$x`.
# `fill = TRUE` is passed on to reduceResultsDataTable
# (presumably to pad parameters a job does not have -- TODO confirm).
res.x = reduceResultsDataTable(fun = function(job, res) if(!is.null(res$model$bestTune)) res$model$bestTune else res$model$hyperopt.res$x, fill = TRUE)
# Attach the job parameters; `all.y = FALSE` drops parameter rows without a
# matching result row.
res.x = merge(res.x, getJobPars(res), all.y = FALSE)
# Keep the merged table reachable under a second name.
# NOTE(review): if res.x is a data.table this is not a deep copy; use copy()
# if an independent backup is intended -- TODO confirm.
res.x.b = res.x
# Summarise a vector by two representative values.
#
# Args:
#   x: an atomic vector or a factor (e.g. one column of a table).
#
# Returns a vector of length 2:
#   - all values NA:       the first two elements of x (NA, NA)
#   - integer:             c(min, max) as integer, NAs ignored
#   - other numeric:       c(min, max), NAs ignored
#   - factor / character:  the two most frequent values as character
#                          (the second entry is NA if only one value occurs)
#   - anything else:       the first two elements of x
hifu = function(x) {
  if (all(is.na(x))) {
    x[1:2]
  } else if (is.integer(x)) {
    # must be tested before is.numeric(), which is also TRUE for integers
    as.integer(range(x, na.rm = TRUE))
  } else if (is.numeric(x)) {
    range(x, na.rm = TRUE)
  } else if (is.factor(x) || is.character(x)) {
    # scalar condition, hence the short-circuit `||` instead of elementwise `|`
    names(sort(table(x), decreasing = TRUE))[1:2]
  } else {
    x[1:2]
  }
}
# For caret runs with a budget above 10: apply hifu to every remaining column,
# separately for each learner (two summary rows per learner).
res.x[budget > 10 & algorithm == "caret", lapply(.SD, hifu), by = .(learner)]
# Columns that identify a run; they are kept as id columns when reshaping.
id.vars = c("algorithm", "fold", "learner", "budget", "search", "problem", "job.id")
# All numeric columns of res.x that are not id columns.
col.numeric = setdiff(names(which(sapply(res.x, is.numeric))), id.vars)
# Long format: one row per run and numeric column (`variable` / `value`).
m.res.x = melt(res.x[,c(id.vars, col.numeric),with = FALSE], id.vars = id.vars)
# Show the values of "C" and "sigma" on a log2 scale.
m.res.x[variable %in% c("C", "sigma"), value := log2(value)]
# Violin plots with horizontally jittered points of the values per algorithm,
# colored by learner, one panel with free scales per variable.
g = ggplot(m.res.x, mapping = aes(y = value, x = algorithm, color = learner))
g + geom_violin() + geom_point(position = position_jitter(width = 0.2, height = 0)) + facet_wrap(~variable, scales = "free")
# Pick single results by position for a closer look.
# NOTE(review): if res.list only holds the 10 results selected when it was
# built, res.list[[20]] is out of range -- confirm the intended indices.
# NOTE(review): the names suggest a caret and an mlrHyperopt result, but
# nothing here checks which algorithm the picked entries belong to.
good.caret = res.list[[10]]
good.mlrHyper = res.list[[20]]
# Print the `results` element of the picked model.
good.caret$model$results
Expand Down
28 changes: 28 additions & 0 deletions meta/popular_learners_mlr.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# find most popular learners in mlr
# install_github("metacran/cranlogs")
library(mlr)
library(stringi)
library(cranlogs)
library(data.table)

# obtain used packages for all learners
# (the `package` column holds one comma-separated string per learner)
lrns = as.data.table(listLearners())
all.pkgs = stri_split(lrns$package, fixed = ",")

# get download numbers for all packages:
# query the last month once for every distinct package and sum the returned
# counts into a single monthly figure per package
all.downloads = cran_downloads(packages = unique(unlist(all.pkgs)), when = "last-month")
all.downloads = as.data.table(all.downloads)
monthly.downloads = all.downloads[, list(monthly = sum(count)), by = package]

# use minimal download number as representation:
# a learner is rated by the least downloaded package it depends on.
# vapply() guarantees a plain numeric vector with one entry per learner
# (sapply() would silently return a list for empty input); integer results
# are promoted to double.
# NOTE(review): a learner none of whose packages appear in monthly.downloads
# yields min() of an empty set, i.e. Inf plus a warning.
lrn.downloads = vapply(all.pkgs, function(pkgs) {
  monthly.downloads[package %in% pkgs, min(monthly)]
}, numeric(1))

lrns$downloads = lrn.downloads

# all learners, most downloaded first
lrns[order(downloads, decreasing = TRUE), .(class, name, package, downloads)]

# Take only one representative per name and package
# (the first row of each group after sorting by downloads), then show the
# first 20 rows of the result
lrns.small = lrns[order(downloads, decreasing = TRUE), .SD[1,], by = .(name, package)]
lrns.small[1:20, .(class, name, package, downloads)]

0 comments on commit f6726b8

Please sign in to comment.