Skip to content

Commit

Permalink
Merge pull request #31 from ledell/master
Browse files Browse the repository at this point in the history
Added regression functionality to h2o.ensemble.
  • Loading branch information
tomkraljevic committed Oct 22, 2014
2 parents f249955 + 484caf0 commit 43f653e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 9 deletions.
1 change: 1 addition & 0 deletions R/ensemble/README.md
Expand Up @@ -28,6 +28,7 @@ R CMD INSTALL h2oEnsemble-package


## Known Bugs
- This package is incompatible with R 3.0.0-3.1.0 due to a [parser bug](https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=15753) in R. Upgrade to R 3.1.1 or greater to resolve the issue. The package may work on versions of R earlier than 3.0.0, but this has not been tested.
- Sometimes while executing `h2o.ensemble`, the code hangs due to a communication issue with H2O. When this happens, you may see output like the example below. To fix it, restart R.
```
GET /Cloud.json HTTP/1.1
Expand Down
20 changes: 13 additions & 7 deletions R/ensemble/h2oEnsemble-package/R/ensemble.R
Expand Up @@ -61,7 +61,7 @@ function(x, y, data, family = "binomial",
if (grepl("^SL.", metalearner)) {
# this is very hacky and should be used only for testing until we get the h2o metalearner functions sorted out...
familyFun <- get(family, mode = "function", envir = parent.frame())
Ztmp <- subset(Z, select=-c(fold_id, Class))
Ztmp <- Z[, -which(names(Z) %in% c("fold_id", y))]
runtime$metalearning <- system.time(metafit <- match.fun(metalearner)(Y=as.data.frame(data[,c(y)])[,1], X=Ztmp, newX=Ztmp,
family=familyFun, id=seq(N), obsWeights=rep(1,N)), gcFirst=FALSE)
} else {
Expand Down Expand Up @@ -119,9 +119,11 @@ function(x, y, data, family = "binomial",
print(sprintf("Cross-validating learner %s: fold %s", idxs$l[i], idxs$v[i]))
if (is.numeric(seed)) set.seed(seed) #If seed is specified, set seed prior to next step
fit <- match.fun(learner[idxs$l[i]])(y=y, x=xcols, data=data[data$fold_id!=idxs$v[i]], family=family)
# Regarding preds assignment below: This is hardcoded for binary outcome (ie. we are grabbing the X1 column)
# Probably need to modify this line so that it also works for regression
preds <- as.data.frame(h2o.predict(fit, data[data$fold_id==idxs$v[i]]))$X1
if (family == "binomial") {
preds <- as.data.frame(h2o.predict(fit, data[data$fold_id==idxs$v[i]]))$X1
} else {
preds <- as.data.frame(h2o.predict(fit, data[data$fold_id==idxs$v[i]]))$predict
}
# Note: column subsetting not supported yet in H2OParsedData object however,
# if we can enable that, then it is probably better to insert the preds into
# a H2OParsedData object instead of returning 'preds' and bringing into R memory.
Expand Down Expand Up @@ -212,9 +214,13 @@ predict.h2o.ensemble <-
L <- length(object$basefits)
basepreddf <- as.data.frame(matrix(NA, nrow = nrow(newdata), ncol = L))
for (l in seq(L)) {
# This is hardcoded ($X1) for binary classification, should change this
basepreddf[, l] <- as.data.frame(do.call('h2o.predict', list(object = object$basefits[[l]],
newdata = newdata)))$X1
if (object$family == "binomial") {
basepreddf[, l] <- as.data.frame(do.call('h2o.predict', list(object = object$basefits[[l]],
newdata = newdata)))$X1
} else {
basepreddf[, l] <- as.data.frame(do.call('h2o.predict', list(object = object$basefits[[l]],
newdata = newdata)))$predict
}
}
names(basepreddf) <- names(object$basefits)
basepreddf[basepreddf < object$ylim[1]] <- object$ylim[1] #Enforce bounds
Expand Down
5 changes: 3 additions & 2 deletions R/ensemble/h2oEnsemble-package/R/wrappers.R
Expand Up @@ -5,7 +5,7 @@
# like "x, y, data, family", so that the ensemble code can be written more cleanly.


h2o.glm.wrapper <- function(x, y, data, key = "", family = "binomial", link = "logit", nfolds = 0, alpha = 0.5, nlambda = -1,
h2o.glm.wrapper <- function(x, y, data, key = "", family = "binomial", link, nfolds = 0, alpha = 0.5, nlambda = -1,
lambda.min.ratio = -1, lambda = 1e-5, epsilon = 1e-4, standardize = TRUE,
prior, variable_importances = FALSE, use_all_factor_levels = FALSE,
tweedie.p = ifelse(family == 'tweedie', 1.5, as.numeric(NA)), iter.max = 100,
Expand Down Expand Up @@ -44,7 +44,8 @@ h2o.randomForest.wrapper <- function(x, y, data, key = "", family = "binomial",
ntree = ntree, depth = depth, sample.rate = sample.rate, nbins = nbins, seed = seed,
importance = importance, nfolds = nfolds, validation = validation, nodesize = nodesize,
balance.classes = balance.classes, max.after.balance.size = max.after.balance.size,
doGrpSplit = doGrpSplit, verbose = verbose, oobee = oobee, stat.type = stat.type, type = type)
doGrpSplit = doGrpSplit, verbose = verbose, oobee = oobee, stat.type = stat.type,
type = ifelse(family=="binomial", type, "BigData"))
}


Expand Down

0 comments on commit 43f653e

Please sign in to comment.