diff --git a/R/ensemble/README.md b/R/ensemble/README.md index 49fd51cb7c..b0046542dc 100644 --- a/R/ensemble/README.md +++ b/R/ensemble/README.md @@ -28,6 +28,7 @@ R CMD INSTALL h2oEnsemble-package ## Known Bugs +- This package is incompatible with R 3.0.0-3.1.0 due to a [parser bug](https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=15753) in R. Upgrade to R 3.1.1 or greater to resolve the issue. It may work on earlier versions of R but has not been tested. - Sometimes while executing `h2o.ensemble`, the code hangs due to a communication issue with H2O. You may see something like this. To fix, restart R. ``` GET /Cloud.json HTTP/1.1 diff --git a/R/ensemble/h2oEnsemble-package/R/ensemble.R b/R/ensemble/h2oEnsemble-package/R/ensemble.R index ef63ae207a..2a87e46c1d 100644 --- a/R/ensemble/h2oEnsemble-package/R/ensemble.R +++ b/R/ensemble/h2oEnsemble-package/R/ensemble.R @@ -61,7 +61,7 @@ function(x, y, data, family = "binomial", if (grepl("^SL.", metalearner)) { # this is very hacky and should be used only for testing until we get the h2o metalearner functions sorted out... familyFun <- get(family, mode = "function", envir = parent.frame()) - Ztmp <- subset(Z, select=-c(fold_id, Class)) + Ztmp <- Z[, -which(names(Z) %in% c("fold_id", y))] runtime$metalearning <- system.time(metafit <- match.fun(metalearner)(Y=as.data.frame(data[,c(y)])[,1], X=Ztmp, newX=Ztmp, family=familyFun, id=seq(N), obsWeights=rep(1,N)), gcFirst=FALSE) } else { @@ -119,9 +119,11 @@ function(x, y, data, family = "binomial", print(sprintf("Cross-validating learner %s: fold %s", idxs$l[i], idxs$v[i])) if (is.numeric(seed)) set.seed(seed) #If seed is specified, set seed prior to next step fit <- match.fun(learner[idxs$l[i]])(y=y, x=xcols, data=data[data$fold_id!=idxs$v[i]], family=family) - # Regarding preds assignment below: This is hardcoded for binary outcome (ie. we are grabbing the X1 column) - # Probably need to modify this line so that it also works for regression - preds <- as.data.frame(h2o.predict(fit, data[data$fold_id==idxs$v[i]]))$X1 + if (family == "binomial") { + preds <- as.data.frame(h2o.predict(fit, data[data$fold_id==idxs$v[i]]))$X1 + } else { + preds <- as.data.frame(h2o.predict(fit, data[data$fold_id==idxs$v[i]]))$predict + } # Note: column subsetting not supported yet in H2OParsedData object however, # if we can enable that, then it is probably better to insert the preds into # a H2OParsedData object instead of returning 'preds' and bringing into R memory. @@ -212,9 +214,13 @@ predict.h2o.ensemble <- L <- length(object$basefits) basepreddf <- as.data.frame(matrix(NA, nrow = nrow(newdata), ncol = L)) for (l in seq(L)) { - # This is hardcoded ($X1) for binary classification, should change this - basepreddf[, l] <- as.data.frame(do.call('h2o.predict', list(object = object$basefits[[l]], - newdata = newdata)))$X1 + if (object$family == "binomial") { + basepreddf[, l] <- as.data.frame(do.call('h2o.predict', list(object = object$basefits[[l]], + newdata = newdata)))$X1 + } else { + basepreddf[, l] <- as.data.frame(do.call('h2o.predict', list(object = object$basefits[[l]], + newdata = newdata)))$predict + } } names(basepreddf) <- names(object$basefits) basepreddf[basepreddf < object$ylim[1]] <- object$ylim[1] #Enforce bounds diff --git a/R/ensemble/h2oEnsemble-package/R/wrappers.R b/R/ensemble/h2oEnsemble-package/R/wrappers.R index 4dc5f2fe18..112a4ebd8c 100644 --- a/R/ensemble/h2oEnsemble-package/R/wrappers.R +++ b/R/ensemble/h2oEnsemble-package/R/wrappers.R @@ -5,7 +5,7 @@ # like "x, y, data, family", so that the ensemble code can be written more cleanly. -h2o.glm.wrapper <- function(x, y, data, key = "", family = "binomial", link = "logit", nfolds = 0, alpha = 0.5, nlambda = -1, +h2o.glm.wrapper <- function(x, y, data, key = "", family = "binomial", link, nfolds = 0, alpha = 0.5, nlambda = -1, lambda.min.ratio = -1, lambda = 1e-5, epsilon = 1e-4, standardize = TRUE, prior, variable_importances = FALSE, use_all_factor_levels = FALSE, tweedie.p = ifelse(family == 'tweedie', 1.5, as.numeric(NA)), iter.max = 100, @@ -44,7 +44,8 @@ h2o.randomForest.wrapper <- function(x, y, data, key = "", family = "binomial", ntree = ntree, depth = depth, sample.rate = sample.rate, nbins = nbins, seed = seed, importance = importance, nfolds = nfolds, validation = validation, nodesize = nodesize, balance.classes = balance.classes, max.after.balance.size = max.after.balance.size, - doGrpSplit = doGrpSplit, verbose = verbose, oobee = oobee, stat.type = stat.type, type = type) + doGrpSplit = doGrpSplit, verbose = verbose, oobee = oobee, stat.type = stat.type, + type = ifelse(family=="binomial", type, "BigData")) }