# An example of binary classification on a local machine using h2o.stack
library(h2oEnsemble) # Requires version >=0.1.8 of h2oEnsemble
h2o.init(nthreads = -1) # Start an H2O cluster with nthreads = num cores on your machine
# Import a sample binary outcome train/test set into the H2O cluster
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_5k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")
y <- "response"
x <- setdiff(names(train), y)
family <- "binomial"
# For binary classification, the response should be a factor
train[,y] <- as.factor(train[,y])
test[,y] <- as.factor(test[,y])
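# Optional sanity check (not in the original example): the response should now
# be a categorical column with two levels.
h2o.levels(train[,y])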
# The h2o.stack function is an alternative to the h2o.ensemble function: it lets
# the user train H2O models individually and then stack them together at a later
# time. Saved models, re-loaded from disk, can also be stacked (a sketch of this
# appears at the end of this script).
# The base models must be trained on identical cross-validation folds, which can
# be achieved in one of two ways:
# 1. specify the folds explicitly via the fold_column argument, or
# 2. use the same value of `nfolds` in every model and set `fold_assignment = "Modulo"`.
# A sketch of option 1 follows immediately below; the models in this script use option 2.
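# A minimal, optional sketch of option 1 (explicit folds). The column name
# "fold_id", the frame `train_fc` and the model `glm_fc` are illustrative
# choices for this sketch only; the rest of the script uses option 2 and does
# not depend on them.
fold_id <- as.h2o(data.frame(fold_id = sample(rep(1:5, length.out = nrow(train)))))
train_fc <- h2o.cbind(train, fold_id)
glm_fc <- h2o.glm(x = x, y = y, family = family,
                  training_frame = train_fc,
                  fold_column = "fold_id",
                  keep_cross_validation_predictions = TRUE)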
nfolds <- 5
glm1 <- h2o.glm(x = x, y = y, family = family,
                training_frame = train,
                nfolds = nfolds,
                fold_assignment = "Modulo",
                keep_cross_validation_predictions = TRUE)

gbm1 <- h2o.gbm(x = x, y = y, distribution = "bernoulli",
                training_frame = train,
                seed = 1,
                nfolds = nfolds,
                fold_assignment = "Modulo",
                keep_cross_validation_predictions = TRUE)

rf1 <- h2o.randomForest(x = x, y = y,  #distribution = "bernoulli",
                        training_frame = train,
                        seed = 1,
                        nfolds = nfolds,
                        fold_assignment = "Modulo",
                        keep_cross_validation_predictions = TRUE)

dl1 <- h2o.deeplearning(x = x, y = y, distribution = "bernoulli",
                        training_frame = train,
                        nfolds = nfolds,
                        fold_assignment = "Modulo",
                        keep_cross_validation_predictions = TRUE)
models <- list(glm1, gbm1, rf1, dl1)
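# The metalearner is specified by the name of a learner wrapper function shipped
# with h2oEnsemble; other stock wrappers (e.g. "h2o.gbm.wrapper",
# "h2o.randomForest.wrapper", "h2o.deeplearning.wrapper") could be substituted here.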
metalearner <- "h2o.glm.wrapper"
stack <- h2o.stack(models = models,
                   response_frame = train[,y],
                   metalearner = metalearner,
                   seed = 1,
                   keep_levelone_data = TRUE)
# Compute test set performance:
perf <- h2o.ensemble_performance(stack, newdata = test)
print(perf)
#Base learner performance, sorted by metric:
# learner AUC
#1 GLM_model_R_1459208808410_23420 0.6870772
#4 DeepLearning_model_R_1459208808410_25042 0.7167811
#3 DRF_model_R_1459208808410_24305 0.7697072
#2 GBM_model_R_1459208808410_23438 0.7817096
#
#
#H2O Ensemble Performance on <newdata>:
#----------------
#Family: binomial
#
#Ensemble performance (AUC): 0.787814391608448
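# A minimal sketch of stacking models that were saved to disk and re-loaded, as
# mentioned above. The temporary directory and the names `model_paths`,
# `reloaded_models` and `stack2` are illustrative choices; this assumes the same
# H2O cluster is still running, so the base models' cross-validation holdout
# predictions are still available to the metalearner.
model_dir <- file.path(tempdir(), "h2o_stack_models")
dir.create(model_dir, showWarnings = FALSE)
model_paths <- sapply(models, h2o.saveModel, path = model_dir, force = TRUE)
reloaded_models <- lapply(model_paths, h2o.loadModel)
stack2 <- h2o.stack(models = reloaded_models,
                    response_frame = train[,y],
                    metalearner = metalearner,
                    seed = 1)
print(h2o.ensemble_performance(stack2, newdata = test))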