Permalink
Switch branches/tags
last_OK jenkins-tomk-hadoop-1 jenkins-tomas_jenkins-7 jenkins-tomas_jenkins-6 jenkins-tomas_jenkins-5 jenkins-tomas_jenkins-4 jenkins-tomas_jenkins-3 jenkins-tomas_jenkins-2 jenkins-tomas_jenkins-1 jenkins-sample-docs-3 jenkins-sample-docs-2 jenkins-sample-docs-1 jenkins-rel-wright-10 jenkins-rel-wright-9 jenkins-rel-wright-8 jenkins-rel-wright-7 jenkins-rel-wright-6 jenkins-rel-wright-5 jenkins-rel-wright-4 jenkins-rel-wright-3 jenkins-rel-wright-2 jenkins-rel-wright-1 jenkins-rel-wolpert-11 jenkins-rel-wolpert-10 jenkins-rel-wolpert-9 jenkins-rel-wolpert-8 jenkins-rel-wolpert-7 jenkins-rel-wolpert-6 jenkins-rel-wolpert-5 jenkins-rel-wolpert-4 jenkins-rel-wolpert-3 jenkins-rel-wolpert-2 jenkins-rel-wolpert-1 jenkins-rel-wheeler-12 jenkins-rel-wheeler-11 jenkins-rel-wheeler-10 jenkins-rel-wheeler-9 jenkins-rel-wheeler-8 jenkins-rel-wheeler-7 jenkins-rel-wheeler-6 jenkins-rel-wheeler-5 jenkins-rel-wheeler-4 jenkins-rel-wheeler-3 jenkins-rel-wheeler-2 jenkins-rel-wheeler-1 jenkins-rel-weierstrass-7 jenkins-rel-weierstrass-6 jenkins-rel-weierstrass-5 jenkins-rel-weierstrass-4 jenkins-rel-weierstrass-3 jenkins-rel-weierstrass-2 jenkins-rel-weierstrass-1 jenkins-rel-vapnik-1 jenkins-rel-vajda-4 jenkins-rel-vajda-3 jenkins-rel-vajda-2 jenkins-rel-vajda-1 jenkins-rel-ueno-12 jenkins-rel-ueno-11 jenkins-rel-ueno-10 jenkins-rel-ueno-9 jenkins-rel-ueno-8 jenkins-rel-ueno-7 jenkins-rel-ueno-6 jenkins-rel-ueno-5 jenkins-rel-ueno-4 jenkins-rel-ueno-3 jenkins-rel-ueno-2 jenkins-rel-ueno-1 jenkins-rel-tverberg-6 jenkins-rel-tverberg-5 jenkins-rel-tverberg-4 jenkins-rel-tverberg-3 jenkins-rel-tverberg-2 jenkins-rel-tverberg-1 jenkins-rel-tutte-2 jenkins-rel-tutte-1 jenkins-rel-turnbull-2 jenkins-rel-turnbull-1 jenkins-rel-turing-10 jenkins-rel-turing-9 jenkins-rel-turing-8 jenkins-rel-turing-7 jenkins-rel-turing-6 jenkins-rel-turing-5 jenkins-rel-turing-4 jenkins-rel-turing-3 jenkins-rel-turing-2 jenkins-rel-turing-1 jenkins-rel-turin-4 jenkins-rel-turin-3 jenkins-rel-turin-2 
jenkins-rel-turin-1 jenkins-rel-turchin-11 jenkins-rel-turchin-10 jenkins-rel-turchin-9 jenkins-rel-turchin-8 jenkins-rel-turchin-7 jenkins-rel-turchin-6 jenkins-rel-turchin-5
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
73 lines (62 sloc) 3.24 KB
# Switch the working directory to the directory containing the file named by
# the `f` command-line argument, so the relative path used below resolves.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Load the shared test setup script. Its contents are not visible here;
# presumably it provides helpers used below such as `doTest`, `locate` and
# `expect_model_param` -- TODO confirm.
source("../../../scripts/h2o-r-test-setup.R")
# Grid-search test for GBM on the airlines sample data.
#
# Takes no arguments and is meant to be passed to the test runner. It performs
# three grid searches over the same hyper-parameter space and checks the number
# of models each one produces:
#   1. no search criteria: expects one model per hyper-parameter combination,
#      and checks the parameter values seen across the models;
#   2. "RandomDiscrete" with `max_models`: expects exactly `max_models` models;
#   3. "RandomDiscrete" with metric-based stopping: expects fewer models than
#      the full hyper-parameter space.
# The models of the third grid are then used as base models of a stacked
# ensemble, which is scored on the training frame.
gbm.grid.test <- function() {
  air.hex <- h2o.uploadFile(
    locate("smalldata/airlines/allyears2k_headers.zip"),
    destination_frame = "air.hex"
  )
  print(summary(air.hex))
  myX <- c("DayofMonth", "DayOfWeek")

  # Specify grid hyper parameters
  ntrees_opts <- c(5, 10, 15)
  max_depth_opts <- c(2, 3, 4)
  learn_rate_opts <- c(0.1, 0.2)
  size_of_hyper_space <- length(ntrees_opts) * length(max_depth_opts) *
    length(learn_rate_opts)
  hyper_params <- list(
    ntrees = ntrees_opts,
    max_depth = max_depth_opts,
    learn_rate = learn_rate_opts
  )

  # Search without any search criteria: every combination should be built.
  air.grid <- h2o.grid(
    "gbm", y = "IsDepDelayed", x = myX,
    distribution = "bernoulli",
    training_frame = air.hex,
    hyper_params = hyper_params
  )
  print(air.grid)
  expect_equal(length(air.grid@model_ids), size_of_hyper_space)

  # Get models
  grid_models <- lapply(air.grid@model_ids, h2o.getModel)

  # Check expected number of models
  expect_equal(length(grid_models), size_of_hyper_space)
  expect_model_param(grid_models, "ntrees", ntrees_opts)
  expect_model_param(grid_models, "max_depth", max_depth_opts)
  expect_model_param(grid_models, "learn_rate", learn_rate_opts)

  #
  # test random/max_models search criterion: max_models
  max_models <- 5
  search_criteria <- list(
    strategy = "RandomDiscrete",
    max_models = max_models,
    seed = 1234
  )
  air.grid <- h2o.grid(
    "gbm", y = "IsDepDelayed", x = myX,
    distribution = "bernoulli",
    training_frame = air.hex,
    hyper_params = hyper_params,
    search_criteria = search_criteria
  )
  print(air.grid)
  expect_equal(length(air.grid@model_ids), max_models)

  # test random/max_models search criterion: asymptotic
  search_criteria <- list(
    strategy = "RandomDiscrete",
    stopping_metric = "AUTO",
    stopping_tolerance = 0.01,
    stopping_rounds = 3,
    seed = 1234
  )
  # Cross-validation with a fixed fold assignment and kept CV predictions is
  # requested here because these models are reused as ensemble base models.
  air.grid <- h2o.grid(
    "gbm", y = "IsDepDelayed", x = myX,
    distribution = "bernoulli",
    training_frame = air.hex,
    hyper_params = hyper_params,
    search_criteria = search_criteria,
    nfolds = 5, fold_assignment = "Modulo",
    keep_cross_validation_predictions = TRUE,
    seed = 5678
  )
  print(air.grid)
  # The stopping criteria should end the search before the space is exhausted.
  expect_true(length(air.grid@model_ids) < size_of_hyper_space)

  # stacker.grid <- h2o.grid("stackedensemble", y = "IsDepDelayed", x = myX,
  #                          training_frame = air.hex,
  #                          model_id = "my_ensemble",
  #                          base_models = air.grid@model_ids)
  stacker <- h2o.stackedEnsemble(
    x = myX, y = "IsDepDelayed", training_frame = air.hex,
    model_id = "my_ensemble",
    base_models = air.grid@model_ids
  )
  predictions <- h2o.predict(stacker, air.hex)  # training data
  print("predictions for ensemble are in: ")
  print(h2o.getId(predictions))
}
# Hand the test function and its description to `doTest`, which is not defined
# in this file (presumably provided by the sourced setup script -- confirm).
doTest("GBM Grid Test: Airlines Smalldata", gbm.grid.test)