-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4078 from h2oai/zuzanao_PUBDEV-6754-Add_method_to_provide_actual_ntree_value
PUBDEV-6754: provide actual ntree value
- Loading branch information
Showing 4 changed files with 168 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 69 additions & 0 deletions
69
h2o-py/tests/testdir_algos/sharedtree/pyunit_PUBDEV-6754_ntrees_actual_for_tree_algos.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
from builtins import range | ||
import sys | ||
sys.path.insert(1,"../../../") | ||
import h2o | ||
from tests import pyunit_utils | ||
from h2o.estimators.gbm import H2OGradientBoostingEstimator | ||
from h2o.estimators.isolation_forest import H2OIsolationForestEstimator | ||
from h2o.estimators.random_forest import H2ORandomForestEstimator | ||
from h2o.estimators.xgboost import H2OXGBoostEstimator | ||
|
||
def _report_and_check_ntrees(label, model, ntrees_requested):
    """Print and verify the actual tree count of one trained tree model.

    :param label: algorithm name used as the prefix of the printed messages.
    :param model: trained estimator exposing ``ntrees_actual()``, ``summary()``
        and ``_model_json``.
    :param ntrees_requested: the ``ntrees`` value the model was configured with.
    :raises AssertionError: if the model did not build fewer trees than
        requested, or if ``ntrees_actual()`` disagrees with the
        ``number_of_trees`` entry of the model summary.
    """
    print("\n")
    print("%s: number of trees set by user before building the model is:" % label)
    print(ntrees_requested)
    print("%s: number of trees built with early-stopping is:" % label)
    ntrees_built = model.ntrees_actual()
    print(ntrees_built)

    assert ntrees_built < ntrees_requested, \
        "%s: expected fewer than %s trees, but %s were built" % (label, ntrees_requested, ntrees_built)

    # The same count is read through both the raw model JSON and summary().
    ntrees_json = model._model_json['output']['model_summary']['number_of_trees'][0]
    ntrees_summary = model.summary()['number_of_trees'][0]
    assert ntrees_built == ntrees_json == ntrees_summary, \
        "%s: ntrees_actual()=%s, model JSON=%s, summary()=%s" % (label, ntrees_built, ntrees_json, ntrees_summary)


def tree_algos_ntree_actual():
    """Check ``ntrees_actual()`` for GBM, Isolation Forest, DRF and XGBoost.

    Each model is configured with 1000 trees plus stopping parameters, trained
    on the prostate dataset, and then checked: the actual number of trees must
    be lower than the requested number and must match the ``number_of_trees``
    value in the model summary.
    """
    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    # Column 1 is used as the response (y=1) below, so make it categorical.
    prostate[1] = prostate[1].asfactor()
    prostate.summary()
    ntrees_original = 1000
    predictors = list(range(2, 9))

    prostate_gbm = H2OGradientBoostingEstimator(nfolds=5, ntrees=ntrees_original, distribution="bernoulli",
                                                stopping_metric="MSE", stopping_tolerance=0.01,
                                                stopping_rounds=5)
    prostate_gbm.train(x=predictors, y=1, training_frame=prostate)
    _report_and_check_ntrees("GradientBoosting", prostate_gbm, ntrees_original)

    prostate_if = H2OIsolationForestEstimator(sample_rate=0.1, max_depth=20, ntrees=ntrees_original,
                                              stopping_metric="anomalyscore", stopping_tolerance=0.01,
                                              stopping_rounds=5)
    prostate_if.train(x=predictors, y=1, training_frame=prostate)
    _report_and_check_ntrees("IsolationForest", prostate_if, ntrees_original)

    prostate_drf = H2ORandomForestEstimator(ntrees=ntrees_original, max_depth=20, min_rows=10,
                                            stopping_metric="auc", stopping_tolerance=0.01,
                                            stopping_rounds=5)
    prostate_drf.train(x=predictors, y=1, training_frame=prostate)
    _report_and_check_ntrees("RandomForest", prostate_drf, ntrees_original)

    prostate_xgb = H2OXGBoostEstimator(distribution="auto", ntrees=ntrees_original, seed=1,
                                       stopping_metric="auc", stopping_tolerance=0.01,
                                       stopping_rounds=5)
    prostate_xgb.train(x=predictors, y=1, training_frame=prostate)
    _report_and_check_ntrees("XGBoost", prostate_xgb, ntrees_original)
||
# When imported, run the test immediately; when executed as a script,
# hand the test function to the pyunit standalone runner instead.
if __name__ != "__main__":
    tree_algos_ntree_actual()
else:
    pyunit_utils.standalone_test(tree_algos_ntree_actual)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
63 changes: 63 additions & 0 deletions
63
h2o-r/tests/testdir_algos/sharedtree/runit_PUBDEV-6754_ntrees_actual_for_tree_algos.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source("../../../scripts/h2o-r-test-setup.R") | ||
|
||
|
||
|
||
# Trains GBM, Isolation Forest, Random Forest and XGBoost models on the
# prostate dataset with ntrees = 1000 plus stopping parameters, and checks for
# each model that h2o.get_ntrees_actual() is lower than the requested ntrees
# and equals the number_of_trees entry of the model summary.
test.tree_algos.bernoulli <- function() {
  Log.info("Importing prostate.csv data...\n")
  prostate.hex <- h2o.uploadFile(locate("smalldata/logreg/prostate.csv"), destination_frame="prostate.hex")
  Log.info("Converting CAPSULE and RACE columns to factors...\n")
  prostate.hex$CAPSULE <- as.factor(prostate.hex$CAPSULE)
  prostate.hex$RACE <- as.factor(prostate.hex$RACE)
  Log.info("Summary of prostate.csv from H2O:\n")
  print(summary(prostate.hex))

  # Import csv data for R to use in comparison (only its summary is printed here)
  prostate.data <- read.csv(locate("smalldata/logreg/prostate.csv"), header = TRUE)
  prostate.data$RACE <- as.factor(prostate.data$RACE)
  Log.info("Summary of prostate.csv from R:\n")
  print(summary(prostate.data))

  # Train H2O GBM Model:
  ntrees <- 1000
  Log.info(paste("H2O GBM with parameters:\nnfolds = 5, distribution = 'bernoulli', ntrees = ", ntrees, ", stopping_metric=\"MSE\", stopping_tolerance=0.01, stopping_rounds=5\n", sep = ""))
  prostate_gbm.h2o <- h2o.gbm(x = 3:9, y = "CAPSULE", training_frame = prostate.hex, nfolds = 5, distribution = "bernoulli", ntrees = ntrees, stopping_metric="MSE", stopping_tolerance=0.01, stopping_rounds=5)

  Log.info("GBM Model: number of trees set by user before building the model is:"); print(ntrees)
  Log.info("GBM Model: number of trees built with early-stopping is:"); print(h2o.get_ntrees_actual(prostate_gbm.h2o))

  expect_true(h2o.get_ntrees_actual(prostate_gbm.h2o) < ntrees)
  expect_equal(h2o.get_ntrees_actual(prostate_gbm.h2o), prostate_gbm.h2o@model$model_summary['number_of_trees'][,1])

  # Train H2O Isolation Forest Model:
  Log.info(paste("H2O Isolation Forest with parameters:\nsample_rate = 0.1, max_depth = 20, ntrees = ", ntrees, ", stopping_metric=\"AUTO\", stopping_tolerance=0.01, stopping_rounds=5\n", sep = ""))
  prostate_if.h2o <- h2o.isolationForest(sample_rate = 0.1, max_depth = 20, training_frame = prostate.hex, ntrees=ntrees, stopping_metric="AUTO", stopping_tolerance=0.01, stopping_rounds=5)

  Log.info("Isolation Forest Model: number of trees set by user before building the model is:"); print(ntrees)
  Log.info("Isolation Forest Model: number of trees built with early-stopping is:"); print(h2o.get_ntrees_actual(prostate_if.h2o))

  expect_true(h2o.get_ntrees_actual(prostate_if.h2o) < ntrees)
  expect_equal(h2o.get_ntrees_actual(prostate_if.h2o), prostate_if.h2o@model$model_summary['number_of_trees'][,1])

  # Train H2O Random Forest Model:
  Log.info(paste("H2O Random Forest with parameters:\nx = 1:4, y = 5,max_depth=20, min_rows=10, ntrees = ", ntrees, ", stopping_metric=\"AUTO\", stopping_tolerance=0.01, stopping_rounds=5\n", sep = ""))
  prostate_rf.h2o <- h2o.randomForest(x = 1:4, y = 5, ntrees=ntrees, max_depth=20, min_rows=10, training_frame = prostate.hex, stopping_metric="AUTO", stopping_tolerance=0.01, stopping_rounds=5)

  Log.info("Random Forest Model: number of trees set by user before building the model is:"); print(ntrees)
  Log.info("Random Forest Model: number of trees built with early-stopping is:"); print(h2o.get_ntrees_actual(prostate_rf.h2o))

  expect_true(h2o.get_ntrees_actual(prostate_rf.h2o) < ntrees)
  expect_equal(h2o.get_ntrees_actual(prostate_rf.h2o), prostate_rf.h2o@model$model_summary['number_of_trees'][,1])

  # Train H2O XGBoost Model:
  # The logged parameters mirror the h2o.xgboost() call below
  # (stopping_metric = "deviance", stopping_rounds = 1).
  Log.info(paste("H2O XGBoost with parameters:\nx = 1:4, y = 5,distribution=\"AUTO\", seed=1, ntrees = ", ntrees, ", stopping_metric=\"deviance\", stopping_tolerance=0.01, stopping_rounds=1\n", sep = ""))
  prostate_xgb.h2o <- h2o.xgboost(x = 1:4, y = 5, distribution="AUTO",training_frame = prostate.hex, ntrees=ntrees, seed=1, stopping_metric="deviance", stopping_tolerance=0.01, stopping_rounds=1)
  Log.info("XGBoost Model: number of trees set by user before building the model is:"); print(ntrees)
  Log.info("XGBoost Model: number of trees built with early-stopping is:"); print(h2o.get_ntrees_actual(prostate_xgb.h2o))

  expect_true(h2o.get_ntrees_actual(prostate_xgb.h2o) < ntrees)
  expect_equal(h2o.get_ntrees_actual(prostate_xgb.h2o), prostate_xgb.h2o@model$model_summary['number_of_trees'][,1])

}

# NOTE(review): the label says "GBM Test" although the function also covers
# Isolation Forest, Random Forest and XGBoost; kept unchanged in case test
# reporting keys on it.
doTest("GBM Test: provide actual ntree value", test.tree_algos.bernoulli)