-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
h2o.feature_interaction can fail on cv models with early stopping #6605
Comments
JIRA Issue Details Jira Issue: PUBDEV-8808 |
I originally reported this issue on Jira. Reposting as a comment with better formatting: The following example shows that `h2o.feature_interaction` can fail on cross-validation models.
library(data.table)
library(ggplot2) # using to get the builtin diamonds dataset
library(h2o)
h2o.init()
h2o.getVersion()
# [1] "3.36.1.4"
diamonds <- ggplot2::diamonds
diamonds$cut <- factor(diamonds$cut, ordered = FALSE)
diamonds$color <- factor(diamonds$color, ordered = FALSE)
diamonds$clarity <- factor(diamonds$clarity, ordered = FALSE)
diamonds <- as.h2o(diamonds)
diamonds$expensive <- h2o.asfactor(ifelse(diamonds$price > 5000, 1, 0))
d <- h2o.splitFrame(diamonds, seed = 987)
train <- d[[1]]
test <- d[[2]]
train$fold <- h2o.kfold_column(data = train, nfolds = 3, seed = 123)
params <- list( x = setdiff(names(diamonds), "expensive"), y = "expensive", fold_column = "fold", training_frame = as.name("train"), validation_frame = NULL, distribution = "bernoulli", learn_rate = 0.1, ntrees = 500, min_split_improvement = 1e-3, stopping_rounds = 3, stopping_tolerance = 0.001, seed = 456 )
my_gbm <- do.call(what = "h2o.gbm", args = params)
# works
h2o.feature_interaction(model = my_gbm)
my_cv_gbm <- h2o.getModel(my_gbm@model$cross_validation_models[[1]]$name)
h2o.feature_interaction(model = my_cv_gbm)
# ERROR: Unexpected HTTP Status code: 400 Bad Request (url = http://lxm0274:49898/3/FeatureInteraction)
# java.lang.IllegalArgumentException
# [1] "java.lang.IllegalArgumentException: Invalid tree index: 389. Tree index must be in range [0, 388]." " hex.tree.SharedTreeModel.getSharedTreeSubgraph(SharedTreeModel.java:846)"
# [3] " hex.tree.gbm.GBMModel.getFeatureInteractions(GBMModel.java:289)" " hex.tree.gbm.GBMModel.getFeatureInteractionsTable(GBMModel.java:304)"
# [5] " water.api.ModelsHandler.makeFeatureInteraction(ModelsHandler.java:181)" " sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)"
# [7] " sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)" " sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)"
# [9] " java.lang.reflect.Method.invoke(Method.java:498)" " water.api.Handler.handle(Handler.java:60)"
# [11] " water.api.RequestServer.serve(RequestServer.java:472)" " water.api.RequestServer.doGeneric(RequestServer.java:303)"
# [13] " water.api.RequestServer.doPost(RequestServer.java:227)" " javax.servlet.http.HttpServlet.service(HttpServlet.java:707)"
# [15] " javax.servlet.http.HttpServlet.service(HttpServlet.java:790)" " org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:865)"
# [17] " org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:535)" " org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)"
# [19] " org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1317)" " org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)"
# [21] " org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)" " org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)"
# [23] " org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1219)" " org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)"
# [25] " org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)" " org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)"
# [27] " water.webserver.jetty9.Jetty9ServerAdapter$LoginHandler.handle(Jetty9ServerAdapter.java:130)" " org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)"
# [29] " org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)" " org.eclipse.jetty.server.Server.handle(Server.java:531)"
# [31] " org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:352)" " org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)"
# [33] " org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:281)" " org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)"
# [35] " org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)" " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)"
# [37] " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)" " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)"
# [39] " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)" " org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)"
# [41] " org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:762)" " org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:680)"
# [43] " java.lang.Thread.run(Thread.java:748)"
# Error in .h2o.doSafeREST(h2oRestApiVersion = h2oRestApiVersion, urlSuffix = urlSuffix, :
# ERROR MESSAGE:
# Invalid tree index: 389. Tree index must be in range [0, 388]. |
The following example shows that `h2o.feature_interaction` can fail on cross-validation models. It appears to be related to early stopping: when `stopping_rounds` is set to 0, the error does not come up. It seems likely to be related to this issue: https://h2oai.atlassian.net/browse/PUBDEV-8625. The function appears to be trying to access trees that were ultimately dropped.
{code:r}library(data.table)
library(ggplot2) # using to get the builtin diamonds dataset
library(h2o)
h2o.init()
h2o.getVersion()
# [1] "3.36.1.4"
diamonds <- ggplot2::diamonds
diamonds$cut <- factor(diamonds$cut, ordered = FALSE)
diamonds$color <- factor(diamonds$color, ordered = FALSE)
diamonds$clarity <- factor(diamonds$clarity, ordered = FALSE)
diamonds <- as.h2o(diamonds)
diamonds$expensive <- h2o.asfactor(ifelse(diamonds$price > 5000, 1, 0))
d <- h2o.splitFrame(diamonds, seed = 987)
train <- d[[1]]
test <- d[[2]]
train$fold <- h2o.kfold_column(data = train, nfolds = 3, seed = 123)
params <- list(
x = setdiff(names(diamonds), "expensive"),
y = "expensive",
fold_column = "fold",
training_frame = as.name("train"),
validation_frame = NULL,
distribution = "bernoulli",
learn_rate = 0.1,
ntrees = 500,
min_split_improvement = 1e-3,
stopping_rounds = 3,
stopping_tolerance = 0.001,
seed = 456
)
my_gbm <- do.call(what = "h2o.gbm", args = params)
# works
h2o.feature_interaction(model = my_gbm)
my_cv_gbm <- h2o.getModel(my_gbm@model$cross_validation_models[[1]]$name)
h2o.feature_interaction(model = my_cv_gbm)
ERROR: Unexpected HTTP Status code: 400 Bad Request (url = http://lxm0274:49898/3/FeatureInteraction)
java.lang.IllegalArgumentException
[1] "java.lang.IllegalArgumentException: Invalid tree index: 389. Tree index must be in range [0, 388]." " hex.tree.SharedTreeModel.getSharedTreeSubgraph(SharedTreeModel.java:846)"
[3] " hex.tree.gbm.GBMModel.getFeatureInteractions(GBMModel.java:289)" " hex.tree.gbm.GBMModel.getFeatureInteractionsTable(GBMModel.java:304)"
[5] " water.api.ModelsHandler.makeFeatureInteraction(ModelsHandler.java:181)" " sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)"
[7] " sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)" " sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)"
[9] " java.lang.reflect.Method.invoke(Method.java:498)" " water.api.Handler.handle(Handler.java:60)"
[11] " water.api.RequestServer.serve(RequestServer.java:472)" " water.api.RequestServer.doGeneric(RequestServer.java:303)"
[13] " water.api.RequestServer.doPost(RequestServer.java:227)" " javax.servlet.http.HttpServlet.service(HttpServlet.java:707)"
[15] " javax.servlet.http.HttpServlet.service(HttpServlet.java:790)" " org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:865)"
[17] " org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:535)" " org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)"
[19] " org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1317)" " org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)"
[21] " org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)" " org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)"
[23] " org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1219)" " org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)"
[25] " org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)" " org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)"
[27] " water.webserver.jetty9.Jetty9ServerAdapter$LoginHandler.handle(Jetty9ServerAdapter.java:130)" " org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)"
[29] " org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)" " org.eclipse.jetty.server.Server.handle(Server.java:531)"
[31] " org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:352)" " org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)"
[33] " org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:281)" " org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)"
[35] " org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)" " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)"
[37] " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)" " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)"
[39] " org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)" " org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)"
[41] " org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:762)" " org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:680)"
[43] " java.lang.Thread.run(Thread.java:748)"
Error in .h2o.doSafeREST(h2oRestApiVersion = h2oRestApiVersion, urlSuffix = urlSuffix, :
ERROR MESSAGE:
Invalid tree index: 389. Tree index must be in range [0, 388].{code}
The text was updated successfully, but these errors were encountered: