You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
X = np.vstack([rng.normal(cent, 1, size=(sz, 2)) for cent in class_centers])
X = np.column_stack([X, X.sum(1)])
y = np.concatenate([np.full(sz, cls) for cls in range(len(class_centers))])
train = h2o.H2OFrame(np.column_stack([y, X]))
train[train.col_names[0]] = train[train.col_names[0]].asfactor()
return train
/usr/local/lib/python3.8/dist-packages/h2o/job.py in poll(self, poll_updates)
77 if self.status == "FAILED":
78 if (isinstance(self.job, dict)) and ("stacktrace" in list(self.job)):
---> 79 raise EnvironmentError("Job with key {} failed with an exception: {}\nstacktrace: "
80 "\n{}".format(self.job_key, self.exception, self.job["stacktrace"]))
81 else:
OSError: Job with key $03017f00000132d4ffffffff$_a2e0b569ccccb207ccfec8ddbc574e72 failed with an exception: java.lang.ArrayIndexOutOfBoundsException: Index 3 out of bounds for length 3
stacktrace:
java.lang.ArrayIndexOutOfBoundsException: Index 3 out of bounds for length 3
at water.util.ArrayUtils.innerProduct(ArrayUtils.java:72)
at hex.optimization.OptimizationUtils$MoreThuente.evaluate(OptimizationUtils.java:334)
at hex.glm.GLM$GLMDriver.fitIRLSM_multinomial(GLM.java:1593)
at hex.glm.GLM$GLMDriver.fitModel(GLM.java:1978)
at hex.glm.GLM$GLMDriver.computeSubmodel(GLM.java:2437)
at hex.glm.GLM$GLMDriver.doCompute(GLM.java:2573)
at hex.glm.GLM$GLMDriver.computeImpl(GLM.java:2471)
at hex.ModelBuilder$Driver.compute2(ModelBuilder.java:246)
at hex.glm.GLM$GLMDriver.compute2(GLM.java:1154)
at water.H2O$H2OCountedCompleter.compute(H2O.java:1637)
at jsr166y.CountedCompleter.exec(CountedCompleter.java:468)
at jsr166y.ForkJoinTask.doExec(ForkJoinTask.java:263)
at jsr166y.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:974)
at jsr166y.ForkJoinPool.runWorker(ForkJoinPool.java:1477)
at jsr166y.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)
{noformat}
The text was updated successfully, but these errors were encountered:
It seems that training fails when `remove_collinear_columns` is enabled (and multicollinearity is present) with a multinomial-family GLM. Here's an SSCCE (short, self-contained, correct example) to reproduce the problem.
{code:python}
from h2o.estimators import H2OGeneralizedLinearEstimator
import numpy as np
h2o.init()
def generate_data(class_centers, sz, seed = 23):
rng = np.random.default_rng(seed)
train = generate_data([0,5,10], 100)
mdl = H2OGeneralizedLinearEstimator(solver='IRLSM', family='multinomial', link='family_default', seed=76, lambda_=[0], max_iterations=100000, beta_epsilon=1e-7, early_stopping=False, standardize=True, remove_collinear_columns=True, max_runtime_secs=30)
mdl.start(x=train.col_names[1:], y=train.col_names[0], training_frame=train)
mdl.join()
{code}
and the resulting error:
{noformat}
OSError Traceback (most recent call last)
/notebooks/load_data.py in <module>
17 mdl = H2OGeneralizedLinearEstimator(solver='IRLSM', family='multinomial', link='family_default', seed=76, lambda_=[0], max_iterations=100000, beta_epsilon=1e-7, early_stopping=False, standardize=True, remove_collinear_columns=True, max_runtime_secs=30)
18 mdl.start(x=train.col_names[1:], y=train.col_names[0], training_frame=train)
---> 19 mdl.join()
20
/usr/local/lib/python3.8/dist-packages/h2o/estimators/estimator_base.py in join(self)
84 """Wait until job's completion."""
85 self._future = False
---> 86 self._job.poll()
87 model_key = self._job.dest_key
88 self._job = None
/usr/local/lib/python3.8/dist-packages/h2o/job.py in poll(self, poll_updates)
77 if self.status == "FAILED":
78 if (isinstance(self.job, dict)) and ("stacktrace" in list(self.job)):
---> 79 raise EnvironmentError("Job with key {} failed with an exception: {}\nstacktrace: "
80 "\n{}".format(self.job_key, self.exception, self.job["stacktrace"]))
81 else:
OSError: Job with key $03017f00000132d4ffffffff$_a2e0b569ccccb207ccfec8ddbc574e72 failed with an exception: java.lang.ArrayIndexOutOfBoundsException: Index 3 out of bounds for length 3
stacktrace:
java.lang.ArrayIndexOutOfBoundsException: Index 3 out of bounds for length 3
at water.util.ArrayUtils.innerProduct(ArrayUtils.java:72)
at hex.optimization.OptimizationUtils$MoreThuente.evaluate(OptimizationUtils.java:334)
at hex.glm.GLM$GLMDriver.fitIRLSM_multinomial(GLM.java:1593)
at hex.glm.GLM$GLMDriver.fitModel(GLM.java:1978)
at hex.glm.GLM$GLMDriver.computeSubmodel(GLM.java:2437)
at hex.glm.GLM$GLMDriver.doCompute(GLM.java:2573)
at hex.glm.GLM$GLMDriver.computeImpl(GLM.java:2471)
at hex.ModelBuilder$Driver.compute2(ModelBuilder.java:246)
at hex.glm.GLM$GLMDriver.compute2(GLM.java:1154)
at water.H2O$H2OCountedCompleter.compute(H2O.java:1637)
at jsr166y.CountedCompleter.exec(CountedCompleter.java:468)
at jsr166y.ForkJoinTask.doExec(ForkJoinTask.java:263)
at jsr166y.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:974)
at jsr166y.ForkJoinPool.runWorker(ForkJoinPool.java:1477)
at jsr166y.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)
{noformat}
The text was updated successfully, but these errors were encountered: