Skip to content

XGBoost error when running v1.4.0 #2497

@angela97lin

Description

@angela97lin

Repro:

happiness_data_set = pd.read_csv("Happiness Data Full Set.csv")
y = happiness_data_set['Happiness']
X = happiness_data_set.drop(['Happiness'], axis=1)

X_train, X_holdout, y_train, y_holdout = evalml.preprocessing.split_data(X, y, problem_type='regression', test_size=0.2, random_seed=0)

automl_ = AutoMLSearch(X, y, problem_type="regression", error_callback=raise_error_callback, ensembling=True)
automl_.search()

---------------------------------------------------------------------------
XGBoostError                              Traceback (most recent call last)
<ipython-input-8-3a5d5a487c9c> in <module>
      1 automl_ = AutoMLSearch(X, y, problem_type="regression", error_callback=raise_error_callback, ensembling=True)
----> 2 automl_.search()

~/Desktop/evalml/evalml/automl/automl_search.py in search(self, show_iteration_plot)
    810                     computation = computations[current_computation_index]
    811                     if computation.done():
--> 812                         evaluation = computation.get_result()
    813                         data, pipeline, job_log = (
    814                             evaluation.get("scores"),

~/Desktop/evalml/evalml/automl/engine/sequential_engine.py in get_result(self)
     37         Raises Exception: If computation fails. Returns traceback.
     38         """
---> 39         return self.work(**self.kwargs)
     40 
     41     def cancel(self):

~/Desktop/evalml/evalml/automl/engine/engine_base.py in evaluate_pipeline(pipeline, automl_config, X, y, logger)
    308         full_X_train=X,
    309         full_y_train=y,
--> 310         logger=logger,
    311     )
    312 

~/Desktop/evalml/evalml/automl/engine/engine_base.py in train_and_score_pipeline(pipeline, automl_config, full_X_train, full_y_train, logger)
    227                     automl=automl_config,
    228                     fold_num=i,
--> 229                     pipeline=pipeline,
    230                 )
    231             if isinstance(e, PipelineScoreError):

~/Desktop/evalml/evalml/automl/callbacks.py in raise_error_callback(exception, traceback, automl, **kwargs)
     13     logger.error(f"AutoML search raised a fatal exception: {str(exception)}")
     14     logger.error("\n".join(traceback))
---> 15     raise exception
     16 
     17 

~/Desktop/evalml/evalml/automl/engine/engine_base.py in train_and_score_pipeline(pipeline, automl_config, full_X_train, full_y_train, logger)
    215                 )
    216             logger.debug(f"\t\t\tFold {i}: Scoring trained pipeline")
--> 217             scores = cv_pipeline.score(X_valid, y_valid, objectives=objectives_to_score)
    218             logger.debug(
    219                 f"\t\t\tFold {i}: {automl_config.objective.name} score: {scores[automl_config.objective.name]:.3f}"

~/Desktop/evalml/evalml/pipelines/regression_pipeline.py in score(self, X, y, objectives)
     51         """
     52         objectives = self.create_objectives(objectives)
---> 53         y_predicted = self.predict(X)
     54         return self._score_all_objectives(
     55             X, y, y_predicted, y_pred_proba=None, objectives=objectives

~/Desktop/evalml/evalml/pipelines/pipeline_meta.py in _check_for_fit(self, X, objective)
     24                 return method(self, X)
     25             elif method.__name__ == "predict":
---> 26                 return method(self, X, objective)
     27             elif method.__name__ == "inverse_transform":
     28                 return method(self, X)

~/Desktop/evalml/evalml/pipelines/pipeline_base.py in predict(self, X, objective)
    259         """
    260         X = infer_feature_types(X)
--> 261         predictions = self.component_graph.predict(X)
    262         predictions.name = self.input_target_name
    263         return infer_feature_types(predictions)

~/Desktop/evalml/evalml/pipelines/component_graph.py in predict(self, X)
    287             return infer_feature_types(X)
    288         final_component = self.compute_order[-1]
--> 289         outputs = self._compute_features(self.compute_order, X)
    290         return infer_feature_types(
    291             outputs.get(final_component, outputs.get(f"{final_component}.x"))

~/Desktop/evalml/evalml/pipelines/component_graph.py in _compute_features(self, component_list, X, y, fit)
    358                     fit and component_name == self.compute_order[-1]
    359                 ):  # Don't call predict on the final component during fit
--> 360                     output = component_instance.predict(input_x)
    361                 else:
    362                     output = None

~/Desktop/evalml/evalml/pipelines/components/component_base_meta.py in _check_for_fit(self, X, y)
     27                 return method(self)
     28             elif y is None:
---> 29                 return method(self, X)
     30             else:
     31                 return method(self, X, y)

~/Desktop/evalml/evalml/pipelines/components/estimators/regressors/xgboost_regressor.py in predict(self, X)
     77     def predict(self, X):
     78         X = _rename_column_names_to_numeric(X, flatten_tuples=False)
---> 79         return super().predict(X)
     80 
     81     @property

~/Desktop/evalml/evalml/pipelines/components/component_base_meta.py in _check_for_fit(self, X, y)
     27                 return method(self)
     28             elif y is None:
---> 29                 return method(self, X)
     30             else:
     31                 return method(self, X, y)

~/Desktop/evalml/evalml/pipelines/components/estimators/estimator.py in predict(self, X)
     72             if isinstance(X.columns, range.RangeIndex):
     73                 X.columns = [x for x in X.columns]
---> 74             predictions = self._component_obj.predict(X)
     75         except AttributeError:
     76             raise MethodPropertyNotFoundError(

~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/sklearn.py in predict(self, X, output_margin, ntree_limit, validate_features, base_margin, iteration_range)
    824                     missing=self.missing,
    825                     base_margin=base_margin,
--> 826                     validate_features=validate_features,
    827                 )
    828                 if _is_cupy_array(predts):

~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/core.py in inplace_predict(self, data, iteration_range, predict_type, missing, validate_features, base_margin, strict_shape)
   1852                     ctypes.byref(shape),
   1853                     ctypes.byref(dims),
-> 1854                     ctypes.byref(preds),
   1855                 )
   1856             )

~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/core.py in _check_call(ret)
    208     """
    209     if ret != 0:
--> 210         raise XGBoostError(py_str(_LIB.XGBGetLastError()))
    211 
    212 

XGBoostError: [18:52:41] /Users/travis/build/dmlc/xgboost/src/c_api/../data/array_interface.h:139: Check failed: typestr.size() == 3 (2 vs. 3) : `typestr' should be of format <endian><type><size of type in bytes>.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x0000000142f90064 dmlc::LogMessageFatal::~LogMessageFatal() + 116
  [bt] (1) 2   libxgboost.dylib                    0x0000000142f9527f xgboost::ArrayInterfaceHandler::Validate(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, xgboost::Json, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, xgboost::Json> > > const&) + 1023
  [bt] (2) 3   libxgboost.dylib                    0x0000000142f94822 xgboost::ArrayInterface::Initialize(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, xgboost::Json, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, xgboost::Json> > > const&, bool) + 34
  [bt] (3) 4   libxgboost.dylib                    0x0000000142f9aaf3 xgboost::data::ArrayAdapter::ArrayAdapter(xgboost::StringView) + 147
  [bt] (4) 5   libxgboost.dylib                    0x0000000142f89419 XGBoosterPredictFromDense + 153
  [bt] (5) 6   _ctypes.cpython-37m-darwin.so       0x0000000108b78e77 ffi_call_unix64 + 79

It's frustrating that our tests didn't catch this during the upgrade, so I think it is also worth adding tests that would catch a regression like this in the future.

I dug into this more, and it was the Happiness Category column, a Categorical column, that raised this issue (dropping the column made the error go away). I will try to create a smaller repro in a test.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions