XGBoost error with running v1.4.0


Repro:
```
happiness_data_set = pd.read_csv("Happiness Data Full Set.csv")
y = happiness_data_set['Happiness']
X = happiness_data_set.drop(['Happiness'], axis=1)

X_train, X_holdout, y_train, y_holdout = evalml.preprocessing.split_data(X, y, problem_type='regression', test_size=0.2, random_seed=0)

automl_ = AutoMLSearch(X, y, problem_type="regression", error_callback=raise_error_callback, ensembling=True)
automl_.search()
```


```

---------------------------------------------------------------------------
XGBoostError                              Traceback (most recent call last)
<ipython-input-8-3a5d5a487c9c> in <module>
      1 automl_ = AutoMLSearch(X, y, problem_type="regression", error_callback=raise_error_callback, ensembling=True)
----> 2 automl_.search()

~/Desktop/evalml/evalml/automl/automl_search.py in search(self, show_iteration_plot)
    810                     computation = computations[current_computation_index]
    811                     if computation.done():
--> 812                         evaluation = computation.get_result()
    813                         data, pipeline, job_log = (
    814                             evaluation.get("scores"),

~/Desktop/evalml/evalml/automl/engine/sequential_engine.py in get_result(self)
     37         Raises Exception: If computation fails. Returns traceback.
     38         """
---> 39         return self.work(**self.kwargs)
     40 
     41     def cancel(self):

~/Desktop/evalml/evalml/automl/engine/engine_base.py in evaluate_pipeline(pipeline, automl_config, X, y, logger)
    308         full_X_train=X,
    309         full_y_train=y,
--> 310         logger=logger,
    311     )
    312 

~/Desktop/evalml/evalml/automl/engine/engine_base.py in train_and_score_pipeline(pipeline, automl_config, full_X_train, full_y_train, logger)
    227                     automl=automl_config,
    228                     fold_num=i,
--> 229                     pipeline=pipeline,
    230                 )
    231             if isinstance(e, PipelineScoreError):

~/Desktop/evalml/evalml/automl/callbacks.py in raise_error_callback(exception, traceback, automl, **kwargs)
     13     logger.error(f"AutoML search raised a fatal exception: {str(exception)}")
     14     logger.error("\n".join(traceback))
---> 15     raise exception
     16 
     17 

~/Desktop/evalml/evalml/automl/engine/engine_base.py in train_and_score_pipeline(pipeline, automl_config, full_X_train, full_y_train, logger)
    215                 )
    216             logger.debug(f"\t\t\tFold {i}: Scoring trained pipeline")
--> 217             scores = cv_pipeline.score(X_valid, y_valid, objectives=objectives_to_score)
    218             logger.debug(
    219                 f"\t\t\tFold {i}: {automl_config.objective.name} score: {scores[automl_config.objective.name]:.3f}"

~/Desktop/evalml/evalml/pipelines/regression_pipeline.py in score(self, X, y, objectives)
     51         """
     52         objectives = self.create_objectives(objectives)
---> 53         y_predicted = self.predict(X)
     54         return self._score_all_objectives(
     55             X, y, y_predicted, y_pred_proba=None, objectives=objectives

~/Desktop/evalml/evalml/pipelines/pipeline_meta.py in _check_for_fit(self, X, objective)
     24                 return method(self, X)
     25             elif method.__name__ == "predict":
---> 26                 return method(self, X, objective)
     27             elif method.__name__ == "inverse_transform":
     28                 return method(self, X)

~/Desktop/evalml/evalml/pipelines/pipeline_base.py in predict(self, X, objective)
    259         """
    260         X = infer_feature_types(X)
--> 261         predictions = self.component_graph.predict(X)
    262         predictions.name = self.input_target_name
    263         return infer_feature_types(predictions)

~/Desktop/evalml/evalml/pipelines/component_graph.py in predict(self, X)
    287             return infer_feature_types(X)
    288         final_component = self.compute_order[-1]
--> 289         outputs = self._compute_features(self.compute_order, X)
    290         return infer_feature_types(
    291             outputs.get(final_component, outputs.get(f"{final_component}.x"))

~/Desktop/evalml/evalml/pipelines/component_graph.py in _compute_features(self, component_list, X, y, fit)
    358                     fit and component_name == self.compute_order[-1]
    359                 ):  # Don't call predict on the final component during fit
--> 360                     output = component_instance.predict(input_x)
    361                 else:
    362                     output = None

~/Desktop/evalml/evalml/pipelines/components/component_base_meta.py in _check_for_fit(self, X, y)
     27                 return method(self)
     28             elif y is None:
---> 29                 return method(self, X)
     30             else:
     31                 return method(self, X, y)

~/Desktop/evalml/evalml/pipelines/components/estimators/regressors/xgboost_regressor.py in predict(self, X)
     77     def predict(self, X):
     78         X = _rename_column_names_to_numeric(X, flatten_tuples=False)
---> 79         return super().predict(X)
     80 
     81     @property

~/Desktop/evalml/evalml/pipelines/components/component_base_meta.py in _check_for_fit(self, X, y)
     27                 return method(self)
     28             elif y is None:
---> 29                 return method(self, X)
     30             else:
     31                 return method(self, X, y)

~/Desktop/evalml/evalml/pipelines/components/estimators/estimator.py in predict(self, X)
     72             if isinstance(X.columns, range.RangeIndex):
     73                 X.columns = [x for x in X.columns]
---> 74             predictions = self._component_obj.predict(X)
     75         except AttributeError:
     76             raise MethodPropertyNotFoundError(

~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/sklearn.py in predict(self, X, output_margin, ntree_limit, validate_features, base_margin, iteration_range)
    824                     missing=self.missing,
    825                     base_margin=base_margin,
--> 826                     validate_features=validate_features,
    827                 )
    828                 if _is_cupy_array(predts):

~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/core.py in inplace_predict(self, data, iteration_range, predict_type, missing, validate_features, base_margin, strict_shape)
   1852                     ctypes.byref(shape),
   1853                     ctypes.byref(dims),
-> 1854                     ctypes.byref(preds),
   1855                 )
   1856             )

~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/core.py in _check_call(ret)
    208     """
    209     if ret != 0:
--> 210         raise XGBoostError(py_str(_LIB.XGBGetLastError()))
    211 
    212 

XGBoostError: [18:52:41] /Users/travis/build/dmlc/xgboost/src/c_api/../data/array_interface.h:139: Check failed: typestr.size() == 3 (2 vs. 3) : `typestr' should be of format <endian><type><size of type in bytes>.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x0000000142f90064 dmlc::LogMessageFatal::~LogMessageFatal() + 116
  [bt] (1) 2   libxgboost.dylib                    0x0000000142f9527f xgboost::ArrayInterfaceHandler::Validate(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, xgboost::Json, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, xgboost::Json> > > const&) + 1023
  [bt] (2) 3   libxgboost.dylib                    0x0000000142f94822 xgboost::ArrayInterface::Initialize(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, xgboost::Json, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, xgboost::Json> > > const&, bool) + 34
  [bt] (3) 4   libxgboost.dylib                    0x0000000142f9aaf3 xgboost::data::ArrayAdapter::ArrayAdapter(xgboost::StringView) + 147
  [bt] (4) 5   libxgboost.dylib                    0x0000000142f89419 XGBoosterPredictFromDense + 153
  [bt] (5) 6   _ctypes.cpython-37m-darwin.so       0x0000000108b78e77 ffi_call_unix64 + 79

```

It's frustrating that our tests didn't catch this during the upgrade, so I think it is also worth adding tests that will catch this kind of regression in the future.

I dug into this more, and it was `Happiness Category`, a Categorical column, that triggered this error (dropping the column made the error go away). I will try to make a smaller repro in a test.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

XGBoost error with running v1.4.0 #2497

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

XGBoost error with running v1.4.0 #2497

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions