-
Notifications
You must be signed in to change notification settings - Fork 92
Closed
Description
Repro:
happiness_data_set = pd.read_csv("Happiness Data Full Set.csv")
y = happiness_data_set['Happiness']
X = happiness_data_set.drop(['Happiness'], axis=1)
X_train, X_holdout, y_train, y_holdout = evalml.preprocessing.split_data(X, y, problem_type='regression', test_size=0.2, random_seed=0)
automl_ = AutoMLSearch(X, y, problem_type="regression", error_callback=raise_error_callback, ensembling=True)
automl_.search()
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-8-3a5d5a487c9c> in <module>
1 automl_ = AutoMLSearch(X, y, problem_type="regression", error_callback=raise_error_callback, ensembling=True)
----> 2 automl_.search()
~/Desktop/evalml/evalml/automl/automl_search.py in search(self, show_iteration_plot)
810 computation = computations[current_computation_index]
811 if computation.done():
--> 812 evaluation = computation.get_result()
813 data, pipeline, job_log = (
814 evaluation.get("scores"),
~/Desktop/evalml/evalml/automl/engine/sequential_engine.py in get_result(self)
37 Raises Exception: If computation fails. Returns traceback.
38 """
---> 39 return self.work(**self.kwargs)
40
41 def cancel(self):
~/Desktop/evalml/evalml/automl/engine/engine_base.py in evaluate_pipeline(pipeline, automl_config, X, y, logger)
308 full_X_train=X,
309 full_y_train=y,
--> 310 logger=logger,
311 )
312
~/Desktop/evalml/evalml/automl/engine/engine_base.py in train_and_score_pipeline(pipeline, automl_config, full_X_train, full_y_train, logger)
227 automl=automl_config,
228 fold_num=i,
--> 229 pipeline=pipeline,
230 )
231 if isinstance(e, PipelineScoreError):
~/Desktop/evalml/evalml/automl/callbacks.py in raise_error_callback(exception, traceback, automl, **kwargs)
13 logger.error(f"AutoML search raised a fatal exception: {str(exception)}")
14 logger.error("\n".join(traceback))
---> 15 raise exception
16
17
~/Desktop/evalml/evalml/automl/engine/engine_base.py in train_and_score_pipeline(pipeline, automl_config, full_X_train, full_y_train, logger)
215 )
216 logger.debug(f"\t\t\tFold {i}: Scoring trained pipeline")
--> 217 scores = cv_pipeline.score(X_valid, y_valid, objectives=objectives_to_score)
218 logger.debug(
219 f"\t\t\tFold {i}: {automl_config.objective.name} score: {scores[automl_config.objective.name]:.3f}"
~/Desktop/evalml/evalml/pipelines/regression_pipeline.py in score(self, X, y, objectives)
51 """
52 objectives = self.create_objectives(objectives)
---> 53 y_predicted = self.predict(X)
54 return self._score_all_objectives(
55 X, y, y_predicted, y_pred_proba=None, objectives=objectives
~/Desktop/evalml/evalml/pipelines/pipeline_meta.py in _check_for_fit(self, X, objective)
24 return method(self, X)
25 elif method.__name__ == "predict":
---> 26 return method(self, X, objective)
27 elif method.__name__ == "inverse_transform":
28 return method(self, X)
~/Desktop/evalml/evalml/pipelines/pipeline_base.py in predict(self, X, objective)
259 """
260 X = infer_feature_types(X)
--> 261 predictions = self.component_graph.predict(X)
262 predictions.name = self.input_target_name
263 return infer_feature_types(predictions)
~/Desktop/evalml/evalml/pipelines/component_graph.py in predict(self, X)
287 return infer_feature_types(X)
288 final_component = self.compute_order[-1]
--> 289 outputs = self._compute_features(self.compute_order, X)
290 return infer_feature_types(
291 outputs.get(final_component, outputs.get(f"{final_component}.x"))
~/Desktop/evalml/evalml/pipelines/component_graph.py in _compute_features(self, component_list, X, y, fit)
358 fit and component_name == self.compute_order[-1]
359 ): # Don't call predict on the final component during fit
--> 360 output = component_instance.predict(input_x)
361 else:
362 output = None
~/Desktop/evalml/evalml/pipelines/components/component_base_meta.py in _check_for_fit(self, X, y)
27 return method(self)
28 elif y is None:
---> 29 return method(self, X)
30 else:
31 return method(self, X, y)
~/Desktop/evalml/evalml/pipelines/components/estimators/regressors/xgboost_regressor.py in predict(self, X)
77 def predict(self, X):
78 X = _rename_column_names_to_numeric(X, flatten_tuples=False)
---> 79 return super().predict(X)
80
81 @property
~/Desktop/evalml/evalml/pipelines/components/component_base_meta.py in _check_for_fit(self, X, y)
27 return method(self)
28 elif y is None:
---> 29 return method(self, X)
30 else:
31 return method(self, X, y)
~/Desktop/evalml/evalml/pipelines/components/estimators/estimator.py in predict(self, X)
72 if isinstance(X.columns, range.RangeIndex):
73 X.columns = [x for x in X.columns]
---> 74 predictions = self._component_obj.predict(X)
75 except AttributeError:
76 raise MethodPropertyNotFoundError(
~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/sklearn.py in predict(self, X, output_margin, ntree_limit, validate_features, base_margin, iteration_range)
824 missing=self.missing,
825 base_margin=base_margin,
--> 826 validate_features=validate_features,
827 )
828 if _is_cupy_array(predts):
~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/core.py in inplace_predict(self, data, iteration_range, predict_type, missing, validate_features, base_margin, strict_shape)
1852 ctypes.byref(shape),
1853 ctypes.byref(dims),
-> 1854 ctypes.byref(preds),
1855 )
1856 )
~/Desktop/evalml_venv/lib/python3.7/site-packages/xgboost/core.py in _check_call(ret)
208 """
209 if ret != 0:
--> 210 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
211
212
XGBoostError: [18:52:41] /Users/travis/build/dmlc/xgboost/src/c_api/../data/array_interface.h:139: Check failed: typestr.size() == 3 (2 vs. 3) : `typestr' should be of format <endian><type><size of type in bytes>.
Stack trace:
[bt] (0) 1 libxgboost.dylib 0x0000000142f90064 dmlc::LogMessageFatal::~LogMessageFatal() + 116
[bt] (1) 2 libxgboost.dylib 0x0000000142f9527f xgboost::ArrayInterfaceHandler::Validate(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, xgboost::Json, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, xgboost::Json> > > const&) + 1023
[bt] (2) 3 libxgboost.dylib 0x0000000142f94822 xgboost::ArrayInterface::Initialize(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, xgboost::Json, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, xgboost::Json> > > const&, bool) + 34
[bt] (3) 4 libxgboost.dylib 0x0000000142f9aaf3 xgboost::data::ArrayAdapter::ArrayAdapter(xgboost::StringView) + 147
[bt] (4) 5 libxgboost.dylib 0x0000000142f89419 XGBoosterPredictFromDense + 153
[bt] (5) 6 _ctypes.cpython-37m-darwin.so 0x0000000108b78e77 ffi_call_unix64 + 79
It's frustrating that our tests didn't catch this during the upgrade, so it is also worth adding tests so we can catch regressions like this in the future.
I dug into this further: it was the "Happiness Category" column, a Categorical column, that raised this error (dropping that column made the failure go away). I will try to create a smaller reproduction in a test.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels