
Have automl search raise config errors in init instead of search #933

Merged (11 commits) on Jul 16, 2020
docs/source/changelog.rst (1 addition, 0 deletions)
@@ -16,6 +16,7 @@ Changelog
     * Rename `master` branch to `main` :pr:`918`
     * Add pypi release github action :pr:`923`
     * Updated AutoMLSearch.search stdout output and logging and removed tqdm progress bar :pr:`921`
+    * Moved automl config checks previously in `search()` to init :pr:`933`
 * Documentation Changes
 * Testing Changes
     * Cleaned up fixture names and usages in tests :pr:`895`
evalml/automl/automl_search.py (5 additions, 11 deletions)
@@ -193,6 +193,8 @@ def __init__(self,
         self._automl_algorithm = None
         self._start = None
 
+        self._validate_problem_type()
+
     @property
     def data_check_results(self):
         return self._data_check_results
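The effect of calling `_validate_problem_type()` in `__init__` is that misconfiguration now fails fast at construction time, before any data is loaded. A minimal sketch of the new behavior (assuming `AutoMLSearch` is importable from `evalml.automl`, as exercised by the tests below):

from evalml.automl import AutoMLSearch

# With validation in __init__, an incompatible objective raises immediately
# at construction; previously the same error only surfaced inside search().
try:
    AutoMLSearch(problem_type='binary', objective='R2')
except ValueError as err:
    print(err)  # e.g. "... is not compatible with a binary problem."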
@@ -339,7 +341,6 @@ def search(self, X, y, data_checks="auto", feature_types=None, raise_errors=True
         logger.debug(f"allowed_pipelines set to {[pipeline.name for pipeline in self.allowed_pipelines]}")
         logger.debug(f"allowed_model_families set to {self.allowed_model_families}")
 
-        self._validate_problem_type()
         self._automl_algorithm = IterativeAlgorithm(
             max_pipelines=self.max_pipelines,
             allowed_pipelines=self.allowed_pipelines,
@@ -445,7 +446,7 @@ def _validate_problem_type(self):
             if obj.problem_type != self.problem_type:
                 raise ValueError("Additional objective {} is not compatible with a {} problem.".format(obj.name, self.problem_type.value))
 
-        for pipeline in self.allowed_pipelines:
+        for pipeline in self.allowed_pipelines or []:
             if not pipeline.problem_type == self.problem_type:
                 raise ValueError("Given pipeline {} is not compatible with problem_type {}.".format(pipeline.name, self.problem_type.value))

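The `or []` guard matters because validation now runs in `__init__`, where `allowed_pipelines` may still be None (the test below constructs `AutoMLSearch(problem_type='binary')` with no pipelines at all), and iterating None raises TypeError. A standalone illustration of the idiom, not evalml code:

allowed_pipelines = None

# `None or []` evaluates to [], so the loop body is simply skipped
# instead of raising "TypeError: 'NoneType' object is not iterable".
for pipeline in allowed_pipelines or []:
    pass  # never reached when allowed_pipelines is None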
@@ -483,15 +484,8 @@ def _compute_cv_scores(self, pipeline, X, y, raise_errors=True):
         logger.info("\tStarting cross validation")
         for i, (train, test) in enumerate(self.data_split.split(X, y)):
             logger.debug(f"\t\tTraining and scoring on fold {i}")
-            if isinstance(X, pd.DataFrame):
-                X_train, X_test = X.iloc[train], X.iloc[test]
-            else:
-                X_train, X_test = X[train], X[test]
-            if isinstance(y, pd.Series):
-                y_train, y_test = y.iloc[train], y.iloc[test]
-            else:
-                y_train, y_test = y[train], y[test]
-
+            X_train, X_test = X.iloc[train], X.iloc[test]
+            y_train, y_test = y.iloc[train], y.iloc[test]
             objectives_to_score = [self.objective] + self.additional_objectives
             try:
                 X_threshold_tuning = None
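The deleted isinstance branches tolerated numpy inputs; the replacement indexes with `.iloc` unconditionally, which assumes `X` and `y` are already pandas objects by the time folds are split (presumably coerced earlier in the call path). A short sketch of what the simplified code relies on:

import numpy as np
import pandas as pd

# .iloc does positional row selection, matching the integer fold
# indices produced by data_split.split(X, y).
X = pd.DataFrame(np.random.rand(10, 3))
y = pd.Series(np.random.randint(0, 2, size=10))

train, test = list(range(8)), [8, 9]
X_train, X_test = X.iloc[train], X.iloc[test]
y_train, y_test = y.iloc[train], y.iloc[test]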
evalml/tests/automl_tests/test_automl.py (8 additions, 17 deletions)
@@ -571,14 +571,15 @@ def test_large_dataset_regression(mock_score):
     assert automl.results['pipeline_results'][pipeline_id]['cv_data'][0]['score'] == 1.234
 
 
-def test_allowed_pipelines_with_incorrect_problem_type(X_y_regression, dummy_binary_pipeline_class):
-    X, y = X_y_regression
-    auto = AutoMLSearch(problem_type='regression', allowed_pipelines=[dummy_binary_pipeline_class])
+def test_allowed_pipelines_with_incorrect_problem_type(dummy_binary_pipeline_class):
+    # checks that not setting allowed_pipelines does not error out
+    AutoMLSearch(problem_type='binary')
 
     with pytest.raises(ValueError, match="is not compatible with problem_type"):
-        auto.search(X, y)
+        AutoMLSearch(problem_type='regression', allowed_pipelines=[dummy_binary_pipeline_class])
 
 
-def test_obj_matches_problem_type():
+def test_main_objective_problem_type_mismatch():
     with pytest.raises(ValueError, match="is not compatible with a"):
         AutoMLSearch(problem_type='binary', objective='R2')
 
@@ -609,20 +610,10 @@ def test_checks_at_search_time(mock_search, dummy_regression_pipeline_class, X_y
     with pytest.raises(ValueError, match=error_text):
         error_automl.search(X, y)
 
-    error_text = "in search, problem_type mismatches allowed_pipelines."
-    mock_search.side_effect = ValueError(error_text)
-
-    allowed_pipelines = [dummy_regression_pipeline_class]
-    error_automl = AutoMLSearch(problem_type='binary', allowed_pipelines=allowed_pipelines)
-    with pytest.raises(ValueError, match=error_text):
-        error_automl.search(X, y)
 
 
-def test_objective_at_search_time(X_y_multi):
-    X, y = X_y_multi
-    error_automl = AutoMLSearch(problem_type='multiclass', additional_objectives=['Precision', 'AUC'],)
+def test_incompatible_additional_objectives():
     with pytest.raises(ValueError, match="is not compatible with a "):
-        error_automl.search(X, y)
Comment on lines -621 to -625 (Contributor, Author): reworked this test a little by renaming it and updating it to check during initialization.

+        AutoMLSearch(problem_type='multiclass', additional_objectives=['Precision', 'AUC'])


 def test_default_objective():
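For downstream users, the upshot of this PR is that configuration errors can be handled before any data work begins. A hedged usage sketch (error text approximated from the `_validate_problem_type` diff above):

from evalml.automl import AutoMLSearch

try:
    automl = AutoMLSearch(problem_type='multiclass',
                          additional_objectives=['Precision', 'AUC'])
except ValueError as err:
    # e.g. "Additional objective Precision is not compatible with a multiclass problem."
    print(f"Invalid AutoML configuration: {err}")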