Fix broken properties when search has not been run #894

Merged: 11 commits, Jun 30, 2020
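For context, a minimal sketch (not part of the PR) of the pre-search behavior this change establishes. It assumes `AutoMLSearch` is importable from `evalml.automl` and uses the column names from the `full_rankings` change below:

```python
import pandas as pd

from evalml.automl import AutoMLSearch

automl = AutoMLSearch(problem_type='binary')

# Before search() runs, rankings and full_rankings are empty DataFrames
# with the expected columns instead of raising a KeyError.
assert isinstance(automl.rankings, pd.DataFrame)
assert automl.full_rankings.empty
assert list(automl.full_rankings.columns) == ["id", "pipeline_name", "score", "high_variance_cv", "parameters"]

# best_pipeline fails loudly until a search has completed.
try:
    automl.best_pipeline
except RuntimeError as err:
    print(err)  # automl search must be run before selecting `best_pipeline`.
```

This mirrors the assertions in `test_no_search` further down the diff.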
1 change: 1 addition & 0 deletions docs/source/changelog.rst
@@ -28,6 +28,7 @@ Changelog
* Enforce requirement that builtin components save all inputted values in their parameters dict :pr:`847`
* Don't list base classes in `all_components` output :pr:`847`
* Standardize all components to output pandas datastructures, and accept either pandas or numpy :pr:`853`
* Fixed `rankings` and `full_rankings` error when search has not been run :pr:`894`
* Changes
* Update `all_pipelines` and `all_components` to try initializing pipelines/components, and on failure exclude them :pr:`849`
* Refactor `handle_components` to `handle_components_class`, standardize to `ComponentBase` subclass instead of instance :pr:`850`
16 changes: 14 additions & 2 deletions evalml/automl/automl_search.py
@@ -217,10 +217,10 @@ def _get_funct_name(function):
f"Optimize Thresholds: {self.optimize_thresholds}\n"
)

-   try:
+   if not self.rankings.empty:
        rankings_str = self.rankings.drop(['parameters'], axis='columns').to_string()
        rankings_desc = f"\nSearch Results: \n{'='*20}\n{rankings_str}"
-   except KeyError:
+   else:
        rankings_desc = ""

return search_desc + rankings_desc
@@ -630,6 +630,11 @@ def add_to_rankings(self, pipeline, X, y):
return
self._evaluate(pipeline, X, y, raise_errors=True)

@property
def has_searched(self):
searched = True if self.results['pipeline_results'] else False
return searched

@property
def rankings(self):
"""Returns a pandas.DataFrame with scoring results from the highest-scoring set of parameters used with each pipeline."""
@@ -642,6 +647,9 @@ def full_rankings(self):
if self.objective.greater_is_better:
ascending = False

if not self.has_searched:
return pd.DataFrame(columns=["id", "pipeline_name", "score", "high_variance_cv", "parameters"])

rankings_df = pd.DataFrame(self.results['pipeline_results'].values())
rankings_df = rankings_df[["id", "pipeline_name", "score", "high_variance_cv", "parameters"]]
rankings_df.sort_values("score", ascending=ascending, inplace=True)
@@ -651,6 +659,10 @@
@property
def best_pipeline(self):
"""Returns the best model found"""

if not self.has_searched:
raise RuntimeError("automl search must be run before selecting `best_pipeline`.")

best = self.rankings.iloc[0]
return self.get_pipeline(best["id"])

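Why does `full_rankings` return an empty DataFrame with the full column list rather than a bare `pd.DataFrame()`? Callers such as `__str__` (which drops the `parameters` column) and the `rankings`/`full_rankings` assertions in `test_no_search` rely on those columns existing even before a search. A small illustrative sketch of that interaction, using the same column list:

```python
import pandas as pd

columns = ["id", "pipeline_name", "score", "high_variance_cv", "parameters"]
empty_rankings = pd.DataFrame(columns=columns)

# .empty is True, so __str__ takes the "no results" branch...
assert empty_rankings.empty

# ...and column-based operations still work instead of raising a KeyError.
assert list(empty_rankings.drop(['parameters'], axis='columns').columns) == ["id", "pipeline_name", "score", "high_variance_cv"]
```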
47 changes: 47 additions & 0 deletions evalml/tests/automl_tests/test_automl.py
@@ -499,6 +499,20 @@ def test_add_to_rankings(mock_fit, mock_score, dummy_binary_pipeline_class, X_y)
assert 0.1234 in automl.rankings['score'].values


@patch('evalml.pipelines.BinaryClassificationPipeline.score')
@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
def test_add_to_rankings_no_search(mock_fit, mock_score, dummy_binary_pipeline_class, X_y):
X, y = X_y
automl = AutoMLSearch(problem_type='binary', max_pipelines=1, allowed_pipelines=[dummy_binary_pipeline_class])

mock_score.return_value = {'Log Loss Binary': 0.1234}
test_pipeline = dummy_binary_pipeline_class(parameters={})
automl.add_to_rankings(test_pipeline, X, y)

assert len(automl.rankings) == 1
assert 0.1234 in automl.rankings['score'].values


@patch('evalml.pipelines.BinaryClassificationPipeline.score')
@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
def test_add_to_rankings_duplicate(mock_fit, mock_score, dummy_binary_pipeline_class, X_y):
@@ -536,3 +550,36 @@ class CoolBinaryClassificationPipeline(dummy_binary_pipeline_class):
automl.add_to_rankings(test_pipeline_trained, X, y)

assert list(automl.rankings['score'].values).count(0.1234) == 2


@patch('evalml.pipelines.BinaryClassificationPipeline.score')
@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
def test_has_searched(mock_fit, mock_score, dummy_binary_pipeline_class, X_y):
X, y = X_y

automl = AutoMLSearch(problem_type='binary', max_pipelines=1)
assert automl.has_searched is False

automl.search(X, y)
assert automl.has_searched is True


def test_no_search():
automl = AutoMLSearch(problem_type='binary')
assert isinstance(automl.rankings, pd.DataFrame)
assert isinstance(automl.full_rankings, pd.DataFrame)

df_columns = ["id", "pipeline_name", "score", "high_variance_cv", "parameters"]
assert (automl.rankings.columns == df_columns).all()
assert (automl.full_rankings.columns == df_columns).all()

assert automl._data_check_results is None

with pytest.raises(RuntimeError):
automl.best_pipeline

with pytest.raises(RuntimeError):
automl.get_pipeline(0)

with pytest.raises(RuntimeError):
automl.describe_pipeline(0)
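Finally, a hypothetical caller-side sketch of how the new `has_searched` property can serve as a guard (illustrative only, not part of the PR; the scikit-learn breast-cancer dataset simply provides a binary target):

```python
from sklearn.datasets import load_breast_cancer

from evalml.automl import AutoMLSearch

X, y = load_breast_cancer(return_X_y=True)
automl = AutoMLSearch(problem_type='binary', max_pipelines=1)

# has_searched lets callers guard property access instead of catching RuntimeError.
if not automl.has_searched:
    automl.search(X, y)

print(automl.rankings[["pipeline_name", "score"]])
best = automl.best_pipeline  # safe once has_searched is True
```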