Skip to content

Commit

Permalink
Remove extra naive pipelines (#4142)
Browse files Browse the repository at this point in the history
* Random Forest - Two Naive

* Elastic Net - Two Naive

* XGBoost - Two Naive

* 1 Naive RF - XGB Feat Select

* 1 Naive RF - EN Feat Select

* Revert feature selector changes

* Updated tests

* Set timeseries example to iterative algo

* Lint fix

* Lint notebook
  • Loading branch information
christopherbunn committed Apr 18, 2023
1 parent 134630a commit 21eb2ff
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 25 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Release Notes
**Future Releases**
* Enhancements
* Saved additional_objectives computed during search to AutoML object :pr:`4141`
* Removed extra naive pipelines :pr:`4142`
* Fixes
* Fixed usage of codecov after uploader deprecation :pr:`4144`
* Changes
Expand Down
1 change: 1 addition & 0 deletions docs/source/user_guide/timeseries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@
" problem_type=\"time series regression\",\n",
" max_batches=1,\n",
" problem_configuration=problem_config,\n",
" automl_algorithm=\"iterative\",\n",
" allowed_model_families=[\n",
" \"xgboost\",\n",
" \"random_forest\",\n",
Expand Down
2 changes: 0 additions & 2 deletions evalml/automl/automl_algorithm/default_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,10 @@ def num_pipelines_per_batch(self, batch_number):
def _naive_estimators(self):
if is_regression(self.problem_type):
naive_estimators = [
"Elastic Net Regressor",
"Random Forest Regressor",
]
else:
naive_estimators = [
"Logistic Regression Classifier",
"Random Forest Classifier",
]
estimators = [
Expand Down
32 changes: 16 additions & 16 deletions evalml/tests/automl_tests/test_automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,9 @@ def test_search_batch_times(caplog, X_y_binary, AutoMLTestEnv):
assert isinstance(batch_times[3]["Total time of batch"], str)

assert len(batch_times) == 3
assert len(batch_times[1]) == 3
assert len(batch_times[2]) == 3
assert len(batch_times[3]) == 7
assert len(batch_times[1]) == 2
assert len(batch_times[2]) == 2
assert len(batch_times[3]) == 8

assert "Batch Time Stats" in out
assert "Batch 1 time stats" in out
Expand Down Expand Up @@ -1781,14 +1781,14 @@ def test_pipelines_in_batch_return_nan(
):
X, y = X_y_binary
mock_rankings.side_effect = [
make_mock_rankings([0, 0, 0]), # first batch
make_mock_rankings([0, 0, 0, 0, np.nan]), # second batch
make_mock_rankings([0, 0, 0, 0, np.nan, np.nan, np.nan]),
make_mock_rankings([0, 0]), # first batch
make_mock_rankings([0, 0, 0, np.nan]), # second batch
make_mock_rankings([0, 0, 0, np.nan, np.nan, np.nan]),
] # third batch, should raise error
mock_full_rankings.side_effect = [
make_mock_rankings([0, 0, 0]), # first batch
make_mock_rankings([0, 0, 0, 0, np.nan]), # second batch
make_mock_rankings([0, 0, 0, 0, np.nan, np.nan, np.nan]),
make_mock_rankings([0, 0]), # first batch
make_mock_rankings([0, 0, 0, np.nan]), # second batch
make_mock_rankings([0, 0, 0, np.nan, np.nan, np.nan]),
] # third batch, should raise error
mock_next_batch.side_effect = [
[
Expand Down Expand Up @@ -1834,14 +1834,14 @@ def test_pipelines_in_batch_return_none(
):
X, y = X_y_binary
mock_rankings.side_effect = [
make_mock_rankings([0, 0, 0]), # first batch
make_mock_rankings([0, 0, 0, 0, None]), # second batch
make_mock_rankings([0, 0, 0, 0, None, None, None]),
make_mock_rankings([0, 0]), # first batch
make_mock_rankings([0, 0, 0, None]), # second batch
make_mock_rankings([0, 0, 0, None, None, None]),
] # third batch, should raise error
mock_full_rankings.side_effect = [
make_mock_rankings([0, 0, 0]), # first batch
make_mock_rankings([0, 0, 0, 0, None]), # second batch
make_mock_rankings([0, 0, 0, 0, None, None, None]),
make_mock_rankings([0, 0]), # first batch
make_mock_rankings([0, 0, 0, None]), # second batch
make_mock_rankings([0, 0, 0, None, None, None]),
] # third batch, should raise error
mock_next_batch.side_effect = [
[
Expand Down Expand Up @@ -2588,7 +2588,7 @@ def test_max_batches_plays_nice_with_other_stopping_criteria(AutoMLTestEnv, X_y_
with env.test_context(score_return_value={"Log Loss Binary": 0.3}):
automl.search()

assert len(automl.results["pipeline_results"]) == 3
assert len(automl.results["pipeline_results"]) == 2


@pytest.mark.parametrize("max_batches", [-1, -10, -np.inf])
Expand Down
14 changes: 7 additions & 7 deletions evalml/tests/automl_tests/test_default_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,15 @@ def test_default_algorithm(
problem_type = automl_type
sampler_name = None
algo = DefaultAlgorithm(X, y, problem_type, sampler_name, ensembling=True)
naive_model_families = set([ModelFamily.LINEAR_MODEL, ModelFamily.RANDOM_FOREST])
naive_model_families = set([ModelFamily.RANDOM_FOREST])

first_batch = algo.next_batch()
assert len(first_batch) == 2
assert len(first_batch) == 1
assert {p.model_family for p in first_batch} == naive_model_families
add_result(algo, first_batch)

second_batch = algo.next_batch()
assert len(second_batch) == 2
assert len(second_batch) == 1
assert {p.model_family for p in second_batch} == naive_model_families
for pipeline in second_batch:
assert pipeline.get_component(fs)
Expand Down Expand Up @@ -600,10 +600,10 @@ def test_default_algorithm_time_series(
sampler_name,
search_parameters=search_parameters,
)
naive_model_families = set([ModelFamily.LINEAR_MODEL, ModelFamily.RANDOM_FOREST])
naive_model_families = set([ModelFamily.RANDOM_FOREST])

first_batch = algo.next_batch()
assert len(first_batch) == 2 if problem_type != "time series regression" else 4
assert len(first_batch) == 1 if problem_type != "time series regression" else 4
assert {p.model_family for p in first_batch} == naive_model_families
for pipeline in first_batch:
assert pipeline.parameters["pipeline"] == search_parameters["pipeline"]
Expand Down Expand Up @@ -680,10 +680,10 @@ def test_default_algorithm_time_series_known_in_advance(
sampler_name,
search_parameters=search_parameters,
)
naive_model_families = set([ModelFamily.LINEAR_MODEL, ModelFamily.RANDOM_FOREST])
naive_model_families = set([ModelFamily.RANDOM_FOREST])

first_batch = algo.next_batch()
assert len(first_batch) == 2
assert len(first_batch) == 1
assert {p.model_family for p in first_batch} == naive_model_families
for pipeline in first_batch:
assert (
Expand Down

0 comments on commit 21eb2ff

Please sign in to comment.