From 1ddfa6f39c36069d4c82d12bde1efd63be6bce8d Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Wed, 20 Jan 2021 18:24:06 -0500 Subject: [PATCH 1/7] initial impl --- evalml/automl/automl_search.py | 7 +++---- evalml/tests/automl_tests/test_automl.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/evalml/automl/automl_search.py b/evalml/automl/automl_search.py index e6cbf59759..33a5e44f1d 100644 --- a/evalml/automl/automl_search.py +++ b/evalml/automl/automl_search.py @@ -245,7 +245,7 @@ def __init__(self, 'errors': [] } self.random_state = get_random_state(random_state) - self.random_seed = get_random_seed(self.random_state) + self.random_seed = get_random_seed(random_state) self.n_jobs = n_jobs self.plot = None @@ -844,13 +844,12 @@ def _evaluate_pipelines(self, current_pipeline_batch, baseline=False, search_ite return current_batch_pipeline_scores - def get_pipeline(self, pipeline_id, random_state=0): + def get_pipeline(self, pipeline_id): """Given the ID of a pipeline training result, returns an untrained instance of the specified pipeline initialized with the parameters used to train that pipeline during automl search. Arguments: pipeline_id (int): pipeline to retrieve - random_state (int, np.random.RandomState): The random seed/state. Defaults to 0. 
Returns: PipelineBase: untrained pipeline instance associated with the provided ID @@ -862,7 +861,7 @@ def get_pipeline(self, pipeline_id, random_state=0): parameters = pipeline_results.get('parameters') if pipeline_class is None or parameters is None: raise PipelineNotFoundError("Pipeline class or parameters not found in automl results") - return pipeline_class(parameters, random_state=random_state) + return pipeline_class(parameters, random_state=self.random_seed) def describe_pipeline(self, pipeline_id, return_dict=False): """Describe a pipeline diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index b2570cf9cd..18ee105ea0 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -58,7 +58,8 @@ from evalml.tuners import NoParamsException, RandomSearchTuner from evalml.utils.gen_utils import ( check_random_state_equality, - get_random_state + get_random_state, + get_random_seed ) @@ -2180,3 +2181,14 @@ def test_automl_pipeline_params_kwargs(mock_fit, mock_score, X_y_multi): if 'Decision Tree Classifier' in row['parameters']: assert 0.1 < row['parameters']['Decision Tree Classifier']['ccp_alpha'] < 0.5 assert row['parameters']['Decision Tree Classifier']['max_depth'] == 2 + + +@pytest.mark.parametrize("random_state", [0, 1, 3, 9]) +@patch('evalml.pipelines.MulticlassClassificationPipeline.score') +@patch('evalml.pipelines.MulticlassClassificationPipeline.fit') +def test_automl_pipeline_random_state(mock_fit, mock_score, random_state, X_y_multi): + X, y = X_y_multi + automl = AutoMLSearch(X_train=X, y_train=y, problem_type='multiclass', random_state=random_state, n_jobs=1) + automl.search() + for i, row in automl.rankings.iterrows(): + print(automl.get_pipeline(row['id']).random_state) From 93a2911a985b0625b75fd5d2fb91ba43ac412003 Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Thu, 21 Jan 2021 15:33:04 -0500 Subject: [PATCH 2/7] remove random_state param from get_pipelines 
--- docs/source/release_notes.rst | 1 + evalml/automl/automl_search.py | 4 ++-- evalml/tests/automl_tests/test_automl.py | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 611583e23e..4b17e9ff33 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -33,6 +33,7 @@ Release Notes * Added ``positive_only`` objectives to ``non_core_objectives`` :pr:`1661` * Updated CatBoost estimators to return self in ``.fit()`` rather than the underlying model for consistency :pr:`1701` * Added ability to initialize pipeline parameters in ``AutoMLSearch`` constructor :pr:`1676` + * Remove ``random_state`` arg from ``get_pipelines`` in ``AutoMLSearch`` :pr:`` * Changes * Added labeling to ``graph_confusion_matrix`` :pr:`1632` * Rerunning search for ``AutoMLSearch`` results in a message thrown rather than failing the search, and removed ``has_searched`` property :pr:`1647` diff --git a/evalml/automl/automl_search.py b/evalml/automl/automl_search.py index 33a5e44f1d..2ec34d98a8 100644 --- a/evalml/automl/automl_search.py +++ b/evalml/automl/automl_search.py @@ -245,7 +245,7 @@ def __init__(self, 'errors': [] } self.random_state = get_random_state(random_state) - self.random_seed = get_random_seed(random_state) + self.random_seed = get_random_seed(self.random_state) self.n_jobs = n_jobs self.plot = None @@ -861,7 +861,7 @@ def get_pipeline(self, pipeline_id): parameters = pipeline_results.get('parameters') if pipeline_class is None or parameters is None: raise PipelineNotFoundError("Pipeline class or parameters not found in automl results") - return pipeline_class(parameters, random_state=self.random_seed) + return pipeline_class(parameters, random_state=self.random_state) def describe_pipeline(self, pipeline_id, return_dict=False): """Describe a pipeline diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index 
18ee105ea0..031fcbd163 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -2183,12 +2183,14 @@ def test_automl_pipeline_params_kwargs(mock_fit, mock_score, X_y_multi): assert row['parameters']['Decision Tree Classifier']['max_depth'] == 2 -@pytest.mark.parametrize("random_state", [0, 1, 3, 9]) +@pytest.mark.parametrize("random_state", [0, 1, 9, np.random.RandomState(100)]) @patch('evalml.pipelines.MulticlassClassificationPipeline.score') @patch('evalml.pipelines.MulticlassClassificationPipeline.fit') def test_automl_pipeline_random_state(mock_fit, mock_score, random_state, X_y_multi): X, y = X_y_multi automl = AutoMLSearch(X_train=X, y_train=y, problem_type='multiclass', random_state=random_state, n_jobs=1) automl.search() + if isinstance(random_state, int): + random_state = automl.random_state for i, row in automl.rankings.iterrows(): - print(automl.get_pipeline(row['id']).random_state) + assert check_random_state_equality(automl.get_pipeline(row['id']).random_state, random_state) From 4839d273c1db7599b9f6f14d9fbd76a1e4c4c4c2 Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Thu, 21 Jan 2021 15:35:47 -0500 Subject: [PATCH 3/7] update release notes --- docs/source/release_notes.rst | 2 +- evalml/tests/automl_tests/test_automl.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 4b17e9ff33..15c0e65bfc 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -33,7 +33,7 @@ Release Notes * Added ``positive_only`` objectives to ``non_core_objectives`` :pr:`1661` * Updated CatBoost estimators to return self in ``.fit()`` rather than the underlying model for consistency :pr:`1701` * Added ability to initialize pipeline parameters in ``AutoMLSearch`` constructor :pr:`1676` - * Remove ``random_state`` arg from ``get_pipelines`` in ``AutoMLSearch`` :pr:`` + * Remove ``random_state`` arg from ``get_pipelines`` in 
``AutoMLSearch`` :pr:`1719` * Changes * Added labeling to ``graph_confusion_matrix`` :pr:`1632` * Rerunning search for ``AutoMLSearch`` results in a message thrown rather than failing the search, and removed ``has_searched`` property :pr:`1647` diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index 031fcbd163..bda3287220 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -58,8 +58,7 @@ from evalml.tuners import NoParamsException, RandomSearchTuner from evalml.utils.gen_utils import ( check_random_state_equality, - get_random_state, - get_random_seed + get_random_state ) From 5747dcb22e53348f976011eef74865ffe9d9d497 Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Fri, 22 Jan 2021 10:53:59 -0500 Subject: [PATCH 4/7] testing --- evalml/automl/automl_search.py | 8 ++++- evalml/tests/automl_tests/test_automl.py | 38 ++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/evalml/automl/automl_search.py b/evalml/automl/automl_search.py index 2ec34d98a8..95894dd36c 100644 --- a/evalml/automl/automl_search.py +++ b/evalml/automl/automl_search.py @@ -244,8 +244,11 @@ def __init__(self, 'search_order': [], 'errors': [] } + self._random_isint = False + if isinstance(random_state, int): + self._random_isint = True self.random_state = get_random_state(random_state) - self.random_seed = get_random_seed(self.random_state) + self.random_seed = get_random_seed(random_state) self.n_jobs = n_jobs self.plot = None @@ -861,6 +864,9 @@ def get_pipeline(self, pipeline_id): parameters = pipeline_results.get('parameters') if pipeline_class is None or parameters is None: raise PipelineNotFoundError("Pipeline class or parameters not found in automl results") + if self._random_isint: + print(self.random_seed) + return pipeline_class(parameters, random_state=self.random_seed) return pipeline_class(parameters, random_state=self.random_state) def 
describe_pipeline(self, pipeline_id, return_dict=False): diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index bda3287220..c96fd1ca0d 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -49,7 +49,9 @@ BinaryClassificationPipeline, Estimator, MulticlassClassificationPipeline, - RegressionPipeline + RegressionPipeline, + PipelineBase, + ModeBaselineMulticlassPipeline ) from evalml.pipelines.components.utils import get_estimators from evalml.pipelines.utils import make_pipeline @@ -2185,11 +2187,37 @@ def test_automl_pipeline_params_kwargs(mock_fit, mock_score, X_y_multi): @pytest.mark.parametrize("random_state", [0, 1, 9, np.random.RandomState(100)]) @patch('evalml.pipelines.MulticlassClassificationPipeline.score') @patch('evalml.pipelines.MulticlassClassificationPipeline.fit') -def test_automl_pipeline_random_state(mock_fit, mock_score, random_state, X_y_multi): +# @patch.object(PipelineBase, '__init__') +@patch('evalml.pipelines.classification_pipeline.ClassificationPipeline.__init__', return_value=None) +def test_automl_pipeline_random_state(mock_init, mock_fit, mock_score, random_state, X_y_multi): X, y = X_y_multi + + class MulticlassPipeline(MulticlassClassificationPipeline): + component_graph = ['Imputer', 'Random Forest Classifier'] + # def side_effect(*args, **kwargs): + # print(side_effect.counter, args, kwargs) + # if side_effect.counter <= 10: + # side_effect.counter += 1 + # return ModeBaselineMulticlassPipeline({}) + # else: + # return MulticlassClassificationPipeline({}) + # side_effect.counter = 0 + # mock_init.side_effect = MulticlassPipeline({}) automl = AutoMLSearch(X_train=X, y_train=y, problem_type='multiclass', random_state=random_state, n_jobs=1) - automl.search() + # automl.search() + try: + automl.search() + except AttributeError: + print(mock_init.call_args_list) + try: + automl.search() + except AttributeError: + 
print(mock_init.call_args_list) if isinstance(random_state, int): - random_state = automl.random_state + random_state = get_random_state(random_state) for i, row in automl.rankings.iterrows(): - assert check_random_state_equality(automl.get_pipeline(row['id']).random_state, random_state) + if 'Base' not in list(row['parameters'].keys())[0]: + print(automl.get_pipeline(row['id']).random_state) + print("STATE", mock_init) + # print(mock_init.call_args) + # assert check_random_state_equality(automl.get_pipeline(row['id']).random_state, random_state) From 9ee26227e9096dc4375d8368074d3177155f8fc8 Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Fri, 22 Jan 2021 12:23:05 -0500 Subject: [PATCH 5/7] on pause --- evalml/tests/automl_tests/test_automl.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index c96fd1ca0d..2c1545d006 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -2187,7 +2187,6 @@ def test_automl_pipeline_params_kwargs(mock_fit, mock_score, X_y_multi): @pytest.mark.parametrize("random_state", [0, 1, 9, np.random.RandomState(100)]) @patch('evalml.pipelines.MulticlassClassificationPipeline.score') @patch('evalml.pipelines.MulticlassClassificationPipeline.fit') -# @patch.object(PipelineBase, '__init__') @patch('evalml.pipelines.classification_pipeline.ClassificationPipeline.__init__', return_value=None) def test_automl_pipeline_random_state(mock_init, mock_fit, mock_score, random_state, X_y_multi): X, y = X_y_multi @@ -2204,15 +2203,15 @@ class MulticlassPipeline(MulticlassClassificationPipeline): # side_effect.counter = 0 # mock_init.side_effect = MulticlassPipeline({}) automl = AutoMLSearch(X_train=X, y_train=y, problem_type='multiclass', random_state=random_state, n_jobs=1) - # automl.search() - try: - automl.search() - except AttributeError: - print(mock_init.call_args_list) - try: - 
automl.search() - except AttributeError: - print(mock_init.call_args_list) + automl.search() + # try: + # automl.search() + # except AttributeError: + # print(mock_init.call_args_list) + # try: + # automl.search() + # except AttributeError: + # print(mock_init.call_args_list) if isinstance(random_state, int): random_state = get_random_state(random_state) for i, row in automl.rankings.iterrows(): From 5d63a5b4e0e04ab23aa080c02dbcbf4ab91eb058 Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Tue, 26 Jan 2021 14:01:24 -0500 Subject: [PATCH 6/7] remove unneeded code --- evalml/tests/automl_tests/test_automl.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index 3610456266..c98cc4d91c 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -2184,10 +2184,6 @@ def test_automl_pipeline_params_kwargs(mock_fit, mock_score, X_y_multi): @patch('evalml.pipelines.MulticlassClassificationPipeline.fit') def test_automl_pipeline_random_state(mock_fit, mock_score, random_state, X_y_multi): X, y = X_y_multi - - class MulticlassPipeline(MulticlassClassificationPipeline): - component_graph = ['Imputer', 'Random Forest Classifier'] - automl = AutoMLSearch(X_train=X, y_train=y, problem_type='multiclass', random_state=random_state, n_jobs=1) automl.search() From 4bd86cbfa348d6bec5d4d22eed78bf9ec706e0a6 Mon Sep 17 00:00:00 2001 From: bchen1116 Date: Wed, 27 Jan 2021 13:59:56 -0500 Subject: [PATCH 7/7] fix release notes' --- docs/source/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 8ff6f851d8..6d3831aadc 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -36,7 +36,7 @@ Release Notes * Fixed stacking argument ``n_jobs`` for IterativeAlgorithm :pr:`1706` * Updated CatBoost estimators to return self in ``.fit()`` rather than 
the underlying model for consistency :pr:`1701` * Added ability to initialize pipeline parameters in ``AutoMLSearch`` constructor :pr:`1676` - * Remove ``random_state`` arg from ``get_pipelines`` in ``AutoMLSearch`` :pr:`1719` + * Removed ``random_state`` arg from ``get_pipeline`` in ``AutoMLSearch`` :pr:`1719` * Changes * Added labeling to ``graph_confusion_matrix`` :pr:`1632` * Rerunning search for ``AutoMLSearch`` results in a message thrown rather than failing the search, and removed ``has_searched`` property :pr:`1647`