From d7cfce86f49b79eaa8708ee0ad741a5c40f0fce4 Mon Sep 17 00:00:00 2001
From: Karsten Chu
Date: Thu, 18 Mar 2021 16:22:41 -0400
Subject: [PATCH] Linting.

---
 evalml/tests/automl_tests/dask_testing.py     |  9 ++--
 evalml/tests/automl_tests/test_automl_dask.py | 29 +++++-----
 evalml/tests/automl_tests/test_dask_engine.py | 54 ++++++-------------
 evalml/tests/conftest.py                      |  4 +-
 4 files changed, 37 insertions(+), 59 deletions(-)

diff --git a/evalml/tests/automl_tests/dask_testing.py b/evalml/tests/automl_tests/dask_testing.py
index 3f9e29a15d..da15564e12 100644
--- a/evalml/tests/automl_tests/dask_testing.py
+++ b/evalml/tests/automl_tests/dask_testing.py
@@ -4,7 +4,12 @@ from evalml.pipelines import BinaryClassificationPipeline
 from evalml.preprocessing.data_splitters import TrainingValidationSplit
 
+
 # Top-level replacement for AutoML object to supply data for testing purposes.
+def err_call(*args, **kwargs):
+    return 1
+
+
 AutoMLSearchStruct = namedtuple("AutoML", "data_splitter problem_type objective additional_objectives optimize_thresholds error_callback random_seed ensembling_indices")
 data_splitter = TrainingValidationSplit()
@@ -12,10 +17,6 @@ objective = get_objective("Log Loss Binary", return_instance=True)
 additional_objectives = []
 optimize_thresholds = False
-
-
-def err_call(*args, **kwargs):
-    return 1
 error_callback = err_call
 random_seed = 0
 ensembling_indices = [0]
diff --git a/evalml/tests/automl_tests/test_automl_dask.py b/evalml/tests/automl_tests/test_automl_dask.py
index 598473312e..757ed57c9b 100644
--- a/evalml/tests/automl_tests/test_automl_dask.py
+++ b/evalml/tests/automl_tests/test_automl_dask.py
@@ -1,17 +1,11 @@
 import unittest
+
+import numpy as np
 import pytest
 from distributed import Client
-import numpy as np
 
-from evalml.automl.engine import DaskEngine, SequentialEngine
 from evalml.automl import AutoMLSearch
-
-"""
-The stopping criteria is respected when you use a dask engine
-train_pipelines and score_pipelines behaves the same way when you use a dask engine
-The error_callbacks behave the same way when you use a dask engine
-The results of sequential match those of dask engine
-"""
+from evalml.automl.engine import DaskEngine, SequentialEngine
 
 
 @pytest.mark.usefixtures("X_y_binary_cls")
@@ -37,25 +31,28 @@ def test_automl(self):
         parallel_results = parallel_rankings.drop(columns=["id"])
         sequential_results = sequential_rankings.drop(columns=["id"])
-        assert parallel_results.drop(columns=["validation_score"]).equals(sequential_results.drop(columns=["validation_score"]))
-        assert np.allclose(np.array(sequential_results["validation_score"]), np.array(parallel_results["validation_score"]))
+        assert parallel_results.drop(columns=["validation_score"]).equals(
+            sequential_results.drop(columns=["validation_score"]))
+        assert np.allclose(np.array(sequential_results["validation_score"]),
+                           np.array(parallel_results["validation_score"]))
 
     def test_automl_max_iterations(self):
         """ Making sure that the max_iterations parameter limits the number of pipelines run.
""" X, y = self.X_y_binary max_iterations = 4 - par_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.parallel_engine, max_iterations=max_iterations) + par_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.parallel_engine, + max_iterations=max_iterations) par_automl.search() parallel_rankings = par_automl.full_rankings - seq_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.sequential_engine, max_iterations=max_iterations) + seq_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.sequential_engine, + max_iterations=max_iterations) seq_automl.search() sequential_rankings = seq_automl.full_rankings assert len(sequential_rankings) == len(parallel_rankings) == max_iterations - #TODO: Figure out how to mock the train_and_score_pipelines call to assert the call count. - + # TODO: Figure out how to mock the train_and_score_pipelines call to assert the call count. @classmethod def tearDownClass(cls) -> None: - cls.client.close() \ No newline at end of file + cls.client.close() diff --git a/evalml/tests/automl_tests/test_dask_engine.py b/evalml/tests/automl_tests/test_dask_engine.py index c40de68bef..bfae83e944 100644 --- a/evalml/tests/automl_tests/test_dask_engine.py +++ b/evalml/tests/automl_tests/test_dask_engine.py @@ -1,17 +1,18 @@ -import time import unittest -import pytest -import numpy as np -import pandas as pd -from distributed import Client +import numpy as np +import pytest import woodwork as ww +from distributed import Client -from evalml.pipelines.pipeline_base import PipelineBase -from evalml.automl.engine.engine_base import JobLogger -from evalml.automl.engine.engine_base import train_pipeline, evaluate_pipeline from evalml.automl.engine.dask_engine import DaskComputation, DaskEngine +from evalml.automl.engine.engine_base import ( + JobLogger, + evaluate_pipeline, + train_pipeline +) from evalml.automl.engine.sequential_engine import SequentialEngine +from evalml.pipelines.pipeline_base import PipelineBase from evalml.tests.automl_tests.dask_testing import ( TestCBPipeline, TestLRCPipeline, @@ -19,9 +20,9 @@ automl_data ) -from evalml.automl.engine.engine_base import train_pipeline, evaluate_pipeline, train_and_score_pipeline + def score_pipeline(pipeline, X, y, objectives): - return pipeline.score(X,y,objectives) + return pipeline.score(X, y, objectives) @pytest.mark.usefixtures("X_y_binary_cls") @@ -71,12 +72,12 @@ def fit_pipelines(pipelines, engine): return results # Verify all pipelines are trained and fitted. - seq_pipelines= fit_pipelines(pipelines, SequentialEngine()) + seq_pipelines = fit_pipelines(pipelines, SequentialEngine()) for pipeline in seq_pipelines: assert pipeline._is_fitted # Verify all pipelines are trained and fitted. 
-        par_pipelines= fit_pipelines(pipelines, DaskEngine(client=self.client))
+        par_pipelines = fit_pipelines(pipelines, DaskEngine(client=self.client))
         for pipeline in par_pipelines:
             assert pipeline._is_fitted
 
@@ -140,7 +141,7 @@ def eval_pipelines(pipelines, engine):
         par_dicts = [s[0] for s in par_eval_results]
         par_scores = [s["cv_data"][0]["score"] for s in par_dicts]
         par_pipelines = [s[1] for s in par_eval_results]
-        
+
         seq_eval_results = eval_pipelines(pipelines, SequentialEngine())
         seq_dicts = [s[0] for s in seq_eval_results]
         seq_scores = [s["cv_data"][0]["score"] for s in seq_dicts]
@@ -170,8 +171,8 @@ def test_submit_scoring_job_single(self):
                                                      automl_data=automl_data, pipeline=pipeline)
         pipeline = pipeline_future.get_result()
         pipeline_score_future = engine.submit_scoring_job(X=ww.DataTable(X), y=ww.DataColumn(y),
-                                                        automl_data=automl_data, pipeline=pipeline,
-                                                        objectives=objectives)
+                                                          automl_data=automl_data, pipeline=pipeline,
+                                                          objectives=objectives)
 
         assert isinstance(pipeline_score_future, DaskComputation)
         pipeline_score = pipeline_score_future.get_result()
@@ -212,29 +213,6 @@ def score_pipelines(pipelines, engine):
         assert len(par_eval_results) == len(pipelines)
         assert set(par_scores) == set(seq_scores)
 
-    def test_freddy(self):
-        X, y = self.X_y_binary
-        pipelines = [TestLRCPipeline({}),
-                     TestCBPipeline({}),
-                     TestSVMPipeline({})]
-
-        batch_futures = []
-        for pipeline in pipelines:
-            p_f = self.client.submit(train_pipeline, pipeline=pipeline,
-                                     X=X, y=y, optimize_thresholds=True,
-                                     objective=automl_data.objective)
-            p_f = self.client.submit(score_pipeline, pipeline=p_f, X=X, y=y,
-                                     objectives=[automl_data.objective])
-            batch_futures.append(p_f)
-        batch_scores = self.client.gather(batch_futures)
-        """
-        assert batch_scores == [OrderedDict([('Log Loss Binary', 0.17764440547651003)]),
-                                OrderedDict([('Log Loss Binary', 0.4841126635831677)]),
-                                OrderedDict([('Log Loss Binary', 0.11584614593690136)])
-        """
-        # import pdb; pdb.set_trace()
-
-
     @classmethod
     def tearDownClass(cls) -> None:
         cls.client.close()
diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py
index 33b935cb29..6902873cbb 100644
--- a/evalml/tests/conftest.py
+++ b/evalml/tests/conftest.py
@@ -121,10 +121,12 @@ def X_y_binary():
     return X, y
 
+
 @pytest.fixture(scope="class")
 def X_y_binary_cls(request):
     request.cls.X_y_binary = datasets.make_classification(n_samples=100, n_features=20,
-                                                          n_informative=2, n_redundant=2, random_state=0)
+                                                           n_informative=2, n_redundant=2, random_state=0)
+
 
 @pytest.fixture
 def X_y_regression():
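
Note (not part of the patch): the conftest.py hunk above relies on pytest's class-scoped fixture pattern, where the fixture writes onto request.cls so that unittest-style classes marked with @pytest.mark.usefixtures("X_y_binary_cls") can read the data as self.X_y_binary. The following is a minimal, self-contained sketch of that pattern; the example test class, its test method, and the sklearn import are illustrative assumptions, not taken from the diff.

# Illustrative sketch only -- mirrors the X_y_binary_cls fixture in conftest.py;
# the test class below is hypothetical and not part of this patch.
import unittest

import pytest
from sklearn import datasets


@pytest.fixture(scope="class")
def X_y_binary_cls(request):
    # Runs once per test class; attaches the dataset tuple to the class object itself.
    request.cls.X_y_binary = datasets.make_classification(n_samples=100, n_features=20,
                                                          n_informative=2, n_redundant=2, random_state=0)


@pytest.mark.usefixtures("X_y_binary_cls")
class ExampleDataTests(unittest.TestCase):
    def test_shapes(self):
        # The fixture has already populated the class attribute, so self.X_y_binary is available here.
        X, y = self.X_y_binary
        assert X.shape == (100, 20)
        assert y.shape == (100,)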
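
Note (not part of the patch): the test_dask_engine.py hunks exercise a train-then-score round trip on the Dask engine. The sketch below strings together only the calls visible in the diff (DaskEngine(client=...), submit_training_job, get_result, submit_scoring_job); it assumes the evalml and woodwork versions this PR targets, and the dataset and printout are illustrative.

# Illustrative sketch only -- rough shape of the flow test_submit_scoring_job_single checks.
import woodwork as ww
from distributed import Client
from sklearn import datasets

from evalml.automl.engine.dask_engine import DaskEngine
from evalml.tests.automl_tests.dask_testing import TestLRCPipeline, automl_data

X, y = datasets.make_classification(n_samples=100, n_features=20,
                                    n_informative=2, n_redundant=2, random_state=0)

client = Client()                    # local Dask cluster
engine = DaskEngine(client=client)
pipeline = TestLRCPipeline({})

# Training is submitted to the cluster; get_result() blocks until the fitted pipeline is returned.
training_job = engine.submit_training_job(X=ww.DataTable(X), y=ww.DataColumn(y),
                                          automl_data=automl_data, pipeline=pipeline)
fitted_pipeline = training_job.get_result()

# Scoring reuses the fitted pipeline and returns a mapping of objective name to score.
scoring_job = engine.submit_scoring_job(X=ww.DataTable(X), y=ww.DataColumn(y),
                                        automl_data=automl_data, pipeline=fitted_pipeline,
                                        objectives=[automl_data.objective])
print(scoring_job.get_result())

client.close()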