Linting.
chukarsten committed Mar 18, 2021
1 parent e2fed23 commit d7cfce8
Showing 4 changed files with 37 additions and 59 deletions.
9 changes: 5 additions & 4 deletions evalml/tests/automl_tests/dask_testing.py
@@ -4,18 +4,19 @@
from evalml.pipelines import BinaryClassificationPipeline
from evalml.preprocessing.data_splitters import TrainingValidationSplit


# Top-level replacement for AutoML object to supply data for testing purposes.
def err_call(*args, **kwargs):
return 1


AutoMLSearchStruct = namedtuple("AutoML",
"data_splitter problem_type objective additional_objectives optimize_thresholds error_callback random_seed ensembling_indices")
data_splitter = TrainingValidationSplit()
problem_type = "binary"
objective = get_objective("Log Loss Binary", return_instance=True)
additional_objectives = []
optimize_thresholds = False


def err_call(*args, **kwargs):
return 1
error_callback = err_call
random_seed = 0
ensembling_indices = [0]
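
For orientation, here is a minimal sketch of how these module-level values could be bundled into the automl_data stand-in that the engine tests import from this module. The actual instantiation falls below the fold of this diff, so the keyword-argument form shown is an assumption; the field names simply follow the AutoMLSearchStruct definition above.

# Hypothetical assembly of the stand-in AutoML object (the real line is not
# visible in this diff); field names match the AutoMLSearchStruct definition.
automl_data = AutoMLSearchStruct(
    data_splitter=data_splitter,
    problem_type=problem_type,
    objective=objective,
    additional_objectives=additional_objectives,
    optimize_thresholds=optimize_thresholds,
    error_callback=error_callback,
    random_seed=random_seed,
    ensembling_indices=ensembling_indices,
)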
29 changes: 13 additions & 16 deletions evalml/tests/automl_tests/test_automl_dask.py
@@ -1,17 +1,11 @@
import unittest

import numpy as np
import pytest
from distributed import Client
import numpy as np

from evalml.automl.engine import DaskEngine, SequentialEngine
from evalml.automl import AutoMLSearch

"""
The stopping criteria is respected when you use a dask engine
train_pipelines and score_pipelines behaves the same way when you use a dask engine
The error_callbacks behave the same way when you use a dask engine
The results of sequential match those of dask engine
"""
from evalml.automl.engine import DaskEngine, SequentialEngine


@pytest.mark.usefixtures("X_y_binary_cls")
@@ -37,25 +31,28 @@ def test_automl(self):
parallel_results = parallel_rankings.drop(columns=["id"])
sequential_results = sequential_rankings.drop(columns=["id"])

assert parallel_results.drop(columns=["validation_score"]).equals(sequential_results.drop(columns=["validation_score"]))
assert np.allclose(np.array(sequential_results["validation_score"]), np.array(parallel_results["validation_score"]))
assert parallel_results.drop(columns=["validation_score"]).equals(
sequential_results.drop(columns=["validation_score"]))
assert np.allclose(np.array(sequential_results["validation_score"]),
np.array(parallel_results["validation_score"]))

def test_automl_max_iterations(self):
""" Making sure that the max_iterations parameter limits the number of pipelines run. """
X, y = self.X_y_binary
max_iterations = 4
par_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.parallel_engine, max_iterations=max_iterations)
par_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.parallel_engine,
max_iterations=max_iterations)
par_automl.search()
parallel_rankings = par_automl.full_rankings

seq_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.sequential_engine, max_iterations=max_iterations)
seq_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.sequential_engine,
max_iterations=max_iterations)
seq_automl.search()
sequential_rankings = seq_automl.full_rankings

assert len(sequential_rankings) == len(parallel_rankings) == max_iterations
#TODO: Figure out how to mock the train_and_score_pipelines call to assert the call count.

# TODO: Figure out how to mock the train_and_score_pipelines call to assert the call count.

@classmethod
def tearDownClass(cls) -> None:
cls.client.close()
cls.client.close()
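
The TODO in the diff above asks how to mock the train-and-score call so its call count can be asserted. One possible approach is sketched below: wrap the real function with unittest.mock so the search still runs normally, then read call_count afterwards. The patch target (evalml.automl.engine.engine_base.train_and_score_pipeline, the name imported elsewhere in this commit) is an assumption, and the correct target depends on where the engine module resolves the name; a patch applied in the test process also may not intercept calls dispatched to remote Dask workers, which is presumably why the TODO is still open.

from unittest.mock import patch

from sklearn import datasets

from evalml.automl import AutoMLSearch
from evalml.automl.engine import engine_base
from evalml.automl.engine.sequential_engine import SequentialEngine


def count_train_and_score_calls(max_iterations=4):
    """Run a small search and report how many times train_and_score_pipeline ran."""
    X, y = datasets.make_classification(n_samples=100, n_features=20,
                                        n_informative=2, n_redundant=2, random_state=0)
    # Wrapping (rather than replacing) the real function keeps the search behaviour
    # intact while still recording every call on the mock.
    with patch.object(engine_base, "train_and_score_pipeline",
                      wraps=engine_base.train_and_score_pipeline) as mock_call:
        automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary",
                              engine=SequentialEngine(), max_iterations=max_iterations)
        automl.search()
    return mock_call.call_count  # expected to equal max_iterations if the patch is hit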
54 changes: 16 additions & 38 deletions evalml/tests/automl_tests/test_dask_engine.py
@@ -1,27 +1,28 @@
import time
import unittest
import pytest
import numpy as np
import pandas as pd
from distributed import Client

import numpy as np
import pytest
import woodwork as ww
from distributed import Client

from evalml.pipelines.pipeline_base import PipelineBase
from evalml.automl.engine.engine_base import JobLogger
from evalml.automl.engine.engine_base import train_pipeline, evaluate_pipeline
from evalml.automl.engine.dask_engine import DaskComputation, DaskEngine
from evalml.automl.engine.engine_base import (
JobLogger,
evaluate_pipeline,
train_pipeline
)
from evalml.automl.engine.sequential_engine import SequentialEngine
from evalml.pipelines.pipeline_base import PipelineBase
from evalml.tests.automl_tests.dask_testing import (
TestCBPipeline,
TestLRCPipeline,
TestSVMPipeline,
automl_data
)

from evalml.automl.engine.engine_base import train_pipeline, evaluate_pipeline, train_and_score_pipeline

def score_pipeline(pipeline, X, y, objectives):
return pipeline.score(X,y,objectives)
return pipeline.score(X, y, objectives)


@pytest.mark.usefixtures("X_y_binary_cls")
@@ -71,12 +72,12 @@ def fit_pipelines(pipelines, engine):
return results

# Verify all pipelines are trained and fitted.
seq_pipelines= fit_pipelines(pipelines, SequentialEngine())
seq_pipelines = fit_pipelines(pipelines, SequentialEngine())
for pipeline in seq_pipelines:
assert pipeline._is_fitted

# Verify all pipelines are trained and fitted.
par_pipelines= fit_pipelines(pipelines, DaskEngine(client=self.client))
par_pipelines = fit_pipelines(pipelines, DaskEngine(client=self.client))
for pipeline in par_pipelines:
assert pipeline._is_fitted

@@ -140,7 +141,7 @@ def eval_pipelines(pipelines, engine):
par_dicts = [s[0] for s in par_eval_results]
par_scores = [s["cv_data"][0]["score"] for s in par_dicts]
par_pipelines = [s[1] for s in par_eval_results]

seq_eval_results = eval_pipelines(pipelines, SequentialEngine())
seq_dicts = [s[0] for s in seq_eval_results]
seq_scores = [s["cv_data"][0]["score"] for s in seq_dicts]
@@ -170,8 +171,8 @@ def test_submit_scoring_job_single(self):
automl_data=automl_data, pipeline=pipeline)
pipeline = pipeline_future.get_result()
pipeline_score_future = engine.submit_scoring_job(X=ww.DataTable(X), y=ww.DataColumn(y),
automl_data=automl_data, pipeline=pipeline,
objectives=objectives)
automl_data=automl_data, pipeline=pipeline,
objectives=objectives)
assert isinstance(pipeline_score_future, DaskComputation)
pipeline_score = pipeline_score_future.get_result()

@@ -212,29 +213,6 @@ def score_pipelines(pipelines, engine):
assert len(par_eval_results) == len(pipelines)
assert set(par_scores) == set(seq_scores)

def test_freddy(self):
X, y = self.X_y_binary
pipelines = [TestLRCPipeline({}),
TestCBPipeline({}),
TestSVMPipeline({})]

batch_futures = []
for pipeline in pipelines:
p_f = self.client.submit(train_pipeline, pipeline=pipeline,
X=X, y=y, optimize_thresholds=True,
objective=automl_data.objective)
p_f = self.client.submit(score_pipeline, pipeline=p_f, X=X, y=y,
objectives=[automl_data.objective])
batch_futures.append(p_f)
batch_scores = self.client.gather(batch_futures)
"""
assert batch_scores == [OrderedDict([('Log Loss Binary', 0.17764440547651003)]),
OrderedDict([('Log Loss Binary', 0.4841126635831677)]),
OrderedDict([('Log Loss Binary', 0.11584614593690136)])
"""
# import pdb; pdb.set_trace()


@classmethod
def tearDownClass(cls) -> None:
cls.client.close()
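
For context, the submit-and-wait pattern these tests exercise can be condensed into a short sketch. The submit_scoring_job signature, the DaskEngine(client=...) constructor, and the get_result() calls appear in the diff above; the submit_training_job name and the exact woodwork wrapping for training are assumptions based on the scoring call.

import woodwork as ww
from distributed import Client
from sklearn import datasets

from evalml.automl.engine.dask_engine import DaskEngine
from evalml.tests.automl_tests.dask_testing import TestLRCPipeline, automl_data

X, y = datasets.make_classification(n_samples=100, n_features=20,
                                    n_informative=2, n_redundant=2, random_state=0)

client = Client()                      # local Dask cluster, closed at the end
engine = DaskEngine(client=client)
pipeline = TestLRCPipeline({})

# Both submissions return DaskComputation objects; get_result() blocks until done.
training_future = engine.submit_training_job(X=ww.DataTable(X), y=ww.DataColumn(y),
                                             automl_data=automl_data, pipeline=pipeline)
fitted_pipeline = training_future.get_result()

scoring_future = engine.submit_scoring_job(X=ww.DataTable(X), y=ww.DataColumn(y),
                                           automl_data=automl_data, pipeline=fitted_pipeline,
                                           objectives=[automl_data.objective])
scores = scoring_future.get_result()   # e.g. an OrderedDict keyed by objective name

client.close()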
4 changes: 3 additions & 1 deletion evalml/tests/conftest.py
@@ -121,10 +121,12 @@ def X_y_binary():

return X, y


@pytest.fixture(scope="class")
def X_y_binary_cls(request):
request.cls.X_y_binary = datasets.make_classification(n_samples=100, n_features=20,
n_informative=2, n_redundant=2, random_state=0)
n_informative=2, n_redundant=2, random_state=0)


@pytest.fixture
def X_y_regression():
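
The class-scoped X_y_binary_cls fixture above stores the dataset on the requesting class instead of returning it, which is how the unittest-style classes in the earlier files reach it as self.X_y_binary. A minimal consumption sketch follows; the class name and assertions are illustrative only.

import unittest

import pytest


@pytest.mark.usefixtures("X_y_binary_cls")
class TestUsingSharedBinaryData(unittest.TestCase):
    def test_shapes(self):
        # The fixture attached the make_classification output to the class,
        # so every test in the class can unpack it from self.
        X, y = self.X_y_binary
        assert X.shape == (100, 20)
        assert y.shape == (100,)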