Linting.
chukarsten committed Mar 18, 2021
1 parent e2fed23 commit d7cfce8
Showing 4 changed files with 37 additions and 59 deletions.
9 changes: 5 additions & 4 deletions evalml/tests/automl_tests/dask_testing.py
@@ -4,18 +4,19 @@
from evalml.pipelines import BinaryClassificationPipeline
from evalml.preprocessing.data_splitters import TrainingValidationSplit


# Top-level replacement for AutoML object to supply data for testing purposes.
def err_call(*args, **kwargs):
return 1


AutoMLSearchStruct = namedtuple("AutoML",
"data_splitter problem_type objective additional_objectives optimize_thresholds error_callback random_seed ensembling_indices")
data_splitter = TrainingValidationSplit()
problem_type = "binary"
objective = get_objective("Log Loss Binary", return_instance=True)
additional_objectives = []
optimize_thresholds = False


def err_call(*args, **kwargs):
return 1
error_callback = err_call
random_seed = 0
ensembling_indices = [0]
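
For orientation, here is a minimal sketch of how these module-level values could be bundled into the automl_data stand-in that the engine tests import from this module. The actual instantiation falls below the fold of this diff, so the keyword-argument form shown is an assumption; the field names simply follow the AutoMLSearchStruct definition above.

# Hypothetical assembly of the stand-in AutoML object (the real line is not
# visible in this diff); field names match the AutoMLSearchStruct definition.
automl_data = AutoMLSearchStruct(
    data_splitter=data_splitter,
    problem_type=problem_type,
    objective=objective,
    additional_objectives=additional_objectives,
    optimize_thresholds=optimize_thresholds,
    error_callback=error_callback,
    random_seed=random_seed,
    ensembling_indices=ensembling_indices,
)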
29 changes: 13 additions & 16 deletions evalml/tests/automl_tests/test_automl_dask.py
@@ -1,17 +1,11 @@
import unittest

import numpy as np
import pytest
from distributed import Client
import numpy as np

from evalml.automl.engine import DaskEngine, SequentialEngine
from evalml.automl import AutoMLSearch

"""
The stopping criteria is respected when you use a dask engine
train_pipelines and score_pipelines behaves the same way when you use a dask engine
The error_callbacks behave the same way when you use a dask engine
The results of sequential match those of dask engine
"""
from evalml.automl.engine import DaskEngine, SequentialEngine


@pytest.mark.usefixtures("X_y_binary_cls")
@@ -37,25 +31,28 @@ def test_automl(self):
parallel_results = parallel_rankings.drop(columns=["id"])
sequential_results = sequential_rankings.drop(columns=["id"])

assert parallel_results.drop(columns=["validation_score"]).equals(sequential_results.drop(columns=["validation_score"]))
assert np.allclose(np.array(sequential_results["validation_score"]), np.array(parallel_results["validation_score"]))
assert parallel_results.drop(columns=["validation_score"]).equals(
sequential_results.drop(columns=["validation_score"]))
assert np.allclose(np.array(sequential_results["validation_score"]),
np.array(parallel_results["validation_score"]))

def test_automl_max_iterations(self):
""" Making sure that the max_iterations parameter limits the number of pipelines run. """
X, y = self.X_y_binary
max_iterations = 4
par_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.parallel_engine, max_iterations=max_iterations)
par_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.parallel_engine,
max_iterations=max_iterations)
par_automl.search()
parallel_rankings = par_automl.full_rankings

seq_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.sequential_engine, max_iterations=max_iterations)
seq_automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary", engine=self.sequential_engine,
max_iterations=max_iterations)
seq_automl.search()
sequential_rankings = seq_automl.full_rankings

assert len(sequential_rankings) == len(parallel_rankings) == max_iterations
#TODO: Figure out how to mock the train_and_score_pipelines call to assert the call count.

# TODO: Figure out how to mock the train_and_score_pipelines call to assert the call count.

@classmethod
def tearDownClass(cls) -> None:
cls.client.close()
cls.client.close()
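
The TODO in the diff above asks how to mock the train-and-score call so its call count can be asserted. One possible approach is sketched below: wrap the real function with unittest.mock so the search still runs normally, then read call_count afterwards. The patch target (evalml.automl.engine.engine_base.train_and_score_pipeline, the name imported elsewhere in this commit) is an assumption, and the correct target depends on where the engine module resolves the name; a patch applied in the test process also may not intercept calls dispatched to remote Dask workers, which is presumably why the TODO is still open.

from unittest.mock import patch

from sklearn import datasets

from evalml.automl import AutoMLSearch
from evalml.automl.engine import engine_base
from evalml.automl.engine.sequential_engine import SequentialEngine


def count_train_and_score_calls(max_iterations=4):
    """Run a small search and report how many times train_and_score_pipeline ran."""
    X, y = datasets.make_classification(n_samples=100, n_features=20,
                                        n_informative=2, n_redundant=2, random_state=0)
    # Wrapping (rather than replacing) the real function keeps the search behaviour
    # intact while still recording every call on the mock.
    with patch.object(engine_base, "train_and_score_pipeline",
                      wraps=engine_base.train_and_score_pipeline) as mock_call:
        automl = AutoMLSearch(X_train=X, y_train=y, problem_type="binary",
                              engine=SequentialEngine(), max_iterations=max_iterations)
        automl.search()
    return mock_call.call_count  # expected to equal max_iterations if the patch is hit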
54 changes: 16 additions & 38 deletions evalml/tests/automl_tests/test_dask_engine.py
@@ -1,27 +1,28 @@
import time
import unittest
import pytest
import numpy as np
import pandas as pd
from distributed import Client

import numpy as np
import pytest
import woodwork as ww
from distributed import Client

from evalml.pipelines.pipeline_base import PipelineBase
from evalml.automl.engine.engine_base import JobLogger
from evalml.automl.engine.engine_base import train_pipeline, evaluate_pipeline
from evalml.automl.engine.dask_engine import DaskComputation, DaskEngine
from evalml.automl.engine.engine_base import (
JobLogger,
evaluate_pipeline,
train_pipeline
)
from evalml.automl.engine.sequential_engine import SequentialEngine
from evalml.pipelines.pipeline_base import PipelineBase
from evalml.tests.automl_tests.dask_testing import (
TestCBPipeline,
TestLRCPipeline,
TestSVMPipeline,
automl_data
)

from evalml.automl.engine.engine_base import train_pipeline, evaluate_pipeline, train_and_score_pipeline

def score_pipeline(pipeline, X, y, objectives):
return pipeline.score(X,y,objectives)
return pipeline.score(X, y, objectives)


@pytest.mark.usefixtures("X_y_binary_cls")
@@ -71,12 +72,12 @@ def fit_pipelines(pipelines, engine):
return results

# Verify all pipelines are trained and fitted.
seq_pipelines= fit_pipelines(pipelines, SequentialEngine())
seq_pipelines = fit_pipelines(pipelines, SequentialEngine())
for pipeline in seq_pipelines:
assert pipeline._is_fitted

# Verify all pipelines are trained and fitted.
par_pipelines= fit_pipelines(pipelines, DaskEngine(client=self.client))
par_pipelines = fit_pipelines(pipelines, DaskEngine(client=self.client))
for pipeline in par_pipelines:
assert pipeline._is_fitted

@@ -140,7 +141,7 @@ def eval_pipelines(pipelines, engine):
par_dicts = [s[0] for s in par_eval_results]
par_scores = [s["cv_data"][0]["score"] for s in par_dicts]
par_pipelines = [s[1] for s in par_eval_results]

seq_eval_results = eval_pipelines(pipelines, SequentialEngine())
seq_dicts = [s[0] for s in seq_eval_results]
seq_scores = [s["cv_data"][0]["score"] for s in seq_dicts]
@@ -170,8 +171,8 @@ def test_submit_scoring_job_single(self):
automl_data=automl_data, pipeline=pipeline)
pipeline = pipeline_future.get_result()
pipeline_score_future = engine.submit_scoring_job(X=ww.DataTable(X), y=ww.DataColumn(y),
automl_data=automl_data, pipeline=pipeline,
objectives=objectives)
automl_data=automl_data, pipeline=pipeline,
objectives=objectives)
assert isinstance(pipeline_score_future, DaskComputation)
pipeline_score = pipeline_score_future.get_result()

@@ -212,29 +213,6 @@ def score_pipelines(pipelines, engine):
assert len(par_eval_results) == len(pipelines)
assert set(par_scores) == set(seq_scores)

def test_freddy(self):
X, y = self.X_y_binary
pipelines = [TestLRCPipeline({}),
TestCBPipeline({}),
TestSVMPipeline({})]

batch_futures = []
for pipeline in pipelines:
p_f = self.client.submit(train_pipeline, pipeline=pipeline,
X=X, y=y, optimize_thresholds=True,
objective=automl_data.objective)
p_f = self.client.submit(score_pipeline, pipeline=p_f, X=X, y=y,
objectives=[automl_data.objective])
batch_futures.append(p_f)
batch_scores = self.client.gather(batch_futures)
"""
assert batch_scores == [OrderedDict([('Log Loss Binary', 0.17764440547651003)]),
OrderedDict([('Log Loss Binary', 0.4841126635831677)]),
OrderedDict([('Log Loss Binary', 0.11584614593690136)])
"""
# import pdb; pdb.set_trace()


@classmethod
def tearDownClass(cls) -> None:
cls.client.close()
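
For context, the submit-and-wait pattern these tests exercise can be condensed into a short sketch. The submit_scoring_job signature, the DaskEngine(client=...) constructor, and the get_result() calls appear in the diff above; the submit_training_job name and the exact woodwork wrapping for training are assumptions based on the scoring call.

import woodwork as ww
from distributed import Client
from sklearn import datasets

from evalml.automl.engine.dask_engine import DaskEngine
from evalml.tests.automl_tests.dask_testing import TestLRCPipeline, automl_data

X, y = datasets.make_classification(n_samples=100, n_features=20,
                                    n_informative=2, n_redundant=2, random_state=0)

client = Client()                      # local Dask cluster, closed at the end
engine = DaskEngine(client=client)
pipeline = TestLRCPipeline({})

# Both submissions return DaskComputation objects; get_result() blocks until done.
training_future = engine.submit_training_job(X=ww.DataTable(X), y=ww.DataColumn(y),
                                             automl_data=automl_data, pipeline=pipeline)
fitted_pipeline = training_future.get_result()

scoring_future = engine.submit_scoring_job(X=ww.DataTable(X), y=ww.DataColumn(y),
                                           automl_data=automl_data, pipeline=fitted_pipeline,
                                           objectives=[automl_data.objective])
scores = scoring_future.get_result()   # e.g. an OrderedDict keyed by objective name

client.close()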
4 changes: 3 additions & 1 deletion evalml/tests/conftest.py
@@ -121,10 +121,12 @@ def X_y_binary():

return X, y


@pytest.fixture(scope="class")
def X_y_binary_cls(request):
request.cls.X_y_binary = datasets.make_classification(n_samples=100, n_features=20,
n_informative=2, n_redundant=2, random_state=0)
n_informative=2, n_redundant=2, random_state=0)


@pytest.fixture
def X_y_regression():
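
The class-scoped X_y_binary_cls fixture above stores the dataset on the requesting class instead of returning it, which is how the unittest-style classes in the earlier files reach it as self.X_y_binary. A minimal consumption sketch follows; the class name and assertions are illustrative only.

import unittest

import pytest


@pytest.mark.usefixtures("X_y_binary_cls")
class TestUsingSharedBinaryData(unittest.TestCase):
    def test_shapes(self):
        # The fixture attached the make_classification output to the class,
        # so every test in the class can unpack it from self.
        X, y = self.X_y_binary
        assert X.shape == (100, 20)
        assert y.shape == (100,)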