Use black to format pipeline code (#4005)
* Use black to format pipeline code

* Lint

* move black import

* Fix dependency tests

* Fix dependencies

* Change lowest click version to match black

* Fix tests

* Pull black config

* set default black config

* Add test cases

* Fix tests

* fix testing
jeremyliweishih committed Feb 22, 2023
1 parent 15c93c4 commit be98201
Showing 12 changed files with 107 additions and 29 deletions.
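In short: generate_pipeline_code now pipes the source string it builds through black before returning it, using the [tool.black] settings from evalml's pyproject.toml (with a hard-coded fallback when that config cannot be read). A minimal sketch of the resulting behavior, assuming an evalml install that includes this change:

    from evalml.pipelines import BinaryClassificationPipeline
    from evalml.pipelines.utils import generate_pipeline_code

    # A simple two-component pipeline; both names are built-in evalml components.
    pipeline = BinaryClassificationPipeline(["Imputer", "Random Forest Classifier"])

    # The returned string is now black-formatted (line wrapping, quoting,
    # trailing commas) instead of one long repr-style line.
    print(generate_pipeline_code(pipeline))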
3 changes: 2 additions & 1 deletion .github/meta.yaml
@@ -33,7 +33,7 @@ outputs:
 - statsmodels >=0.12.2
 - colorama >=0.4.4
 - cloudpickle >=1.5.0
-- click >=7.1.2
+- click>=8.0.0
 - shap >=0.40.0
 - texttable >=1.6.2
 - woodwork >=0.21.1
@@ -45,6 +45,7 @@
 - python-graphviz >=0.13
 - tomli >=2.0.1
 - packaging >=23.0
+- black[jupyter] >=22.3.0
 test:
   imports:
     - evalml
2 changes: 1 addition & 1 deletion core-requirements.txt
@@ -6,7 +6,7 @@ scikit-optimize>=0.9.0
 pyzmq>=20.0.0
 colorama>=0.4.4
 cloudpickle>=1.5.0
-click>=7.1.2
+click>=8.0.0
 shap>=0.40.0
 statsmodels>=0.12.2
 texttable>=1.6.2
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+        * Move black to regular dependency and use it for ``generate_pipeline_code`` :pr:`4005`
     * Fixes
     * Changes
         * Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996`
10 changes: 9 additions & 1 deletion evalml/pipelines/utils.py
@@ -1,6 +1,8 @@
"""Utility methods for EvalML pipelines."""
import copy
import os

import black
from woodwork import logical_types

from evalml.data_checks import DataCheckActionCode, DataCheckActionOption
@@ -64,6 +66,7 @@
     is_time_series,
 )
 from evalml.utils import get_time_index, infer_feature_types
+from evalml.utils.cli_utils import get_evalml_black_config
 from evalml.utils.gen_utils import contains_all_ts_parameters
 
 
@@ -635,7 +638,12 @@ def generate_pipeline_code(element):
             ),
         )
     code_strings.append(repr(element))
-    return "\n".join(code_strings)
+    pipeline_code = "\n".join(code_strings)
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    evalml_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
+    black_config = get_evalml_black_config(evalml_path)
+    pipeline_code = black.format_str(pipeline_code, mode=black.Mode(**black_config))
+    return pipeline_code
 
 
 def _make_stacked_ensemble_pipeline(
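The last hunk above is the heart of the change: the joined code string is run through black.format_str with a black.Mode built from the repository-level config. Conceptually the formatting step reduces to the following simplified sketch (the hard-coded line_length is an assumption standing in for whatever get_evalml_black_config returns):

    import black

    raw = "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y']}, parameters={}, random_seed=0)"

    # black.Mode takes keyword arguments such as line_length and target_versions,
    # which is why get_evalml_black_config renames the TOML keys before use.
    formatted = black.format_str(raw, mode=black.Mode(line_length=88))
    print(formatted)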
9 changes: 9 additions & 0 deletions evalml/tests/conftest.py
@@ -47,6 +47,7 @@
     is_time_series,
 )
 from evalml.utils import infer_feature_types
+from evalml.utils.cli_utils import get_evalml_black_config
 
 
 def pytest_configure(config):
@@ -2337,3 +2338,11 @@ def categorical_floats_df():
     )
 
     return X
+
+
+@pytest.fixture
+def get_black_config():
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    evalml_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
+    black_config = get_evalml_black_config(evalml_path)
+    return black_config
@@ -1,3 +1,4 @@
+black==23.1.0
 catboost==1.1.1
 category-encoders==2.5.1.post0
 click==8.1.3
@@ -1,6 +1,7 @@
+black==22.3.0
 catboost==1.1.1
 category-encoders==2.2.2
-click==7.1.2
+click>=8.0.0
 cloudpickle==1.5.0
 colorama==0.4.4
 dask==2022.2.0
@@ -2,7 +2,7 @@ IPython==8.10.0
 PyYAML==5.4
 catboost==1.1.1
 category-encoders==2.2.2
-click==7.1.2
+click>=8.0.0
 cloudpickle==1.5.0
 codecov==2.1.11
 colorama==0.4.4
51 changes: 29 additions & 22 deletions evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -1,5 +1,6 @@
 from unittest.mock import patch
 
+import black
 import numpy as np
 import pandas as pd
 import pytest
@@ -653,7 +654,7 @@ def test_generate_code_pipeline_errors():
         generate_pipeline_code([Imputer(), LogisticRegressionClassifier()])
 
 
-def test_generate_code_pipeline_json_with_objects():
+def test_generate_code_pipeline_json_with_objects(get_black_config):
     class CustomEstimator(Estimator):
         name = "My Custom Estimator"
         hyperparameter_ranges = {}
@@ -676,12 +677,12 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
parameters={"My Custom Estimator": {"numpy_arg": np.array([0])}},
)
generated_pipeline_code = generate_pipeline_code(pipeline)
assert (
generated_pipeline_code
== "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
assert generated_pipeline_code == black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
"'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
mode=black.Mode(**get_black_config),
)

pipeline = BinaryClassificationPipeline(
@@ -690,37 +691,40 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
parameters={"My Custom Estimator": {"random_arg": Imputer()}},
)
generated_pipeline_code = generate_pipeline_code(pipeline)
assert (
generated_pipeline_code
== "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
assert generated_pipeline_code == black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'My Custom Estimator':{'random_arg': Imputer(categorical_impute_strategy='most_frequent', numeric_impute_strategy='mean', boolean_impute_strategy='most_frequent', categorical_fill_value=None, numeric_fill_value=None, boolean_fill_value=None), 'numpy_arg': []}}, "
"custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
"custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
mode=black.Mode(**get_black_config),
)


def test_generate_code_pipeline():
def test_generate_code_pipeline(get_black_config):
binary_pipeline = BinaryClassificationPipeline(
["Imputer", "Random Forest Classifier"],
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Classifier': ['Random Forest Classifier', 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)"
"'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)",
mode=black.Mode(**get_black_config),
)

pipeline = generate_pipeline_code(binary_pipeline)
assert expected_code == pipeline

regression_pipeline = RegressionPipeline(
["Imputer", "Random Forest Regressor"],
custom_name="Mock Regression Pipeline",
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
"pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
"'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(regression_pipeline)
assert pipeline == expected_code
@@ -733,17 +737,18 @@ def test_generate_code_pipeline():
"Random Forest Regressor": {"n_estimators": 50},
},
)
expected_code_params = (
expected_code_params = black.format_str(
"from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
"pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'most_frequent', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
"'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(regression_pipeline_with_params)
assert pipeline == expected_code_params


def test_generate_code_nonlinear_pipeline():
def test_generate_code_nonlinear_pipeline(get_black_config):
custom_name = "Non Linear Binary Pipeline"
component_graph = {
"Imputer": ["Imputer", "X", "y"],
Expand All @@ -762,7 +767,7 @@ def test_generate_code_nonlinear_pipeline():
component_graph=component_graph,
custom_name=custom_name,
)
expected = (
expected = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline("
"component_graph={'Imputer': ['Imputer', 'X', 'y'], "
Expand All @@ -777,13 +782,14 @@ def test_generate_code_nonlinear_pipeline():
"'Random Forest':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}, "
"'Elastic Net':{'penalty': 'elasticnet', 'C': 1.0, 'l1_ratio': 0.15, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'saga'}, "
"'Logistic Regression Classifier':{'penalty': 'l2', 'C': 1.0, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'lbfgs'}}, "
"custom_name='Non Linear Binary Pipeline', random_seed=0)"
"custom_name='Non Linear Binary Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline_code = generate_pipeline_code(pipeline)
assert pipeline_code == expected


def test_generate_code_pipeline_with_custom_components():
def test_generate_code_pipeline_with_custom_components(get_black_config):
class CustomTransformer(Transformer):
name = "My Custom Transformer"
hyperparameter_ranges = {}
Expand Down Expand Up @@ -818,10 +824,11 @@ def __init__(self, random_arg=False, random_seed=0):
mock_pipeline_with_custom_components = BinaryClassificationPipeline(
[CustomTransformer, CustomEstimator],
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'My Custom Transformer': [CustomTransformer, 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'My Custom Transformer.x', 'y']}, "
"parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)"
"parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(mock_pipeline_with_custom_components)
assert pipeline == expected_code
15 changes: 15 additions & 0 deletions evalml/tests/utils_tests/test_cli_utils.py
@@ -1,12 +1,14 @@
 import os
 from unittest.mock import patch
 
+import black
 import pytest
 from click.testing import CliRunner
 from packaging.requirements import Requirement
 
 from evalml.__main__ import cli
 from evalml.utils.cli_utils import (
+    get_evalml_black_config,
     get_evalml_pip_requirements,
     get_evalml_root,
     get_installed_packages,
@@ -107,3 +109,16 @@ def test_installed_packages(current_dir):
 def test_get_evalml_root(current_dir):
     root = os.path.abspath(os.path.join(current_dir, "..", ".."))
     assert get_evalml_root() == root
+
+
+def test_get_evalml_black_config(current_dir):
+    evalml_path = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
+    black_config = get_evalml_black_config(evalml_path)
+    assert black_config["line_length"] == 88
+    assert black_config["target_versions"] == set([black.TargetVersion["PY39"]])
+
+    black_config = get_evalml_black_config(
+        os.path.join(current_dir, "..", "..", "random_file"),
+    )
+    assert black_config["line_length"] == 88
+    assert black_config["target_versions"] == set([black.TargetVersion["PY39"]])
35 changes: 35 additions & 0 deletions evalml/utils/cli_utils.py
@@ -6,6 +6,7 @@
 import struct
 import sys
 
+import black
 import pkg_resources
 import tomli
 from packaging.requirements import Requirement
@@ -158,3 +159,37 @@ def get_evalml_pip_requirements(
         convert_to_conda=convert_to_conda,
     )
     return standardized_package_specifiers
+
+
+def get_evalml_black_config(
+    evalml_path,
+):
+    """Gets configuration for black.
+
+    Args:
+        evalml_path: Path to evalml root.
+
+    Returns:
+        Dictionary of black configuration.
+    """
+    black_config = None
+    try:
+        toml_dict = None
+        evalml_path = pathlib.Path(evalml_path, "pyproject.toml")
+        with open(evalml_path, "rb") as f:
+            toml_dict = tomli.load(f)
+        black_config = toml_dict["tool"]["black"]
+        black_config["line_length"] = black_config.pop("line-length")
+        target_versions = set(
+            [
+                black.TargetVersion[ver.upper()]
+                for ver in black_config.pop("target-version")
+            ],
+        )
+        black_config["target_versions"] = target_versions
+    except Exception:
+        black_config = {
+            "line_length": 88,
+            "target_versions": set([black.TargetVersion["PY39"]]),
+        }
+    return black_config
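get_evalml_black_config is a thin adapter: it loads the [tool.black] table from pyproject.toml via tomli and renames the hyphenated TOML keys (line-length, target-version) to the keyword arguments black.Mode expects (line_length, target_versions), falling back to line length 88 and Python 3.9 targets on any failure. A standalone sketch of the same idea, assuming a pyproject.toml in the working directory whose [tool.black] table contains line-length = 88 and target-version = ['py39']:

    import pathlib

    import black
    import tomli

    with open(pathlib.Path(".", "pyproject.toml"), "rb") as f:
        tool_black = tomli.load(f)["tool"]["black"]

    # Translate TOML's hyphenated keys into black.Mode keyword arguments.
    mode = black.Mode(
        line_length=tool_black["line-length"],
        target_versions={
            black.TargetVersion[ver.upper()] for ver in tool_black["target-version"]
        },
    )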
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
"pyzmq >= 20.0.0",
"colorama >= 0.4.4",
"cloudpickle >= 1.5.0",
"click >= 7.1.2",
"click >= 8.0.0",
"shap >= 0.40.0",
"statsmodels >= 0.12.2",
"texttable >= 1.6.2",
@@ -63,6 +63,7 @@ dependencies = [
"vowpalwabbit >= 8.11.0",
"tomli >= 2.0.1",
"packaging >= 23.0",
"black[jupyter] >= 22.3.0",
]

[project.urls]
@@ -87,7 +88,6 @@ test = [
 ]
 dev = [
     "ruff == 0.0.228",
-    "black[jupyter] >= 22.3.0",
     "darglint == 1.8.0",
     "pre-commit >= 2.20.0",
     "evalml[docs,test]",
