Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use black to format pipeline code #4005

Merged
merged 17 commits into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ outputs:
- statsmodels >=0.12.2
- colorama >=0.4.4
- cloudpickle >=1.5.0
- click >=7.1.2
- click>=8.0.0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed for black, which requires click >= 8.0.0.

- shap >=0.40.0
- texttable >=1.6.2
- woodwork >=0.21.1
Expand All @@ -45,6 +45,7 @@ outputs:
- python-graphviz >=0.13
- tomli >=2.0.1
- packaging >=23.0
- black[jupyter] >=22.3.0
test:
imports:
- evalml
Expand Down
2 changes: 1 addition & 1 deletion core-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ scikit-optimize>=0.9.0
pyzmq>=20.0.0
colorama>=0.4.4
cloudpickle>=1.5.0
click>=7.1.2
click>=8.0.0
shap>=0.40.0
statsmodels>=0.12.2
texttable>=1.6.2
Expand Down
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Release Notes
-------------
**Future Releases**
* Enhancements
* Move black to regular dependency and use it for ``generate_pipeline_code`` :pr:`4005`
* Fixes
* Changes
* Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996`
Expand Down
10 changes: 9 additions & 1 deletion evalml/pipelines/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Utility methods for EvalML pipelines."""
import copy
import os

import black
from woodwork import logical_types

from evalml.data_checks import DataCheckActionCode, DataCheckActionOption
Expand Down Expand Up @@ -64,6 +66,7 @@
is_time_series,
)
from evalml.utils import get_time_index, infer_feature_types
from evalml.utils.cli_utils import get_evalml_black_config
from evalml.utils.gen_utils import contains_all_ts_parameters


Expand Down Expand Up @@ -635,7 +638,12 @@ def generate_pipeline_code(element):
),
)
code_strings.append(repr(element))
return "\n".join(code_strings)
pipeline_code = "\n".join(code_strings)
current_dir = os.path.dirname(os.path.abspath(__file__))
evalml_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
black_config = get_evalml_black_config(evalml_path)
pipeline_code = black.format_str(pipeline_code, mode=black.Mode(**black_config))
return pipeline_code


def _make_stacked_ensemble_pipeline(
Expand Down
9 changes: 9 additions & 0 deletions evalml/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
is_time_series,
)
from evalml.utils import infer_feature_types
from evalml.utils.cli_utils import get_evalml_black_config


def pytest_configure(config):
Expand Down Expand Up @@ -2337,3 +2338,11 @@ def categorical_floats_df():
)

return X


@pytest.fixture
def get_black_config():
    """Provide the black configuration dictionary used by the code-generation tests.

    The configuration is looked up from the directory two levels above the
    directory containing this file.

    Returns:
        dict: Whatever ``get_evalml_black_config`` returns for that directory.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    two_levels_up = os.path.join(this_dir, "..", "..")
    return get_evalml_black_config(os.path.abspath(two_levels_up))
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
black==23.1.0
catboost==1.1.1
category-encoders==2.5.1.post0
click==8.1.3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
black==22.3.0
catboost==1.1.1
category-encoders==2.2.2
click==7.1.2
click>=8.0.0
cloudpickle==1.5.0
colorama==0.4.4
dask==2022.2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ IPython==8.10.0
PyYAML==5.4
catboost==1.1.1
category-encoders==2.2.2
click==7.1.2
click>=8.0.0
cloudpickle==1.5.0
codecov==2.1.11
colorama==0.4.4
Expand Down
51 changes: 29 additions & 22 deletions evalml/tests/pipeline_tests/test_pipeline_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import patch

import black
import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -653,7 +654,7 @@ def test_generate_code_pipeline_errors():
generate_pipeline_code([Imputer(), LogisticRegressionClassifier()])


def test_generate_code_pipeline_json_with_objects():
def test_generate_code_pipeline_json_with_objects(get_black_config):
class CustomEstimator(Estimator):
name = "My Custom Estimator"
hyperparameter_ranges = {}
Expand All @@ -676,12 +677,12 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
parameters={"My Custom Estimator": {"numpy_arg": np.array([0])}},
)
generated_pipeline_code = generate_pipeline_code(pipeline)
assert (
generated_pipeline_code
== "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
assert generated_pipeline_code == black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
"'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
mode=black.Mode(**get_black_config),
)

pipeline = BinaryClassificationPipeline(
Expand All @@ -690,37 +691,40 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
parameters={"My Custom Estimator": {"random_arg": Imputer()}},
)
generated_pipeline_code = generate_pipeline_code(pipeline)
assert (
generated_pipeline_code
== "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
assert generated_pipeline_code == black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'My Custom Estimator':{'random_arg': Imputer(categorical_impute_strategy='most_frequent', numeric_impute_strategy='mean', boolean_impute_strategy='most_frequent', categorical_fill_value=None, numeric_fill_value=None, boolean_fill_value=None), 'numpy_arg': []}}, "
"custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
"custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
mode=black.Mode(**get_black_config),
)


def test_generate_code_pipeline():
def test_generate_code_pipeline(get_black_config):
binary_pipeline = BinaryClassificationPipeline(
["Imputer", "Random Forest Classifier"],
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Classifier': ['Random Forest Classifier', 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)"
"'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)",
mode=black.Mode(**get_black_config),
)

pipeline = generate_pipeline_code(binary_pipeline)
assert expected_code == pipeline

regression_pipeline = RegressionPipeline(
["Imputer", "Random Forest Regressor"],
custom_name="Mock Regression Pipeline",
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
"pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
"'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(regression_pipeline)
assert pipeline == expected_code
Expand All @@ -733,17 +737,18 @@ def test_generate_code_pipeline():
"Random Forest Regressor": {"n_estimators": 50},
},
)
expected_code_params = (
expected_code_params = black.format_str(
"from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
"pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'most_frequent', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
"'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(regression_pipeline_with_params)
assert pipeline == expected_code_params


def test_generate_code_nonlinear_pipeline():
def test_generate_code_nonlinear_pipeline(get_black_config):
custom_name = "Non Linear Binary Pipeline"
component_graph = {
"Imputer": ["Imputer", "X", "y"],
Expand All @@ -762,7 +767,7 @@ def test_generate_code_nonlinear_pipeline():
component_graph=component_graph,
custom_name=custom_name,
)
expected = (
expected = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline("
"component_graph={'Imputer': ['Imputer', 'X', 'y'], "
Expand All @@ -777,13 +782,14 @@ def test_generate_code_nonlinear_pipeline():
"'Random Forest':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}, "
"'Elastic Net':{'penalty': 'elasticnet', 'C': 1.0, 'l1_ratio': 0.15, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'saga'}, "
"'Logistic Regression Classifier':{'penalty': 'l2', 'C': 1.0, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'lbfgs'}}, "
"custom_name='Non Linear Binary Pipeline', random_seed=0)"
"custom_name='Non Linear Binary Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline_code = generate_pipeline_code(pipeline)
assert pipeline_code == expected


def test_generate_code_pipeline_with_custom_components():
def test_generate_code_pipeline_with_custom_components(get_black_config):
class CustomTransformer(Transformer):
name = "My Custom Transformer"
hyperparameter_ranges = {}
Expand Down Expand Up @@ -818,10 +824,11 @@ def __init__(self, random_arg=False, random_seed=0):
mock_pipeline_with_custom_components = BinaryClassificationPipeline(
[CustomTransformer, CustomEstimator],
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'My Custom Transformer': [CustomTransformer, 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'My Custom Transformer.x', 'y']}, "
"parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)"
"parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(mock_pipeline_with_custom_components)
assert pipeline == expected_code
Expand Down
15 changes: 15 additions & 0 deletions evalml/tests/utils_tests/test_cli_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
from unittest.mock import patch

import black
import pytest
from click.testing import CliRunner
from packaging.requirements import Requirement

from evalml.__main__ import cli
from evalml.utils.cli_utils import (
get_evalml_black_config,
get_evalml_pip_requirements,
get_evalml_root,
get_installed_packages,
Expand Down Expand Up @@ -107,3 +109,16 @@ def test_installed_packages(current_dir):
def test_get_evalml_root(current_dir):
    """``get_evalml_root`` should equal the directory two levels above ``current_dir``."""
    expected_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
    actual_root = get_evalml_root()
    assert actual_root == expected_root


def test_get_evalml_black_config(current_dir):
    """Check the black config returned for two different lookup paths.

    The first path is three levels above ``current_dir`` (presumably the
    repository root holding ``pyproject.toml``); the second points at a
    ``random_file`` entry. Both are expected to yield a line length of 88 and
    a PY39 target version.
    """
    expected_versions = {black.TargetVersion["PY39"]}

    three_levels_up = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
    config = get_evalml_black_config(three_levels_up)
    assert config["line_length"] == 88
    assert config["target_versions"] == expected_versions

    random_file_path = os.path.join(current_dir, "..", "..", "random_file")
    config = get_evalml_black_config(random_file_path)
    assert config["line_length"] == 88
    assert config["target_versions"] == expected_versions
35 changes: 35 additions & 0 deletions evalml/utils/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import struct
import sys

import black
import pkg_resources
import tomli
from packaging.requirements import Requirement
Expand Down Expand Up @@ -158,3 +159,37 @@ def get_evalml_pip_requirements(
convert_to_conda=convert_to_conda,
)
return standardized_package_specifiers


def get_evalml_black_config(
    evalml_path,
):
    """Load black settings from the ``pyproject.toml`` found under ``evalml_path``.

    The ``[tool.black]`` table is read and two of its keys are renamed:
    ``line-length`` becomes ``line_length`` and ``target-version`` becomes
    ``target_versions``, converted to a set of ``black.TargetVersion`` members.
    Any other keys in the table are returned unchanged.

    Args:
        evalml_path: Directory expected to contain ``pyproject.toml``.

    Returns:
        dict: The black configuration. If the file cannot be read or the
            expected keys cannot be processed, a default of line length 88
            targeting PY39 is returned instead.
    """
    try:
        pyproject_file = pathlib.Path(evalml_path, "pyproject.toml")
        with pyproject_file.open("rb") as toml_file:
            settings = tomli.load(toml_file)["tool"]["black"]
        settings["line_length"] = settings.pop("line-length")
        settings["target_versions"] = {
            black.TargetVersion[version.upper()]
            for version in settings.pop("target-version")
        }
    except Exception:
        # Deliberate best-effort: any failure while locating or parsing the
        # configuration falls back to fixed defaults.
        return {
            "line_length": 88,
            "target_versions": {black.TargetVersion["PY39"]},
        }
    return settings
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
"pyzmq >= 20.0.0",
"colorama >= 0.4.4",
"cloudpickle >= 1.5.0",
"click >= 7.1.2",
"click >= 8.0.0",
"shap >= 0.40.0",
"statsmodels >= 0.12.2",
"texttable >= 1.6.2",
Expand All @@ -63,6 +63,7 @@ dependencies = [
"vowpalwabbit >= 8.11.0",
"tomli >= 2.0.1",
"packaging >= 23.0",
"black[jupyter] >= 22.3.0",
]

[project.urls]
Expand All @@ -87,7 +88,6 @@ test = [
]
dev = [
"ruff == 0.0.228",
"black[jupyter] >= 22.3.0",
"darglint == 1.8.0",
"pre-commit >= 2.20.0",
"evalml[docs,test]",
Expand Down