Use black to format pipeline code (#4005)
* Use black to format pipeline code

* Lint

* move black import

* Fix dependency tests

* Fix dependencies

* Change lowest click version to match black

* Fix tests

* Pull black config

* set default black config

* Add test cases

* Fix tests

* fix testing
jeremyliweishih committed Feb 22, 2023
1 parent 15c93c4 commit be98201
Showing 12 changed files with 107 additions and 29 deletions.
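In short: generate_pipeline_code now pipes the source string it builds through black before returning it, using the [tool.black] settings from evalml's pyproject.toml (with a hard-coded fallback when that config cannot be read). A minimal sketch of the resulting behavior, assuming an evalml install that includes this change:

    from evalml.pipelines import BinaryClassificationPipeline
    from evalml.pipelines.utils import generate_pipeline_code

    # A simple two-component pipeline; both names are built-in evalml components.
    pipeline = BinaryClassificationPipeline(["Imputer", "Random Forest Classifier"])

    # The returned string is now black-formatted (line wrapping, quoting,
    # trailing commas) instead of one long repr-style line.
    print(generate_pipeline_code(pipeline))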
3 changes: 2 additions & 1 deletion .github/meta.yaml
@@ -33,7 +33,7 @@ outputs:
 - statsmodels >=0.12.2
 - colorama >=0.4.4
 - cloudpickle >=1.5.0
-- click >=7.1.2
+- click>=8.0.0
 - shap >=0.40.0
 - texttable >=1.6.2
 - woodwork >=0.21.1
@@ -45,6 +45,7 @@
 - python-graphviz >=0.13
 - tomli >=2.0.1
 - packaging >=23.0
+- black[jupyter] >=22.3.0
 test:
   imports:
     - evalml
2 changes: 1 addition & 1 deletion core-requirements.txt
@@ -6,7 +6,7 @@ scikit-optimize>=0.9.0
 pyzmq>=20.0.0
 colorama>=0.4.4
 cloudpickle>=1.5.0
-click>=7.1.2
+click>=8.0.0
 shap>=0.40.0
 statsmodels>=0.12.2
 texttable>=1.6.2
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+        * Move black to regular dependency and use it for ``generate_pipeline_code`` :pr:`4005`
     * Fixes
     * Changes
         * Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996`
10 changes: 9 additions & 1 deletion evalml/pipelines/utils.py
@@ -1,6 +1,8 @@
"""Utility methods for EvalML pipelines."""
import copy
import os

import black
from woodwork import logical_types

from evalml.data_checks import DataCheckActionCode, DataCheckActionOption
@@ -64,6 +66,7 @@
     is_time_series,
 )
 from evalml.utils import get_time_index, infer_feature_types
+from evalml.utils.cli_utils import get_evalml_black_config
 from evalml.utils.gen_utils import contains_all_ts_parameters
 
 
@@ -635,7 +638,12 @@ def generate_pipeline_code(element):
             ),
         )
     code_strings.append(repr(element))
-    return "\n".join(code_strings)
+    pipeline_code = "\n".join(code_strings)
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    evalml_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
+    black_config = get_evalml_black_config(evalml_path)
+    pipeline_code = black.format_str(pipeline_code, mode=black.Mode(**black_config))
+    return pipeline_code
 
 
 def _make_stacked_ensemble_pipeline(
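The last hunk above is the heart of the change: the joined code string is run through black.format_str with a black.Mode built from the repository-level config. Conceptually the formatting step reduces to the following simplified sketch (the hard-coded line_length is an assumption standing in for whatever get_evalml_black_config returns):

    import black

    raw = "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y']}, parameters={}, random_seed=0)"

    # black.Mode takes keyword arguments such as line_length and target_versions,
    # which is why get_evalml_black_config renames the TOML keys before use.
    formatted = black.format_str(raw, mode=black.Mode(line_length=88))
    print(formatted)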
9 changes: 9 additions & 0 deletions evalml/tests/conftest.py
@@ -47,6 +47,7 @@
     is_time_series,
 )
 from evalml.utils import infer_feature_types
+from evalml.utils.cli_utils import get_evalml_black_config
 
 
 def pytest_configure(config):
@@ -2337,3 +2338,11 @@ def categorical_floats_df():
     )
 
     return X
+
+
+@pytest.fixture
+def get_black_config():
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    evalml_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
+    black_config = get_evalml_black_config(evalml_path)
+    return black_config
@@ -1,3 +1,4 @@
+black==23.1.0
 catboost==1.1.1
 category-encoders==2.5.1.post0
 click==8.1.3
@@ -1,6 +1,7 @@
+black==22.3.0
 catboost==1.1.1
 category-encoders==2.2.2
-click==7.1.2
+click>=8.0.0
 cloudpickle==1.5.0
 colorama==0.4.4
 dask==2022.2.0
@@ -2,7 +2,7 @@ IPython==8.10.0
 PyYAML==5.4
 catboost==1.1.1
 category-encoders==2.2.2
-click==7.1.2
+click>=8.0.0
 cloudpickle==1.5.0
 codecov==2.1.11
 colorama==0.4.4
51 changes: 29 additions & 22 deletions evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -1,5 +1,6 @@
 from unittest.mock import patch
 
+import black
 import numpy as np
 import pandas as pd
 import pytest
@@ -653,7 +654,7 @@ def test_generate_code_pipeline_errors():
         generate_pipeline_code([Imputer(), LogisticRegressionClassifier()])
 
 
-def test_generate_code_pipeline_json_with_objects():
+def test_generate_code_pipeline_json_with_objects(get_black_config):
     class CustomEstimator(Estimator):
         name = "My Custom Estimator"
         hyperparameter_ranges = {}
@@ -676,12 +677,12 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
parameters={"My Custom Estimator": {"numpy_arg": np.array([0])}},
)
generated_pipeline_code = generate_pipeline_code(pipeline)
assert (
generated_pipeline_code
== "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
assert generated_pipeline_code == black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
"'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
mode=black.Mode(**get_black_config),
)

pipeline = BinaryClassificationPipeline(
@@ -690,37 +691,40 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
parameters={"My Custom Estimator": {"random_arg": Imputer()}},
)
generated_pipeline_code = generate_pipeline_code(pipeline)
assert (
generated_pipeline_code
== "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
assert generated_pipeline_code == black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'My Custom Estimator':{'random_arg': Imputer(categorical_impute_strategy='most_frequent', numeric_impute_strategy='mean', boolean_impute_strategy='most_frequent', categorical_fill_value=None, numeric_fill_value=None, boolean_fill_value=None), 'numpy_arg': []}}, "
"custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
"custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
mode=black.Mode(**get_black_config),
)


def test_generate_code_pipeline():
def test_generate_code_pipeline(get_black_config):
binary_pipeline = BinaryClassificationPipeline(
["Imputer", "Random Forest Classifier"],
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Classifier': ['Random Forest Classifier', 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)"
"'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)",
mode=black.Mode(**get_black_config),
)

pipeline = generate_pipeline_code(binary_pipeline)
assert expected_code == pipeline

regression_pipeline = RegressionPipeline(
["Imputer", "Random Forest Regressor"],
custom_name="Mock Regression Pipeline",
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
"pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
"'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(regression_pipeline)
assert pipeline == expected_code
@@ -733,17 +737,18 @@ def test_generate_code_pipeline():
"Random Forest Regressor": {"n_estimators": 50},
},
)
expected_code_params = (
expected_code_params = black.format_str(
"from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
"pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, "
"parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'most_frequent', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
"'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
"'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(regression_pipeline_with_params)
assert pipeline == expected_code_params


def test_generate_code_nonlinear_pipeline():
def test_generate_code_nonlinear_pipeline(get_black_config):
custom_name = "Non Linear Binary Pipeline"
component_graph = {
"Imputer": ["Imputer", "X", "y"],
Expand All @@ -762,7 +767,7 @@ def test_generate_code_nonlinear_pipeline():
component_graph=component_graph,
custom_name=custom_name,
)
expected = (
expected = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline("
"component_graph={'Imputer': ['Imputer', 'X', 'y'], "
Expand All @@ -777,13 +782,14 @@ def test_generate_code_nonlinear_pipeline():
"'Random Forest':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}, "
"'Elastic Net':{'penalty': 'elasticnet', 'C': 1.0, 'l1_ratio': 0.15, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'saga'}, "
"'Logistic Regression Classifier':{'penalty': 'l2', 'C': 1.0, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'lbfgs'}}, "
"custom_name='Non Linear Binary Pipeline', random_seed=0)"
"custom_name='Non Linear Binary Pipeline', random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline_code = generate_pipeline_code(pipeline)
assert pipeline_code == expected


def test_generate_code_pipeline_with_custom_components():
def test_generate_code_pipeline_with_custom_components(get_black_config):
class CustomTransformer(Transformer):
name = "My Custom Transformer"
hyperparameter_ranges = {}
Expand Down Expand Up @@ -818,10 +824,11 @@ def __init__(self, random_arg=False, random_seed=0):
mock_pipeline_with_custom_components = BinaryClassificationPipeline(
[CustomTransformer, CustomEstimator],
)
expected_code = (
expected_code = black.format_str(
"from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
"pipeline = BinaryClassificationPipeline(component_graph={'My Custom Transformer': [CustomTransformer, 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'My Custom Transformer.x', 'y']}, "
"parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)"
"parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)",
mode=black.Mode(**get_black_config),
)
pipeline = generate_pipeline_code(mock_pipeline_with_custom_components)
assert pipeline == expected_code
15 changes: 15 additions & 0 deletions evalml/tests/utils_tests/test_cli_utils.py
@@ -1,12 +1,14 @@
 import os
 from unittest.mock import patch
 
+import black
 import pytest
 from click.testing import CliRunner
 from packaging.requirements import Requirement
 
 from evalml.__main__ import cli
 from evalml.utils.cli_utils import (
+    get_evalml_black_config,
     get_evalml_pip_requirements,
     get_evalml_root,
     get_installed_packages,
@@ -107,3 +109,16 @@ def test_installed_packages(current_dir):
 def test_get_evalml_root(current_dir):
     root = os.path.abspath(os.path.join(current_dir, "..", ".."))
     assert get_evalml_root() == root
+
+
+def test_get_evalml_black_config(current_dir):
+    evalml_path = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
+    black_config = get_evalml_black_config(evalml_path)
+    assert black_config["line_length"] == 88
+    assert black_config["target_versions"] == set([black.TargetVersion["PY39"]])
+
+    black_config = get_evalml_black_config(
+        os.path.join(current_dir, "..", "..", "random_file"),
+    )
+    assert black_config["line_length"] == 88
+    assert black_config["target_versions"] == set([black.TargetVersion["PY39"]])
35 changes: 35 additions & 0 deletions evalml/utils/cli_utils.py
@@ -6,6 +6,7 @@
 import struct
 import sys
 
+import black
 import pkg_resources
 import tomli
 from packaging.requirements import Requirement
@@ -158,3 +159,37 @@ def get_evalml_pip_requirements(
         convert_to_conda=convert_to_conda,
     )
     return standardized_package_specifiers
+
+
+def get_evalml_black_config(
+    evalml_path,
+):
+    """Gets configuration for black.
+
+    Args:
+        evalml_path: Path to evalml root.
+
+    Returns:
+        Dictionary of black configuration.
+    """
+    black_config = None
+    try:
+        toml_dict = None
+        evalml_path = pathlib.Path(evalml_path, "pyproject.toml")
+        with open(evalml_path, "rb") as f:
+            toml_dict = tomli.load(f)
+        black_config = toml_dict["tool"]["black"]
+        black_config["line_length"] = black_config.pop("line-length")
+        target_versions = set(
+            [
+                black.TargetVersion[ver.upper()]
+                for ver in black_config.pop("target-version")
+            ],
+        )
+        black_config["target_versions"] = target_versions
+    except Exception:
+        black_config = {
+            "line_length": 88,
+            "target_versions": set([black.TargetVersion["PY39"]]),
+        }
+    return black_config
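get_evalml_black_config is a thin adapter: it loads the [tool.black] table from pyproject.toml via tomli and renames the hyphenated TOML keys (line-length, target-version) to the keyword arguments black.Mode expects (line_length, target_versions), falling back to line length 88 and Python 3.9 targets on any failure. A standalone sketch of the same idea, assuming a pyproject.toml in the working directory whose [tool.black] table contains line-length = 88 and target-version = ['py39']:

    import pathlib

    import black
    import tomli

    with open(pathlib.Path(".", "pyproject.toml"), "rb") as f:
        tool_black = tomli.load(f)["tool"]["black"]

    # Translate TOML's hyphenated keys into black.Mode keyword arguments.
    mode = black.Mode(
        line_length=tool_black["line-length"],
        target_versions={
            black.TargetVersion[ver.upper()] for ver in tool_black["target-version"]
        },
    )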
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
"pyzmq >= 20.0.0",
"colorama >= 0.4.4",
"cloudpickle >= 1.5.0",
"click >= 7.1.2",
"click >= 8.0.0",
"shap >= 0.40.0",
"statsmodels >= 0.12.2",
"texttable >= 1.6.2",
@@ -63,6 +63,7 @@ dependencies = [
"vowpalwabbit >= 8.11.0",
"tomli >= 2.0.1",
"packaging >= 23.0",
"black[jupyter] >= 22.3.0",
]

[project.urls]
@@ -87,7 +88,6 @@ test = [
 ]
 dev = [
     "ruff == 0.0.228",
-    "black[jupyter] >= 22.3.0",
     "darglint == 1.8.0",
     "pre-commit >= 2.20.0",
     "evalml[docs,test]",
