alteryx · jeremyliweishih · Feb 22, 2023 · Feb 15, 2023 · Feb 15, 2023 · Feb 15, 2023
diff --git a/.github/meta.yaml b/.github/meta.yaml
@@ -33,7 +33,7 @@ outputs:
         - statsmodels >=0.12.2
         - colorama >=0.4.4
         - cloudpickle >=1.5.0
-        - click >=7.1.2
+        - click >=8.0.0
         - shap >=0.40.0
         - texttable >=1.6.2
         - woodwork >=0.21.1
@@ -45,6 +45,7 @@ outputs:
         - python-graphviz >=0.13
         - tomli >=2.0.1
         - packaging >=23.0
+        - black[jupyter] >=22.3.0
     test:
       imports:
         - evalml

diff --git a/core-requirements.txt b/core-requirements.txt
@@ -6,7 +6,7 @@ scikit-optimize>=0.9.0
 pyzmq>=20.0.0
 colorama>=0.4.4
 cloudpickle>=1.5.0
-click>=7.1.2
+click>=8.0.0
 shap>=0.40.0
 statsmodels>=0.12.2
 texttable>=1.6.2

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+      * Moved black to a regular dependency and used it to format the output of ``generate_pipeline_code`` :pr:`4005`
     * Fixes
     * Changes
     * Documentation Changes

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
@@ -1,6 +1,7 @@
 """Utility methods for EvalML pipelines."""
 import copy
 
+import black
 from woodwork import logical_types
 
 from evalml.data_checks import DataCheckActionCode, DataCheckActionOption
@@ -635,7 +636,13 @@ def generate_pipeline_code(element):
         ),
     )
     code_strings.append(repr(element))
-    return "\n".join(code_strings)
+    pipeline_code = "\n".join(code_strings)
+    pipeline_code = black.format_str(
+        pipeline_code,
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
+    )
+
+    return pipeline_code
 
 
 def _make_stacked_ensemble_pipeline(

diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -1,3 +1,4 @@
+black==23.1.0
 catboost==1.1.1
 category-encoders==2.5.1.post0
 click==8.1.3

diff --git a/evalml/tests/dependency_update_check/minimum_requirements.txt b/evalml/tests/dependency_update_check/minimum_requirements.txt
@@ -1,6 +1,7 @@
+black==22.3.0
 catboost==0.20
 category-encoders==2.2.2
-click==7.1.2
+click==8.0.0
 cloudpickle==1.5.0
 colorama==0.4.4
 dask==2022.2.0

diff --git a/evalml/tests/dependency_update_check/minimum_test_requirements.txt b/evalml/tests/dependency_update_check/minimum_test_requirements.txt
@@ -2,7 +2,7 @@ IPython==8.10.0
 PyYAML==5.4
 catboost==0.20
 category-encoders==2.2.2
-click==7.1.2
+click==8.0.0
 cloudpickle==1.5.0
 codecov==2.1.11
 colorama==0.4.4

diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -1,5 +1,6 @@
 from unittest.mock import patch
 
+import black
 import numpy as np
 import pandas as pd
 import pytest
@@ -676,12 +677,12 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
         parameters={"My Custom Estimator": {"numpy_arg": np.array([0])}},
     )
     generated_pipeline_code = generate_pipeline_code(pipeline)
-    assert (
-        generated_pipeline_code
-        == "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
+    assert generated_pipeline_code == black.format_str(
+        "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
         "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
         "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
-        "'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
+        "'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
 
     pipeline = BinaryClassificationPipeline(
@@ -690,37 +691,40 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0):
         parameters={"My Custom Estimator": {"random_arg": Imputer()}},
     )
     generated_pipeline_code = generate_pipeline_code(pipeline)
-    assert (
-        generated_pipeline_code
-        == "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
+    assert generated_pipeline_code == black.format_str(
+        "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
         "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, "
         "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
         "'My Custom Estimator':{'random_arg': Imputer(categorical_impute_strategy='most_frequent', numeric_impute_strategy='mean', boolean_impute_strategy='most_frequent', categorical_fill_value=None, numeric_fill_value=None, boolean_fill_value=None), 'numpy_arg': []}}, "
-        "custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
+        "custom_name='Mock Binary Pipeline with Transformer', random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
 
 
 def test_generate_code_pipeline():
     binary_pipeline = BinaryClassificationPipeline(
         ["Imputer", "Random Forest Classifier"],
     )
-    expected_code = (
+    expected_code = black.format_str(
         "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
         "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Classifier': ['Random Forest Classifier', 'Imputer.x', 'y']}, "
         "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
-        "'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)"
+        "'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
+
     pipeline = generate_pipeline_code(binary_pipeline)
     assert expected_code == pipeline
 
     regression_pipeline = RegressionPipeline(
         ["Imputer", "Random Forest Regressor"],
         custom_name="Mock Regression Pipeline",
     )
-    expected_code = (
+    expected_code = black.format_str(
         "from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
         "pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
-        "'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
+        "'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
     pipeline = generate_pipeline_code(regression_pipeline)
     assert pipeline == expected_code
@@ -733,11 +737,12 @@ def test_generate_code_pipeline():
             "Random Forest Regressor": {"n_estimators": 50},
         },
     )
-    expected_code_params = (
+    expected_code_params = black.format_str(
         "from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
         "pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, "
         "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'most_frequent', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, "
-        "'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
+        "'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
     pipeline = generate_pipeline_code(regression_pipeline_with_params)
     assert pipeline == expected_code_params
@@ -762,7 +767,7 @@ def test_generate_code_nonlinear_pipeline():
         component_graph=component_graph,
         custom_name=custom_name,
     )
-    expected = (
+    expected = black.format_str(
         "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
         "pipeline = BinaryClassificationPipeline("
         "component_graph={'Imputer': ['Imputer', 'X', 'y'], "
@@ -777,7 +782,8 @@ def test_generate_code_nonlinear_pipeline():
         "'Random Forest':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}, "
         "'Elastic Net':{'penalty': 'elasticnet', 'C': 1.0, 'l1_ratio': 0.15, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'saga'}, "
         "'Logistic Regression Classifier':{'penalty': 'l2', 'C': 1.0, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'lbfgs'}}, "
-        "custom_name='Non Linear Binary Pipeline', random_seed=0)"
+        "custom_name='Non Linear Binary Pipeline', random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
     pipeline_code = generate_pipeline_code(pipeline)
     assert pipeline_code == expected
@@ -818,10 +824,11 @@ def __init__(self, random_arg=False, random_seed=0):
     mock_pipeline_with_custom_components = BinaryClassificationPipeline(
         [CustomTransformer, CustomEstimator],
     )
-    expected_code = (
+    expected_code = black.format_str(
         "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
         "pipeline = BinaryClassificationPipeline(component_graph={'My Custom Transformer': [CustomTransformer, 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'My Custom Transformer.x', 'y']}, "
-        "parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)"
+        "parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)",
+        mode=black.Mode(target_versions={black.TargetVersion.PY39}, line_length=88),
     )
     pipeline = generate_pipeline_code(mock_pipeline_with_custom_components)
     assert pipeline == expected_code

diff --git a/pyproject.toml b/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
     "pyzmq >= 20.0.0",
     "colorama >= 0.4.4",
     "cloudpickle >= 1.5.0",
-    "click >= 7.1.2",
+    "click >= 8.0.0",
     "shap >= 0.40.0",
     "statsmodels >= 0.12.2",
     "texttable >= 1.6.2",
@@ -63,6 +63,7 @@ dependencies = [
     "vowpalwabbit >= 8.11.0",
     "tomli >= 2.0.1",
     "packaging >= 23.0",
+    "black[jupyter] >= 22.3.0",
 ]
 
 [project.urls]
@@ -87,7 +88,6 @@ test = [
 ]
 dev = [
     "ruff == 0.0.228",
-    "black[jupyter] >= 22.3.0",
     "darglint == 1.8.0",
     "pre-commit >= 2.20.0",
     "evalml[docs,test]",