diff --git a/.github/meta.yaml b/.github/meta.yaml index f2f0d0ea70..bbfe28e91f 100644 --- a/.github/meta.yaml +++ b/.github/meta.yaml @@ -33,7 +33,7 @@ outputs: - statsmodels >=0.12.2 - colorama >=0.4.4 - cloudpickle >=1.5.0 - - click >=7.1.2 + - click >=8.0.0 - shap >=0.40.0 - texttable >=1.6.2 - woodwork >=0.21.1 @@ -45,6 +45,7 @@ outputs: - python-graphviz >=0.13 - tomli >=2.0.1 - packaging >=23.0 + - black >=22.3.0 test: imports: - evalml diff --git a/core-requirements.txt b/core-requirements.txt index f881d49b44..9450adccd0 100644 --- a/core-requirements.txt +++ b/core-requirements.txt @@ -6,7 +6,7 @@ scikit-optimize>=0.9.0 pyzmq>=20.0.0 colorama>=0.4.4 cloudpickle>=1.5.0 -click>=7.1.2 +click>=8.0.0 shap>=0.40.0 statsmodels>=0.12.2 texttable>=1.6.2 diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 47478f4b08..d7c5687e05 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -2,6 +2,7 @@ Release Notes ------------- **Future Releases** * Enhancements + * Move black to regular dependency and use it for ``generate_pipeline_code`` :pr:`4005` * Fixes * Changes * Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996` diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py index e4d212e218..64716b1b56 100644 --- a/evalml/pipelines/utils.py +++ b/evalml/pipelines/utils.py @@ -1,6 +1,8 @@ """Utility methods for EvalML pipelines.""" import copy +import os +import black from woodwork import logical_types from evalml.data_checks import DataCheckActionCode, DataCheckActionOption @@ -64,6 +66,7 @@ is_time_series, ) from evalml.utils import get_time_index, infer_feature_types +from evalml.utils.cli_utils import get_evalml_black_config from evalml.utils.gen_utils import contains_all_ts_parameters @@ -635,7 +638,12 @@ def generate_pipeline_code(element): ), ) code_strings.append(repr(element)) - return "\n".join(code_strings) + pipeline_code = 
"\n".join(code_strings) + current_dir = os.path.dirname(os.path.abspath(__file__)) + evalml_path = os.path.abspath(os.path.join(current_dir, "..", "..")) + black_config = get_evalml_black_config(evalml_path) + pipeline_code = black.format_str(pipeline_code, mode=black.Mode(**black_config)) + return pipeline_code def _make_stacked_ensemble_pipeline( diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py index a0cab1c828..5cc9baf1c3 100644 --- a/evalml/tests/conftest.py +++ b/evalml/tests/conftest.py @@ -47,6 +47,7 @@ is_time_series, ) from evalml.utils import infer_feature_types +from evalml.utils.cli_utils import get_evalml_black_config def pytest_configure(config): @@ -2337,3 +2338,11 @@ def categorical_floats_df(): ) return X + + +@pytest.fixture +def get_black_config(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + evalml_path = os.path.abspath(os.path.join(current_dir, "..", "..")) + black_config = get_evalml_black_config(evalml_path) + return black_config diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt index 099cdc110c..4f7d9ad71d 100644 --- a/evalml/tests/dependency_update_check/latest_dependency_versions.txt +++ b/evalml/tests/dependency_update_check/latest_dependency_versions.txt @@ -1,3 +1,4 @@ +black==23.1.0 catboost==1.1.1 category-encoders==2.5.1.post0 click==8.1.3 diff --git a/evalml/tests/dependency_update_check/minimum_requirements.txt b/evalml/tests/dependency_update_check/minimum_requirements.txt index 1911a1a02c..5dd7500a72 100644 --- a/evalml/tests/dependency_update_check/minimum_requirements.txt +++ b/evalml/tests/dependency_update_check/minimum_requirements.txt @@ -1,6 +1,7 @@ +black==22.3.0 catboost==1.1.1 category-encoders==2.2.2 -click==7.1.2 +click==8.0.0 cloudpickle==1.5.0 colorama==0.4.4 dask==2022.2.0 diff --git a/evalml/tests/dependency_update_check/minimum_test_requirements.txt 
b/evalml/tests/dependency_update_check/minimum_test_requirements.txt index 121c8f16b8..1595c8421f 100644 --- a/evalml/tests/dependency_update_check/minimum_test_requirements.txt +++ b/evalml/tests/dependency_update_check/minimum_test_requirements.txt @@ -2,7 +2,7 @@ IPython==8.10.0 PyYAML==5.4 catboost==1.1.1 category-encoders==2.2.2 -click==7.1.2 +click==8.0.0 cloudpickle==1.5.0 codecov==2.1.11 colorama==0.4.4 diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py index 9bc8c82755..1449600d7d 100644 --- a/evalml/tests/pipeline_tests/test_pipeline_utils.py +++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py @@ -1,5 +1,6 @@ from unittest.mock import patch +import black import numpy as np import pandas as pd import pytest @@ -653,7 +654,7 @@ def test_generate_code_pipeline_errors(): generate_pipeline_code([Imputer(), LogisticRegressionClassifier()]) -def test_generate_code_pipeline_json_with_objects(): +def test_generate_code_pipeline_json_with_objects(get_black_config): class CustomEstimator(Estimator): name = "My Custom Estimator" hyperparameter_ranges = {} @@ -676,12 +677,12 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0): parameters={"My Custom Estimator": {"numpy_arg": np.array([0])}}, ) generated_pipeline_code = generate_pipeline_code(pipeline) - assert ( - generated_pipeline_code - == "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" + assert generated_pipeline_code == black.format_str( + "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, " "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 
'numeric_fill_value': None, 'boolean_fill_value': None}, " - "'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)" + "'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)", + mode=black.Mode(**get_black_config), ) pipeline = BinaryClassificationPipeline( @@ -690,26 +691,28 @@ def __init__(self, random_arg=False, numpy_arg=[], random_seed=0): parameters={"My Custom Estimator": {"random_arg": Imputer()}}, ) generated_pipeline_code = generate_pipeline_code(pipeline) - assert ( - generated_pipeline_code - == "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" + assert generated_pipeline_code == black.format_str( + "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'Imputer.x', 'y']}, " "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, " "'My Custom Estimator':{'random_arg': Imputer(categorical_impute_strategy='most_frequent', numeric_impute_strategy='mean', boolean_impute_strategy='most_frequent', categorical_fill_value=None, numeric_fill_value=None, boolean_fill_value=None), 'numpy_arg': []}}, " - "custom_name='Mock Binary Pipeline with Transformer', random_seed=0)" + "custom_name='Mock Binary Pipeline with Transformer', random_seed=0)", + mode=black.Mode(**get_black_config), ) -def test_generate_code_pipeline(): +def test_generate_code_pipeline(get_black_config): binary_pipeline = BinaryClassificationPipeline( ["Imputer", "Random Forest Classifier"], ) - expected_code = ( + expected_code = 
black.format_str( "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" "pipeline = BinaryClassificationPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Classifier': ['Random Forest Classifier', 'Imputer.x', 'y']}, " "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, " - "'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)" + "'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, random_seed=0)", + mode=black.Mode(**get_black_config), ) + pipeline = generate_pipeline_code(binary_pipeline) assert expected_code == pipeline @@ -717,10 +720,11 @@ def test_generate_code_pipeline(): ["Imputer", "Random Forest Regressor"], custom_name="Mock Regression Pipeline", ) - expected_code = ( + expected_code = black.format_str( "from evalml.pipelines.regression_pipeline import RegressionPipeline\n" "pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, " - "'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)" + "'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)", + mode=black.Mode(**get_black_config), ) pipeline = generate_pipeline_code(regression_pipeline) assert pipeline == expected_code @@ -733,17 +737,18 @@ def test_generate_code_pipeline(): "Random Forest 
Regressor": {"n_estimators": 50}, }, ) - expected_code_params = ( + expected_code_params = black.format_str( "from evalml.pipelines.regression_pipeline import RegressionPipeline\n" "pipeline = RegressionPipeline(component_graph={'Imputer': ['Imputer', 'X', 'y'], 'Random Forest Regressor': ['Random Forest Regressor', 'Imputer.x', 'y']}, " "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'most_frequent', 'boolean_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None, 'boolean_fill_value': None}, " - "'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)" + "'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)", + mode=black.Mode(**get_black_config), ) pipeline = generate_pipeline_code(regression_pipeline_with_params) assert pipeline == expected_code_params -def test_generate_code_nonlinear_pipeline(): +def test_generate_code_nonlinear_pipeline(get_black_config): custom_name = "Non Linear Binary Pipeline" component_graph = { "Imputer": ["Imputer", "X", "y"], @@ -762,7 +767,7 @@ def test_generate_code_nonlinear_pipeline(): component_graph=component_graph, custom_name=custom_name, ) - expected = ( + expected = black.format_str( "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" "pipeline = BinaryClassificationPipeline(" "component_graph={'Imputer': ['Imputer', 'X', 'y'], " @@ -777,13 +782,14 @@ def test_generate_code_nonlinear_pipeline(): "'Random Forest':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}, " "'Elastic Net':{'penalty': 'elasticnet', 'C': 1.0, 'l1_ratio': 0.15, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'saga'}, " "'Logistic Regression Classifier':{'penalty': 'l2', 'C': 1.0, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'lbfgs'}}, " - "custom_name='Non Linear 
Binary Pipeline', random_seed=0)" + "custom_name='Non Linear Binary Pipeline', random_seed=0)", + mode=black.Mode(**get_black_config), ) pipeline_code = generate_pipeline_code(pipeline) assert pipeline_code == expected -def test_generate_code_pipeline_with_custom_components(): +def test_generate_code_pipeline_with_custom_components(get_black_config): class CustomTransformer(Transformer): name = "My Custom Transformer" hyperparameter_ranges = {} @@ -818,10 +824,11 @@ def __init__(self, random_arg=False, random_seed=0): mock_pipeline_with_custom_components = BinaryClassificationPipeline( [CustomTransformer, CustomEstimator], ) - expected_code = ( + expected_code = black.format_str( "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n" "pipeline = BinaryClassificationPipeline(component_graph={'My Custom Transformer': [CustomTransformer, 'X', 'y'], 'My Custom Estimator': [CustomEstimator, 'My Custom Transformer.x', 'y']}, " - "parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)" + "parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)", + mode=black.Mode(**get_black_config), ) pipeline = generate_pipeline_code(mock_pipeline_with_custom_components) assert pipeline == expected_code diff --git a/evalml/tests/utils_tests/test_cli_utils.py b/evalml/tests/utils_tests/test_cli_utils.py index f9ad0224d5..8862cac585 100644 --- a/evalml/tests/utils_tests/test_cli_utils.py +++ b/evalml/tests/utils_tests/test_cli_utils.py @@ -1,12 +1,14 @@ import os from unittest.mock import patch +import black import pytest from click.testing import CliRunner from packaging.requirements import Requirement from evalml.__main__ import cli from evalml.utils.cli_utils import ( + get_evalml_black_config, get_evalml_pip_requirements, get_evalml_root, get_installed_packages, @@ -107,3 +109,16 @@ def test_installed_packages(current_dir): def test_get_evalml_root(current_dir): root = os.path.abspath(os.path.join(current_dir, 
"..", "..")) assert get_evalml_root() == root + + +def test_get_evalml_black_config(current_dir): + evalml_path = os.path.abspath(os.path.join(current_dir, "..", "..", "..")) + black_config = get_evalml_black_config(evalml_path) + assert black_config["line_length"] == 88 + assert black_config["target_versions"] == set([black.TargetVersion["PY39"]]) + + black_config = get_evalml_black_config( + os.path.join(current_dir, "..", "..", "random_file"), + ) + assert black_config["line_length"] == 88 + assert black_config["target_versions"] == set([black.TargetVersion["PY39"]]) diff --git a/evalml/utils/cli_utils.py b/evalml/utils/cli_utils.py index c24dd5383f..14dd163c55 100644 --- a/evalml/utils/cli_utils.py +++ b/evalml/utils/cli_utils.py @@ -6,6 +6,7 @@ import struct import sys +import black import pkg_resources import tomli from packaging.requirements import Requirement @@ -158,3 +159,37 @@ def get_evalml_pip_requirements( convert_to_conda=convert_to_conda, ) return standardized_package_specifiers + + +def get_evalml_black_config( + evalml_path, +): + """Gets configuration for black. + + Args: + evalml_path: Path to evalml root. + + Returns: + Dictionary of black configuration. 
+ """ + black_config = None + try: + toml_dict = None + evalml_path = pathlib.Path(evalml_path, "pyproject.toml") + with open(evalml_path, "rb") as f: + toml_dict = tomli.load(f) + black_config = toml_dict["tool"]["black"] + black_config["line_length"] = black_config.pop("line-length") + target_versions = set( + [ + black.TargetVersion[ver.upper()] + for ver in black_config.pop("target-version") + ], + ) + black_config["target_versions"] = target_versions + except Exception: + black_config = { + "line_length": 88, + "target_versions": set([black.TargetVersion["PY39"]]), + } + return black_config diff --git a/pyproject.toml b/pyproject.toml index c0c0bc18df..24ff70e71f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "pyzmq >= 20.0.0", "colorama >= 0.4.4", "cloudpickle >= 1.5.0", - "click >= 7.1.2", + "click >= 8.0.0", "shap >= 0.40.0", "statsmodels >= 0.12.2", "texttable >= 1.6.2", @@ -63,6 +63,7 @@ dependencies = [ "vowpalwabbit >= 8.11.0", "tomli >= 2.0.1", "packaging >= 23.0", + "black[jupyter] >= 22.3.0", ] [project.urls] @@ -87,7 +88,6 @@ test = [ ] dev = [ "ruff == 0.0.228", - "black[jupyter] >= 22.3.0", "darglint == 1.8.0", "pre-commit >= 2.20.0", "evalml[docs,test]",