Use ruff in place of black, isort, flake8, and autoflake8. (#1321)
Signed-off-by: Thomas J. Fan <thomasjpfan@gmail.com>
Signed-off-by: Kevin Su <pingsutw@apache.org>
Co-authored-by: Kevin Su <pingsutw@apache.org>
thomasjpfan and pingsutw committed May 6, 2024
1 parent 83c7a59 commit be22d86
Showing 34 changed files with 97 additions and 98 deletions.
19 changes: 8 additions & 11 deletions .pre-commit-config.yaml
@@ -1,16 +1,13 @@
 repos:
-  - repo: https://github.com/PyCQA/flake8
-    rev: 3.9.2
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.1.6
     hooks:
-      - id: flake8
-  - repo: https://github.com/psf/black
-    rev: 22.3.0
-    hooks:
-      - id: black
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
+      # Run the linter.
+      - id: ruff
+        args: [--fix]
+      # Run the formatter.
+      - id: ruff-format
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.0.1
     hooks:
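The hook configuration above only tells pre-commit to run Ruff; the rule selection that reproduces the old flake8/isort behaviour lives in the project's Ruff settings (typically `pyproject.toml` or `ruff.toml`), which are not shown in this excerpt. The fragment below is only a hedged sketch of what such settings commonly look like for Ruff 0.1.x; the rule codes and line length are illustrative assumptions, not values taken from this repository:

# Illustrative sketch only, not taken from this commit.
[tool.ruff]
line-length = 120  # assumed value; would normally mirror the old black/flake8 setting
select = [
    "E",  # pycodestyle errors (flake8)
    "W",  # pycodestyle warnings (flake8)
    "F",  # pyflakes (flake8/autoflake territory)
    "I",  # isort-style import sorting
]

[tool.ruff.isort]
known-first-party = ["flytekit"]  # hypothetical; depends on the project layout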
7 changes: 3 additions & 4 deletions Makefile
@@ -24,10 +24,9 @@ docs-requirements.txt: docs-requirements.in install-piptools
 docs-requirements: docs-requirements.txt

 .PHONY: fmt
-fmt: ## Format code with black and isort
-	autoflake --remove-all-unused-imports --ignore-init-module-imports --ignore-pass-after-docstring --in-place -r examples
-	pre-commit run black --all-files || true
-	pre-commit run isort --all-files || true
+fmt: ## Format code with ruff
+	pre-commit run ruff --all-files || true
+	pre-commit run ruff-format --all-files || true

 .PHONY: update_boilerplate
 update_boilerplate:
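The new `fmt` target drives both hooks through pre-commit. Assuming Ruff is also installed locally (the `examples` path simply mirrors what the old autoflake invocation targeted), the rough command-line equivalents are:

$ make fmt                    # runs the ruff and ruff-format pre-commit hooks
$ ruff check --fix examples   # lint and autofix, roughly covering flake8/isort/autoflake
$ ruff format examples        # formatting, roughly covering black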
5 changes: 0 additions & 5 deletions dev-requirements.in
@@ -1,13 +1,8 @@
autoflake
flytekit
black
coverage
flake8
pre-commit
flake8-black
flake8-isort
codespell
isort
mock
pytest
mypy
32 changes: 0 additions & 32 deletions dev-requirements.txt
@@ -39,10 +39,6 @@ azure-storage-blob==12.19.0
# via adlfs
binaryornot==0.4.4
# via cookiecutter
black==24.3.0
# via
# -r dev-requirements.in
# flake8-black
botocore==1.31.17
# via aiobotocore
cachetools==5.3.2
@@ -63,7 +59,6 @@ charset-normalizer==3.3.2
# via requests
click==8.1.7
# via
# black
# cookiecutter
# flytekit
# rich-click
@@ -100,15 +95,6 @@ exceptiongroup==1.2.0
# via pytest
filelock==3.13.1
# via virtualenv
flake8==6.1.0
# via
# -r dev-requirements.in
# flake8-black
# flake8-isort
flake8-black==0.3.6
# via -r dev-requirements.in
flake8-isort==6.1.1
# via -r dev-requirements.in
flyteidl==1.10.6
# via flytekit
flytekit==1.10.2
@@ -118,7 +104,6 @@ frozenlist==1.4.0
# aiohttp
# aiosignal
fsspec==2023.9.2
# via
# adlfs
# flytekit
# gcsfs
@@ -175,10 +160,6 @@ iniconfig==2.0.0
# via pytest
isodate==0.6.1
# via azure-storage-blob
isort==5.12.0
# via
# -r dev-requirements.in
# flake8-isort
jaraco-classes==3.3.0
# via keyring
jeepney==0.8.0
@@ -216,8 +197,6 @@ mashumaro==3.11
# via
# -r dev-requirements.in
# flytekit
mccabe==0.7.0
# via flake8
mdurl==0.1.2
# via markdown-it-py
mock==5.1.0
@@ -239,7 +218,6 @@ mypy==1.7.1
# via -r dev-requirements.in
mypy-extensions==1.0.0
# via
# black
# mypy
# typing-inspect
nodeenv==1.8.0
@@ -255,17 +233,13 @@ oauthlib==3.2.2
# requests-oauthlib
packaging==23.2
# via
# black
# docker
# marshmallow
# pytest
pandas==1.5.3
# via flytekit
pathspec==0.11.2
# via black
platformdirs==4.1.0
# via
# black
# virtualenv
pluggy==1.3.0
# via pytest
@@ -291,14 +265,8 @@ pyasn1==0.5.1
# rsa
pyasn1-modules==0.3.0
# via google-auth
pycodestyle==2.11.1
# via flake8
pycparser==2.21
# via cffi
pyflakes==3.1.0
# via
# autoflake
# flake8
pygments==2.17.2
# via rich
pyjwt[crypto]==2.8.0
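Most of the deletions above are transitive dependencies: mccabe and pycodestyle were only pulled in via flake8, and pathspec via black, so they drop out once the lock file is regenerated. dev-requirements.txt is a pip-compile output (note the `# via` annotations), so after trimming dev-requirements.in the file is rebuilt with something along these lines; the exact flags used by the repository's Makefile may differ:

$ pip-compile dev-requirements.in --output-file dev-requirements.txt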
6 changes: 2 additions & 4 deletions docs/contribute.md
@@ -284,17 +284,15 @@ if the code in itself is updated and requirements.txt is the same.
 ## Pre-commit hooks

 We use [pre-commit](https://pre-commit.com/) to automate linting and code formatting on every commit.
-Configured hooks include [black](https://github.com/psf/black), [isort](https://github.com/PyCQA/isort),
-[flake8](https://github.com/PyCQA/flake8) and linters to ensure newlines are added to the end of files, and there is
-proper spacing in files.
+Configured hooks include [ruff](https://github.com/astral-sh/ruff) to ensure newlines are added to the end of files, and there is proper spacing in files.

 We run all those hooks in CI, but if you want to run them locally on every commit, run `pre-commit install` after
 installing the dev environment requirements. In case you want to disable `pre-commit` hooks locally, run
 `pre-commit uninstall`. More info [here](https://pre-commit.com/).

 ### Formatting

-We use [black](https://github.com/psf/black) and [isort](https://github.com/PyCQA/isort) to autoformat code. They
+We use [ruff](https://github.com/astral-sh/ruff) to autoformat code. They
 are configured as git hooks in `pre-commit`. Run `make fmt` to format your code.

 ### Spell-checking
1 change: 1 addition & 0 deletions examples/dbt_plugin/dbt_plugin/dbt_example.py
@@ -28,6 +28,7 @@
DBT_PROFILES_DIR = "dbt-profiles"
DBT_PROFILE = "jaffle_shop"


# %% [markdown]
# This task ensures that the jaffle_shop database is created and it also contains
# some data before scheduling an execution of this workflow.
@@ -67,7 +67,8 @@ def hash_pandas_dataframe(df: pandas.DataFrame) -> str:

@task
def foo_1( # noqa: F811
a: int, b: str # noqa: F821
a: int,
b: str, # noqa: F821
) -> Annotated[pandas.DataFrame, HashMethod(hash_pandas_dataframe)]: # noqa: F821 # noqa: F821
df = pandas.DataFrame(...) # noqa: F821
...
1 change: 1 addition & 0 deletions examples/dolt_plugin/dolt_plugin/dolt_branch_example.py
@@ -71,6 +71,7 @@ def generate_confs(a: int) -> typing.Tuple[DoltConfig, DoltConfig, DoltConfig]:
# Return types of `DoltTable` save the `data` to the
# Dolt database given a connection configuration.


# %%
@task
def get_confs(a: int) -> typing.Tuple[DoltConfig, DoltTable, DoltConfig]:
@@ -31,6 +31,7 @@
# %% [markdown]
# We define a task to create a DataFrame and store the table in Dolt.


# %%
@task
def populate_rabbits(a: int) -> DoltTable:
2 changes: 1 addition & 1 deletion examples/duckdb_plugin/duckdb_plugin/duckdb_example.py
@@ -153,7 +153,7 @@ def params_wf(
[["chainsaw", 500, 10], ["iphone", 300, 2]],
["duck", "goose"],
]
)
),
) -> pd.DataFrame:
return read_df(df=duckdb_params_query(params=params))

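Most of the example-file churn in this commit is mechanical formatter and linter output: like black, `ruff format` keeps or adds a trailing comma whenever a bracketed construct is split across multiple lines (as in the `params_wf` default just above), and blank-line rules explain the extra empty lines inserted before decorated top-level definitions in the files that follow. A small illustrative sketch of the trailing-comma behaviour; the function and argument names here are made up:

# Hypothetical before/after illustration, not code from this repository.


def short(a: int, b: str = "x") -> str:  # fits on one line: no trailing comma added
    return b * a


def long_signature(
    first_argument: int,
    second_argument: str = "a default long enough to force the signature onto several lines",
) -> str:  # split across lines: the formatter keeps or adds the trailing comma
    return second_argument * first_argument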
@@ -50,6 +50,7 @@
# %% [markdown]
# Since a task need not be defined, we create a `workflow` and return the MAE score.


# %%
@workflow
def notebook_wf(
@@ -56,6 +56,7 @@ class Hyperparameters(object):
# Next, we define a `cross_validate` function and a `modeling` task to compute the MAE score of the data against
# the Gradient Boosting Regressor.


# %%
def cross_validate(model, nfolds, feats, targets):
score = -1 * (cross_val_score(model, feats, targets, cv=nfolds, scoring="neg_mean_absolute_error"))
@@ -60,6 +60,7 @@
# %% [markdown]
# We define a `Workflow` to run the notebook tasks.


# %%
@workflow
def notebook_wf(
@@ -25,6 +25,7 @@
"timestamp",
]


# %% [markdown]
# Use the [SimpleImputer](https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html) class from the `scikit-learn` library
# to fill in the missing values of the dataset.
@@ -12,6 +12,7 @@
# ## Usage
# ### Add `@vscode` decorator to task function definition


# %%
@task
@vscode
@@ -93,6 +93,7 @@
# %% [markdown]
# Next, let's initialize a data class to store the hyperparameters that will be used with the model (`epochs`, `learning_rate`, `batch_size`, etc.).


# %%
@dataclass_json
@dataclass
@@ -600,7 +601,6 @@ def test(
test_df: pyspark.sql.DataFrame,
hp: Hyperparameters,
) -> FlyteDirectory:

print("================")
print("Final prediction")
print("================")
@@ -649,7 +649,6 @@ def test(
limits=Resources(mem="1Gi"),
)
def horovod_spark_task(data_dir: FlyteDirectory, hp: Hyperparameters, work_dir: FlyteDirectory) -> FlyteDirectory:

max_sales, vocab, train_df, test_df = data_preparation(data_dir, hp)

# working directory will have the model and predictions as separate files
@@ -54,6 +54,7 @@
context_root_dir=CONTEXT_ROOT_DIR,
)


# %% [markdown]
# Next, we define a task that validates the data before returning the shape of the DataFrame.
# %%
@@ -97,6 +98,7 @@ def simple_wf(dataset: str = DATASET_LOCAL) -> int:
context_root_dir=CONTEXT_ROOT_DIR,
)


# %% [markdown]
# Next, we define a task that calls the validation logic.
# %%
@@ -144,6 +146,7 @@ def file_wf(
task_config=SQLite3Config(uri=SQLITE_DATASET),
)


# %% [markdown]
# Next, we define a task that validates the data and returns the columns in it.
# %%
@@ -47,6 +47,7 @@
#
# The parameters within the `data_connector_query` convey that we're fetching all those files that have "2019" and "01" in the file names.


# %%
@task(limits=Resources(mem="500Mi"))
def simple_task(
@@ -67,7 +68,7 @@ def simple_task(
),
context_root_dir=CONTEXT_ROOT_DIR,
),
]
],
) -> str:
return f"Validation works for {directory}!"

@@ -102,6 +103,7 @@ def simple_wf(directory: str = "my_assets") -> str:
# The first value that's being sent within `GreatExpectationsType` is `CSVFile` (this is a pre-formatted FlyteFile type).
# This means that we want to validate the `FlyteFile` data.


# %%
@task(limits=Resources(mem="500Mi"))
def file_task(dataset: GreatExpectationsType[CSVFile, great_expectations_config]) -> pd.DataFrame:
@@ -137,7 +139,7 @@ def schema_task(
local_file_path="/tmp/test.parquet", # noqa: F722
context_root_dir=CONTEXT_ROOT_DIR,
),
]
],
) -> int:
return dataframe.shape[0]

1 change: 1 addition & 0 deletions examples/hive_plugin/hive_plugin/hive.py
@@ -67,6 +67,7 @@ def no_io_wf():
# There is a helper task that will automatically do the wrapping above. Please be patient as we fill out these docs.
# :::


# %%
@workflow
def with_output_wf() -> FlyteSchema:
@@ -68,6 +68,7 @@
#
# We define a function to compute the price of a house based on multiple factors (`number of bedrooms`, `number of bathrooms`, `area`, `garage space`, and `year built`).


# %%
def gen_price(house) -> int:
_base_price = int(house["SQUARE_FEET"] * 150)
@@ -125,7 +126,6 @@ def gen_houses(num_houses) -> pd.DataFrame:
def split_data(
df: pd.DataFrame, seed: int, split: typing.List[float]
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:

seed = seed
val_size = split[1] # 0.3
test_size = split[2] # 0.1
@@ -182,6 +182,7 @@ def split_data(
# %% [markdown]
# We define a task to call the aforementioned functions.


# %%
@task(cache=True, cache_version="0.1", limits=Resources(mem="600Mi"))
def generate_and_split_data(number_of_houses: int, seed: int) -> dataset:
@@ -196,7 +197,6 @@ def generate_and_split_data(number_of_houses: int, seed: int) -> dataset:
# %%
@task(cache_version="1.0", cache=True, limits=Resources(mem="600Mi"))
def fit(loc: str, train: pd.DataFrame, val: pd.DataFrame) -> JoblibSerializedFile:

# fetch the features and target columns from the train dataset
x = train[train.columns[1:]]
y = train[train.columns[0]]
@@ -227,7 +227,6 @@ def predict(
test: pd.DataFrame,
model_ser: JoblibSerializedFile,
) -> typing.List[float]:

# load the model
model = joblib.load(model_ser)

@@ -246,7 +245,6 @@
# %%
@workflow
def house_price_predictor_trainer(seed: int = 7, number_of_houses: int = NUM_HOUSES_PER_LOCATION) -> typing.List[float]:

# generate the data and split it into train test, and validation data
split_data_vals = generate_and_split_data(number_of_houses=number_of_houses, seed=seed)

