From a46ed5dc817a9cf6632b86d5e67ea5b52df00040 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Wed, 3 Nov 2021 15:59:10 -0400 Subject: [PATCH] Cleanup the quality checks and document them --- .circleci/config.yml | 31 ++++++++----- Makefile | 10 +++-- docs/source/index.rst | 1 + docs/source/pr_tests.md | 36 +++++++++++++++- utils/link_tester.py | 96 ----------------------------------------- 5 files changed, 62 insertions(+), 112 deletions(-) delete mode 100644 utils/link_tester.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 06d813bd7bcc..114f832732ce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -811,24 +811,35 @@ jobs: - run: python utils/custom_init_isort.py --check_only - run: flake8 examples tests src utils - run: python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only - - run: python utils/check_copies.py - - run: python utils/check_table.py - - run: python utils/check_dummies.py - - run: python utils/check_repo.py - - run: python utils/check_inits.py - - run: make deps_table_check_updated - - run: python utils/tests_fetcher.py --sanity_check check_repository_consistency: working_directory: ~/transformers docker: - image: circleci/python:3.6 - resource_class: small + resource_class: large + environment: + TRANSFORMERS_IS_CI: yes parallelism: 1 steps: - checkout - - run: pip install requests - - run: python ./utils/link_tester.py + - restore_cache: + keys: + - v0.4-repository_consistency-{{ checksum "setup.py" }} + - v0.4-{{ checksum "setup.py" }} + - run: pip install --upgrade pip + - run: pip install isort GitPython + - run: pip install .[all,quality] + - save_cache: + key: v0.4-repository_consistency-{{ checksum "setup.py" }} + paths: + - '~/.cache/pip' + - run: python utils/check_copies.py + - run: python utils/check_table.py + - run: python utils/check_dummies.py + - run: python utils/check_repo.py + - run: python utils/check_inits.py + - run: make deps_table_check_updated + - run: python utils/tests_fetcher.py --sanity_check run_tests_layoutlmv2: working_directory: ~/transformers diff --git a/Makefile b/Makefile index 11e96f84c998..51091e099099 100644 --- a/Makefile +++ b/Makefile @@ -31,9 +31,9 @@ deps_table_check_updated: autogenerate_code: deps_table_update -# Check that source code meets quality standards +# Check that the repo is in a good state -extra_quality_checks: +repo-consistency: python utils/check_copies.py python utils/check_table.py python utils/check_dummies.py @@ -42,12 +42,13 @@ extra_quality_checks: python utils/tests_fetcher.py --sanity_check # this target runs checks on all files + quality: black --check $(check_dirs) isort --check-only $(check_dirs) python utils/custom_init_isort.py --check_only flake8 $(check_dirs) - ${MAKE} extra_quality_checks + python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only # Format source code automatically and check is there are any problems left that need manual fixing @@ -56,6 +57,7 @@ extra_style_checks: python utils/style_doc.py src/transformers docs/source --max_len 119 # this target runs checks on all files and potentially modifies some of them + style: black $(check_dirs) isort $(check_dirs) @@ -64,7 +66,7 @@ style: # Super fast fix and check target that only works on relevant modified files since the branch was made -fixup: modified_only_fixup extra_style_checks autogenerate_code extra_quality_checks +fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency # Make marked copies of snippets of codes conform to the original diff --git a/docs/source/index.rst b/docs/source/index.rst index 6ea06fd6242e..7e005487b2ee 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -559,6 +559,7 @@ Flax), PyTorch, and/or TensorFlow. testing debugging serialization + pr_tests .. toctree:: :maxdepth: 2 diff --git a/docs/source/pr_tests.md b/docs/source/pr_tests.md index db5b8dd830af..33b0b6565069 100644 --- a/docs/source/pr_tests.md +++ b/docs/source/pr_tests.md @@ -75,7 +75,7 @@ Sphinx is not known for its helpful error messages, so you might have to try a f ## Code and documentation style -Code formatting is applied to all the source files, the examples and the tests using black and isort. We also have a custom tool taking care of the formatting of docstrings and rst files. Both can be launched by executing +Code formatting is applied to all the source files, the examples and the tests using black and isort. We also have a custom tool taking care of the formatting of docstrings and rst files (`utils/style_doc.py`), as well as the order of the lazy imports performed in the Transformers `__init__.py` files (`utils/custom_init_isort.py`). All of this can be launched by executing ```bash make style @@ -93,7 +93,39 @@ This can take a lot of time, so to run the same thing on only the scripts you mo make fixup ``` -Those two commands will also run all the additional checks for the repository consistency. Let's have a look at them. +This last command will also run all the additional checks for the repository consistency. Let's have a look at them. ## Repository consistency +This regroups all the tests to make sure your PR leaves the repository in a good state, and is performed by the `ci/circleci: check_repository_consistency` check. You can locally run that check by executing the following: + +```bash +make repo-consistency +``` + +This checks that: + +- All objects added to the init are documented (performed by `utils/check_repo.py`) +- All `__init__.py` files have the same content in their two sections (performed by `utils/check_inits.py`) +- All code identified as a copy from another module is consistent with the original (performed by `utils/check_copies.py`) +- The translations of the READMEs and the index of the doc have the same model list as the main README (performed by `utils/check_copies.py`) +- The auto-generated tables in the documentation are up to date (performed by `utils/check_table.py`) +- The library has all objects available even if not all optional dependencies are installed (performed by `utils/check_dummies.py`) + +The first two need to be manually fixed, the last four can be fixed automatically for you by running the command + +```bash +make fix-copies +``` + +Additional checks concern PRs that add a new model, mainly that: + +- All models added are in an Auto-mapping (performed by `utils/check_repo.py`) + +- All models are properly tested (performed by `utils/check_repo.py`) + + \ No newline at end of file diff --git a/utils/link_tester.py b/utils/link_tester.py deleted file mode 100644 index 5eb6fed4d5cc..000000000000 --- a/utils/link_tester.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2020 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Link tester. - -This little utility reads all the python files in the repository, -scans for links pointing to S3 and tests the links one by one. Raises an error -at the end of the scan if at least one link was reported broken. -""" -import os -import re -import sys - -import requests - - -REGEXP_FIND_S3_LINKS = r"""([\"'])(https:\/\/s3)(.*)?\1""" - - -S3_BUCKET_PREFIX = "https://s3.amazonaws.com/models.huggingface.co/bert" - - -def list_python_files_in_repository(): - """List all python files in the repository. - - This function assumes that the script is executed in the root folder. - """ - source_code_files = [] - for path, subdirs, files in os.walk("."): - if "templates" in path: - continue - for name in files: - if ".py" in name and ".pyc" not in name: - path_to_files = os.path.join(path, name) - source_code_files.append(path_to_files) - - return source_code_files - - -def find_all_links(file_paths): - links = [] - for path in file_paths: - links += scan_code_for_links(path) - - return [link for link in links if link != S3_BUCKET_PREFIX] - - -def scan_code_for_links(source): - """Scans the file to find links using a regular expression. - Returns a list of links. - """ - with open(source, "r") as content: - content = content.read() - raw_links = re.findall(REGEXP_FIND_S3_LINKS, content) - links = [prefix + suffix for _, prefix, suffix in raw_links] - - return links - - -def check_all_links(links): - """Check that the provided links are valid. - - Links are considered valid if a HEAD request to the server - returns a 200 status code. - """ - broken_links = [] - for link in links: - head = requests.head(link) - if head.status_code != 200: - broken_links.append(link) - - return broken_links - - -if __name__ == "__main__": - file_paths = list_python_files_in_repository() - links = find_all_links(file_paths) - broken_links = check_all_links(links) - print("Looking for broken links to pre-trained models/configs/tokenizers...") - if broken_links: - print("The following links did not respond:") - for link in broken_links: - print(f"- {link}") - sys.exit(1) - print("All links are ok.")