diff --git a/.github/workflows/code-cleanup.yml b/.github/workflows/code-cleanup.yml index 0d475bd721..4741ed953f 100644 --- a/.github/workflows/code-cleanup.yml +++ b/.github/workflows/code-cleanup.yml @@ -18,17 +18,23 @@ jobs: sudo chown -R $USER:$USER ${{ github.workspace }} || true - uses: actions/checkout@v5 + with: + fetch-depth: 0 - uses: actions/setup-python@v5 - uses: astral-sh/setup-uv@v4 - name: Run pre-commit id: pre-commit-first uses: pre-commit/action@v3.0.1 + with: + extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.sha }} continue-on-error: true - name: Re-run pre-commit if failed initially id: pre-commit-retry if: steps.pre-commit-first.outcome == 'failure' uses: pre-commit/action@v3.0.1 + with: + extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.sha }} continue-on-error: false - name: Commit code changes diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 29d068dccf..81d232da96 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -99,3 +99,11 @@ repos: entry: python bin/hooks/filter_commit_message.py language: python stages: [commit-msg] + + - id: check-commit-message + name: Check commit messages for generated signatures + always_run: true + pass_filenames: false + entry: python bin/hooks/filter_commit_message.py --check + language: python + stages: [pre-commit] diff --git a/bin/hooks/filter_commit_message.py b/bin/hooks/filter_commit_message.py index d22eaf9484..65f1a758fe 100644 --- a/bin/hooks/filter_commit_message.py +++ b/bin/hooks/filter_commit_message.py @@ -13,37 +13,115 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from pathlib import Path +import subprocess import sys +# Patterns that trigger truncation (everything from this line onwards is removed) +TRUNCATE_PATTERNS = [ + "Generated with", + "Co-Authored-By", +] + + +def filter_text(text: str) -> tuple[str, str | None]: + """Return (filtered_text, first_matched_pattern_or_None).""" + lines = text.splitlines(keepends=True) + filtered_lines: list[str] = [] + matched: str | None = None + for line in lines: + hit = next((p for p in TRUNCATE_PATTERNS if p in line), None) + if hit is not None: + matched = hit + break + filtered_lines.append(line) + return "".join(filtered_lines), matched -def main() -> int: - if len(sys.argv) < 2: - print("Usage: filter_commit_message.py ", file=sys.stderr) - return 1 - commit_msg_file = Path(sys.argv[1]) - if not commit_msg_file.exists(): +def rewrite_file(path: Path) -> int: + if not path.exists(): return 0 + filtered, _ = filter_text(path.read_text()) + path.write_text(filtered) + return 0 - lines = commit_msg_file.read_text().splitlines(keepends=True) - # Patterns that trigger truncation (everything from this line onwards is removed) - truncate_patterns = [ - "Generated with", - "Co-Authored-By", - ] +def check_commits() -> int: + """Check every commit in the range pre-commit was invoked over. - # Find the first line containing any truncate pattern and truncate there - filtered_lines = [] - for line in lines: - if any(pattern in line for pattern in truncate_patterns): - break - filtered_lines.append(line) + Locally on `git commit` no range is supplied, so we no-op rather than + blocking commits on the state of HEAD — the commit-msg hook is in + charge there. In CI, code-cleanup.yml passes `--from-ref/--to-ref` to + pre-commit, which exports PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF. + """ + from_ref = os.environ.get("PRE_COMMIT_FROM_REF") + to_ref = os.environ.get("PRE_COMMIT_TO_REF") + if not (from_ref and to_ref): + return 0 + + try: + rev_list = subprocess.run( + ["git", "rev-list", "--reverse", f"{from_ref}..{to_ref}"], + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as e: + print( + f"git rev-list {from_ref}..{to_ref} failed: {e.stderr.strip() or e}", + file=sys.stderr, + ) + return 1 - commit_msg_file.write_text("".join(filtered_lines)) + failures: list[tuple[str, str]] = [] + for sha in rev_list.stdout.split(): + try: + msg = subprocess.run( + ["git", "log", "-1", "--format=%B", sha], + capture_output=True, + text=True, + check=True, + ).stdout + except subprocess.CalledProcessError as e: + print( + f"git log -1 {sha} failed: {e.stderr.strip() or e}", + file=sys.stderr, + ) + return 1 + _, matched = filter_text(msg) + if matched is not None: + failures.append((sha, matched)) + + if failures: + for sha, pattern in failures: + print( + f"{sha[:12]}: contains forbidden pattern: {pattern!r}", + file=sys.stderr, + ) + print( + "\nInstall the commit-msg hook " + "(`pre-commit install -t commit-msg`) or amend the offending " + "commits to strip the trailer.", + file=sys.stderr, + ) + return 1 return 0 +def main() -> int: + if len(sys.argv) < 2: + print( + "Usage: filter_commit_message.py | --check", + file=sys.stderr, + ) + return 1 + + if sys.argv[1] == "--check": + return check_commits() + + return rewrite_file(Path(sys.argv[1])) + + if __name__ == "__main__": sys.exit(main())