From 723ca706f7d634fd59e1fbef76b11ab235ed4cf5 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 11 Apr 2026 11:52:26 +0800 Subject: [PATCH 1/3] ci: fail pre-commit if commits contain generated trailers Expand filter_commit_message.py with a --check mode that walks commits in PRE_COMMIT_FROM_REF..PRE_COMMIT_TO_REF (HEAD locally) and exits non-zero on any forbidden trailer. Wired as a pre-commit-stage hook so CI catches contributors who lack the commit-msg hook. Workflow now uses fetch-depth: 0 so the PR base SHA is reachable. --- .github/workflows/code-cleanup.yml | 2 + .pre-commit-config.yaml | 8 +++ bin/hooks/filter_commit_message.py | 109 ++++++++++++++++++++++++----- 3 files changed, 100 insertions(+), 19 deletions(-) diff --git a/.github/workflows/code-cleanup.yml b/.github/workflows/code-cleanup.yml index 0d475bd721..c6f5788f70 100644 --- a/.github/workflows/code-cleanup.yml +++ b/.github/workflows/code-cleanup.yml @@ -18,6 +18,8 @@ jobs: sudo chown -R $USER:$USER ${{ github.workspace }} || true - uses: actions/checkout@v5 + with: + fetch-depth: 0 - uses: actions/setup-python@v5 - uses: astral-sh/setup-uv@v4 - name: Run pre-commit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 29d068dccf..16fcd7aa73 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -99,3 +99,11 @@ repos: entry: python bin/hooks/filter_commit_message.py language: python stages: [commit-msg] + + - id: check-commit-message + name: Check HEAD commit message for generated signatures + always_run: true + pass_filenames: false + entry: python bin/hooks/filter_commit_message.py --check + language: python + stages: [pre-commit] diff --git a/bin/hooks/filter_commit_message.py b/bin/hooks/filter_commit_message.py index d22eaf9484..6aac10908c 100644 --- a/bin/hooks/filter_commit_message.py +++ b/bin/hooks/filter_commit_message.py @@ -13,37 +13,108 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from pathlib import Path +import subprocess import sys +# Patterns that trigger truncation (everything from this line onwards is removed) +TRUNCATE_PATTERNS = [ + "Generated with", + "Co-Authored-By", +] + + +def filter_text(text: str) -> tuple[str, str | None]: + """Return (filtered_text, first_matched_pattern_or_None).""" + lines = text.splitlines(keepends=True) + filtered_lines: list[str] = [] + matched: str | None = None + for line in lines: + hit = next((p for p in TRUNCATE_PATTERNS if p in line), None) + if hit is not None: + matched = hit + break + filtered_lines.append(line) + return "".join(filtered_lines), matched -def main() -> int: - if len(sys.argv) < 2: - print("Usage: filter_commit_message.py ", file=sys.stderr) - return 1 - commit_msg_file = Path(sys.argv[1]) - if not commit_msg_file.exists(): +def rewrite_file(path: Path) -> int: + if not path.exists(): return 0 + filtered, _ = filter_text(path.read_text()) + path.write_text(filtered) + return 0 - lines = commit_msg_file.read_text().splitlines(keepends=True) - # Patterns that trigger truncation (everything from this line onwards is removed) - truncate_patterns = [ - "Generated with", - "Co-Authored-By", - ] +def commits_to_check() -> list[str]: + """Commits to check, oldest first. - # Find the first line containing any truncate pattern and truncate there - filtered_lines = [] - for line in lines: - if any(pattern in line for pattern in truncate_patterns): - break - filtered_lines.append(line) + On a PR, pre-commit exports PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF for + the base..head range. Outside that, we only inspect HEAD. + """ + from_ref = os.environ.get("PRE_COMMIT_FROM_REF") + to_ref = os.environ.get("PRE_COMMIT_TO_REF") + if from_ref and to_ref: + result = subprocess.run( + ["git", "rev-list", "--reverse", f"{from_ref}..{to_ref}"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.split() - commit_msg_file.write_text("".join(filtered_lines)) + result = subprocess.run( + ["git", "rev-parse", "HEAD"], + capture_output=True, + text=True, + check=True, + ) + return [result.stdout.strip()] + + +def check_commits() -> int: + failures: list[tuple[str, str]] = [] + for sha in commits_to_check(): + msg = subprocess.run( + ["git", "log", "-1", "--format=%B", sha], + capture_output=True, + text=True, + check=True, + ).stdout + _, matched = filter_text(msg) + if matched is not None: + failures.append((sha, matched)) + + if failures: + for sha, pattern in failures: + print( + f"{sha[:12]}: contains forbidden pattern: {pattern!r}", + file=sys.stderr, + ) + print( + "\nInstall the commit-msg hook " + "(`pre-commit install -t commit-msg`) or amend the offending " + "commits to strip the trailer.", + file=sys.stderr, + ) + return 1 return 0 +def main() -> int: + if len(sys.argv) < 2: + print( + "Usage: filter_commit_message.py | --check", + file=sys.stderr, + ) + return 1 + + if sys.argv[1] == "--check": + return check_commits() + + return rewrite_file(Path(sys.argv[1])) + + if __name__ == "__main__": sys.exit(main()) From 57d1608dc2d7782bf7b7b9e429acc7a70f290795 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 11 Apr 2026 12:06:20 +0800 Subject: [PATCH 2/3] ci: pass --from-ref/--to-ref to pre-commit so range check actually runs pre-commit/action@v3.0.1 defaults extra_args to --all-files and never injects from/to refs on its own, so PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF were never set in CI and the range walk always fell through to HEAD-only. Pass the PR base/head explicitly via extra_args on both action steps. Side effect: formatter hooks now run on changed files instead of all files, which matches the intent of an auto-commit cleanup workflow. --- .github/workflows/code-cleanup.yml | 4 ++++ bin/hooks/filter_commit_message.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-cleanup.yml b/.github/workflows/code-cleanup.yml index c6f5788f70..4741ed953f 100644 --- a/.github/workflows/code-cleanup.yml +++ b/.github/workflows/code-cleanup.yml @@ -25,12 +25,16 @@ jobs: - name: Run pre-commit id: pre-commit-first uses: pre-commit/action@v3.0.1 + with: + extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.sha }} continue-on-error: true - name: Re-run pre-commit if failed initially id: pre-commit-retry if: steps.pre-commit-first.outcome == 'failure' uses: pre-commit/action@v3.0.1 + with: + extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.sha }} continue-on-error: false - name: Commit code changes diff --git a/bin/hooks/filter_commit_message.py b/bin/hooks/filter_commit_message.py index 6aac10908c..0f4a790d19 100644 --- a/bin/hooks/filter_commit_message.py +++ b/bin/hooks/filter_commit_message.py @@ -50,8 +50,9 @@ def rewrite_file(path: Path) -> int: def commits_to_check() -> list[str]: """Commits to check, oldest first. - On a PR, pre-commit exports PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF for - the base..head range. Outside that, we only inspect HEAD. + When pre-commit is invoked with `--from-ref/--to-ref` (see code-cleanup.yml + for the CI invocation), it exports PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF + and we walk that range. Otherwise we only inspect HEAD. """ from_ref = os.environ.get("PRE_COMMIT_FROM_REF") to_ref = os.environ.get("PRE_COMMIT_TO_REF") From d825e87f6d3683444fad6b3c6990881aa2642a96 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 11 Apr 2026 12:40:04 +0800 Subject: [PATCH 3/3] ci: address PR feedback on commit-message check hook - No-op the --check path when PRE_COMMIT_FROM_REF/TO_REF are unset, so the local pre-commit-stage hook can't block commits or amends on the state of an existing bad HEAD; locally the commit-msg hook is in charge. The check only fires meaningfully in CI where the workflow passes the explicit range. - Wrap git rev-list / git log calls in try/except so a missing ref or shallow-clone failure surfaces as a one-line hook error instead of a Python traceback. - Rename the hook to "Check commit messages for generated signatures" since it walks the full PR range, not just HEAD. --- .pre-commit-config.yaml | 2 +- bin/hooks/filter_commit_message.py | 56 +++++++++++++++++------------- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16fcd7aa73..81d232da96 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -101,7 +101,7 @@ repos: stages: [commit-msg] - id: check-commit-message - name: Check HEAD commit message for generated signatures + name: Check commit messages for generated signatures always_run: true pass_filenames: false entry: python bin/hooks/filter_commit_message.py --check diff --git a/bin/hooks/filter_commit_message.py b/bin/hooks/filter_commit_message.py index 0f4a790d19..65f1a758fe 100644 --- a/bin/hooks/filter_commit_message.py +++ b/bin/hooks/filter_commit_message.py @@ -47,42 +47,48 @@ def rewrite_file(path: Path) -> int: return 0 -def commits_to_check() -> list[str]: - """Commits to check, oldest first. +def check_commits() -> int: + """Check every commit in the range pre-commit was invoked over. - When pre-commit is invoked with `--from-ref/--to-ref` (see code-cleanup.yml - for the CI invocation), it exports PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF - and we walk that range. Otherwise we only inspect HEAD. + Locally on `git commit` no range is supplied, so we no-op rather than + blocking commits on the state of HEAD — the commit-msg hook is in + charge there. In CI, code-cleanup.yml passes `--from-ref/--to-ref` to + pre-commit, which exports PRE_COMMIT_FROM_REF / PRE_COMMIT_TO_REF. """ from_ref = os.environ.get("PRE_COMMIT_FROM_REF") to_ref = os.environ.get("PRE_COMMIT_TO_REF") - if from_ref and to_ref: - result = subprocess.run( + if not (from_ref and to_ref): + return 0 + + try: + rev_list = subprocess.run( ["git", "rev-list", "--reverse", f"{from_ref}..{to_ref}"], capture_output=True, text=True, check=True, ) - return result.stdout.split() - - result = subprocess.run( - ["git", "rev-parse", "HEAD"], - capture_output=True, - text=True, - check=True, - ) - return [result.stdout.strip()] - + except subprocess.CalledProcessError as e: + print( + f"git rev-list {from_ref}..{to_ref} failed: {e.stderr.strip() or e}", + file=sys.stderr, + ) + return 1 -def check_commits() -> int: failures: list[tuple[str, str]] = [] - for sha in commits_to_check(): - msg = subprocess.run( - ["git", "log", "-1", "--format=%B", sha], - capture_output=True, - text=True, - check=True, - ).stdout + for sha in rev_list.stdout.split(): + try: + msg = subprocess.run( + ["git", "log", "-1", "--format=%B", sha], + capture_output=True, + text=True, + check=True, + ).stdout + except subprocess.CalledProcessError as e: + print( + f"git log -1 {sha} failed: {e.stderr.strip() or e}", + file=sys.stderr, + ) + return 1 _, matched = filter_text(msg) if matched is not None: failures.append((sha, matched))