From f1b7bbf60fac33eef2e9e2b53d39776d53d3fe8a Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Wed, 29 Apr 2026 11:27:27 +0300 Subject: [PATCH 1/4] build: allow posting comments on forked PRs --- .../workflows/breaking_changes_detector.yml | 75 +++++----- .../breaking_changes_detector_comment.yml | 131 ++++++++++++++++++ ci/scripts/changed_crates.sh | 60 +------- 3 files changed, 173 insertions(+), 93 deletions(-) create mode 100644 .github/workflows/breaking_changes_detector_comment.yml diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 03a32be519a08..11581be745102 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -20,8 +20,24 @@ # Only public workspace crates that have file changes are checked. # Internal crates (benchmarks, test-utils, sqllogictest, doc) are excluded. # -# If breaking changes are found, a sticky comment is posted on the PR. -# The comment is removed automatically once the issues are resolved. +# This workflow only runs cargo-semver-checks and uploads the result as an +# artifact. The actual PR comment is posted by a companion workflow +# (`breaking_changes_detector_comment.yml`) that picks up the artifact via +# `workflow_run`. +# +# Why split it? +# "The GITHUB_TOKEN has read-only permissions in pull requests from forked +# repositories." +# — https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request +# A read-only token cannot post comments, so on fork PRs the previous +# single-workflow design failed with HTTP 403. We can't simply broaden the +# trigger here either: cargo-semver-checks compiles PR code (build.rs, proc +# macros), so granting this job a write token would expose it to any code +# in the PR. And ASF infra policy independently forbids `pull_request_target` +# for any workflow that exposes GITHUB_TOKEN +# (https://infra.apache.org/github-actions-policy.html). The companion +# `workflow_run` workflow runs in the base-repo context with write access +# and never executes PR code. name: "Detect breaking changes" @@ -37,11 +53,6 @@ jobs: check-semver: name: Check semver runs-on: ubuntu-latest - outputs: - logs: ${{ steps.check_semver.outputs.logs }} - # Default to "success" so the comment job clears any stale comment - # when the check step is skipped (e.g. no published crates changed). - result: ${{ steps.check_semver.outputs.result || 'success' }} steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -85,11 +96,6 @@ jobs: ci/scripts/changed_crates.sh semver-check "origin/${BASE_REF}" $PACKAGES \ 2>&1 | tee /tmp/semver-output.txt EXIT_CODE=${PIPESTATUS[0]} - { - echo "logs<> "$GITHUB_OUTPUT" # Pass the result through an output instead of failing the job: # a detected breaking change should surface as a PR comment, not a # red check, so PR authors aren't confused by an intentional break. @@ -99,28 +105,29 @@ jobs: echo "result=failure" >> "$GITHUB_OUTPUT" fi - # Post or remove a sticky comment on the PR based on the semver check result. - comment-on-pr: - name: Comment on pull request - runs-on: ubuntu-latest - needs: check-semver - if: always() - permissions: - contents: read - pull-requests: write - steps: - - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - sparse-checkout: ci/scripts - - - name: Update PR comment + # Stage the data the companion comment workflow needs into a single + # directory. We default the result to "success" so the comment + # workflow clears any stale comment when the check step is skipped + # (e.g. no published crates changed). + - name: Stage artifact for comment workflow + if: always() env: - GH_TOKEN: ${{ github.token }} - REPO: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - CHECK_RESULT: ${{ needs.check-semver.outputs.result }} - SEMVER_LOGS: ${{ needs.check-semver.outputs.logs }} + CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} run: | - ci/scripts/changed_crates.sh comment \ - "$REPO" "$PR_NUMBER" "$CHECK_RESULT" "$SEMVER_LOGS" + mkdir -p semver-artifact + echo "$PR_NUMBER" > semver-artifact/pr_number + echo "$CHECK_RESULT" > semver-artifact/result + if [ -f /tmp/semver-output.txt ]; then + sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs + else + : > semver-artifact/logs + fi + + - name: Upload artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: semver-check-result + path: semver-artifact/ + retention-days: 1 diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml new file mode 100644 index 0000000000000..e00dba2cbe4dd --- /dev/null +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -0,0 +1,131 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Companion to `breaking_changes_detector.yml` — posts the sticky PR comment. +# +# Why this workflow exists: +# "The GITHUB_TOKEN has read-only permissions in pull requests from forked +# repositories." +# — https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request +# That is why the upstream `pull_request` workflow cannot post the comment +# itself when the PR comes from a fork. +# +# Why not `pull_request_target`? ASF infra policy forbids it: +# "You MUST NOT use `pull_request_target` as a trigger on ANY action that +# exports ANY confidential credentials or tokens such as GITHUB_TOKEN or +# NPM_TOKEN." +# — https://infra.apache.org/github-actions-policy.html +# `workflow_run` is the supported alternative: it runs in the base +# repository's context regardless of where the upstream run was triggered +# from, so the GITHUB_TOKEN here can be granted `pull-requests: write`. See: +# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#workflow_run +# +# Security note: this workflow MUST NOT check out or execute any code from +# the PR. The artifact's contents originate from a workflow run that may +# have compiled fork-controlled code, so PR_NUMBER and CHECK_RESULT are +# validated against strict patterns before being passed to any action. + +name: "Detect breaking changes - Comment" + +on: + workflow_run: + workflows: ["Detect breaking changes"] + types: + - completed + +permissions: + contents: read + +jobs: + comment-on-pr: + name: Comment on pull request + if: github.event.workflow_run.event == 'pull_request' + runs-on: ubuntu-latest + # Scoped to the minimum needed to upsert/delete the sticky comment. + permissions: + pull-requests: write + steps: + - name: Download semver-check artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: semver-check-result + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ github.token }} + path: ./semver-artifact + + - name: Read and validate artifact + id: read + run: | + set -euo pipefail + # Validate every field: the artifact comes from a workflow run + # that compiled fork-controlled code, so its contents are untrusted. + PR_NUMBER=$(cat ./semver-artifact/pr_number) + if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "Invalid PR number: $PR_NUMBER" >&2 + exit 1 + fi + CHECK_RESULT=$(cat ./semver-artifact/result) + if [[ "$CHECK_RESULT" != "success" && "$CHECK_RESULT" != "failure" ]]; then + echo "Invalid check result: $CHECK_RESULT" >&2 + exit 1 + fi + echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT" + echo "result=$CHECK_RESULT" >> "$GITHUB_OUTPUT" + + # Multi-line output: random delimiter so a malicious log line can't + # close the heredoc and inject extra output keys. See: + # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings + DELIM="EOF_$(openssl rand -hex 16)" + { + echo "logs<<${DELIM}" + cat ./semver-artifact/logs + echo "${DELIM}" + } >> "$GITHUB_OUTPUT" + + # The marker `` is what makes the comment + # "sticky": maintain-one-comment uses it to find and replace (or + # delete) the existing comment instead of stacking new ones. + - name: Upsert sticky comment + if: steps.read.outputs.result != 'success' + uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.read.outputs.pr_number }} + body-include: '' + body: | + + Thank you for opening this pull request! + + Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch). + +
+ Details + + ``` + ${{ steps.read.outputs.logs }} + ``` + +
+ + - name: Delete sticky comment + if: steps.read.outputs.result == 'success' + uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.read.outputs.pr_number }} + body-include: '' + delete: true diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh index 2ee76ad010e97..6d014a9492632 100755 --- a/ci/scripts/changed_crates.sh +++ b/ci/scripts/changed_crates.sh @@ -28,17 +28,9 @@ # Run cargo-semver-checks for the given packages against base_ref. # Output and exit code are passed through unchanged; the caller is # responsible for capturing/formatting them. -# -# comment [logs] -# Upsert or delete a sticky PR comment based on check_result. -# check_result: "success" deletes any existing comment, -# anything else upserts the comment with the provided logs. -# Requires GH_TOKEN to be set. set -euo pipefail -MARKER="" - # ── changed-crates ────────────────────────────────────────────────── cmd_changed_crates() { local base_ref="${1:?Usage: changed_crates.sh changed-crates }" @@ -80,62 +72,12 @@ cmd_semver_check() { cargo semver-checks --baseline-rev "$base_ref" "${args[@]}" } -# ── comment ───────────────────────────────────────────────────────── -cmd_comment() { - local repo="${1:?Usage: changed_crates.sh comment [logs]}" - local pr_number="${2:?}" - local check_result="${3:?}" - local logs="${4:-}" - - # Find existing comment with our marker - local comment_id - comment_id=$(gh api "repos/${repo}/issues/${pr_number}/comments" \ - --jq ".[] | select(.body | contains(\"${MARKER}\")) | .id" | head -1) - - echo "existing breaking change comment id $comment_id" - - if [ "$check_result" = "success" ]; then - # Delete the comment if one exists - if [ -n "$comment_id" ]; then - echo "result is success, so deleting breaking change comment" - gh api "repos/${repo}/issues/comments/${comment_id}" --method DELETE - else - echo "result is success and no previous comment to delete" - fi - else - local body="${MARKER} -Thank you for opening this pull request! - -Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch). - -
-Details - -\`\`\` -${logs} -\`\`\` - -
" - - if [ -n "$comment_id" ]; then - echo "comment already exists, updating content" - gh api "repos/${repo}/issues/comments/${comment_id}" \ - --method PATCH --field body="$body" - else - echo "no comment with breaking changes, creating a new one" - gh api "repos/${repo}/issues/${pr_number}/comments" \ - --method POST --field body="$body" - fi - fi -} - # ── main ──────────────────────────────────────────────────────────── -cmd="${1:?Usage: changed_crates.sh [args...]}" +cmd="${1:?Usage: changed_crates.sh [args...]}" shift case "$cmd" in changed-crates) cmd_changed_crates "$@" ;; semver-check) cmd_semver_check "$@" ;; - comment) cmd_comment "$@" ;; *) echo "Unknown command: $cmd" >&2; exit 1 ;; esac From e59f66aae938c7c5614fa17dce2a4de1ed720277 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Wed, 29 Apr 2026 11:35:00 +0300 Subject: [PATCH 2/4] update comments --- .github/workflows/breaking_changes_detector.yml | 2 +- .github/workflows/breaking_changes_detector_comment.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 11581be745102..4703fc0776480 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -28,7 +28,7 @@ # Why split it? # "The GITHUB_TOKEN has read-only permissions in pull requests from forked # repositories." -# — https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request +# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request # A read-only token cannot post comments, so on fork PRs the previous # single-workflow design failed with HTTP 403. We can't simply broaden the # trigger here either: cargo-semver-checks compiles PR code (build.rs, proc diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index e00dba2cbe4dd..608e50ceb9975 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. -# Companion to `breaking_changes_detector.yml` — posts the sticky PR comment. +# Companion to `breaking_changes_detector.yml`. Posts the sticky PR comment. # # Why this workflow exists: # "The GITHUB_TOKEN has read-only permissions in pull requests from forked # repositories." -# — https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request +# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request # That is why the upstream `pull_request` workflow cannot post the comment # itself when the PR comes from a fork. # @@ -28,7 +28,7 @@ # "You MUST NOT use `pull_request_target` as a trigger on ANY action that # exports ANY confidential credentials or tokens such as GITHUB_TOKEN or # NPM_TOKEN." -# — https://infra.apache.org/github-actions-policy.html +# https://infra.apache.org/github-actions-policy.html # `workflow_run` is the supported alternative: it runs in the base # repository's context regardless of where the upstream run was triggered # from, so the GITHUB_TOKEN here can be granted `pull-requests: write`. See: From 3b76ccaabd0c71bd0defe83936d930e372e6371c Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Wed, 29 Apr 2026 11:39:13 +0300 Subject: [PATCH 3/4] fix: download protobuf --- .github/workflows/breaking_changes_detector.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 4703fc0776480..4a4c909e7a781 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -77,6 +77,15 @@ jobs: echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" echo "Changed crates: $PACKAGES" + # `datafusion-substrait` (and crates that depend on it via sqllogictest) + # have a build script that calls protoc, which is not preinstalled on + # ubuntu-latest runners. + - name: Install Protobuf Compiler + if: steps.changed_crates.outputs.packages != '' + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler + - name: Install cargo-semver-checks if: steps.changed_crates.outputs.packages != '' uses: taiki-e/install-action@94cb46f8d6e437890146ffbd78a778b78e623fb2 # v2.74.0 From 672bf7ab17875ee052ebfa969999768b97b09b4c Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 30 Apr 2026 09:49:09 +0300 Subject: [PATCH 4/4] Update .github/workflows/breaking_changes_detector_comment.yml Co-authored-by: Martin Grigorov --- .github/workflows/breaking_changes_detector_comment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index 608e50ceb9975..8e79426082557 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -57,6 +57,7 @@ jobs: runs-on: ubuntu-latest # Scoped to the minimum needed to upsert/delete the sticky comment. permissions: + actions: read pull-requests: write steps: - name: Download semver-check artifact