diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 03a32be519a08..4a4c909e7a781 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -20,8 +20,24 @@ # Only public workspace crates that have file changes are checked. # Internal crates (benchmarks, test-utils, sqllogictest, doc) are excluded. # -# If breaking changes are found, a sticky comment is posted on the PR. -# The comment is removed automatically once the issues are resolved. +# This workflow only runs cargo-semver-checks and uploads the result as an +# artifact. The actual PR comment is posted by a companion workflow +# (`breaking_changes_detector_comment.yml`) that picks up the artifact via +# `workflow_run`. +# +# Why split it? +# "The GITHUB_TOKEN has read-only permissions in pull requests from forked +# repositories." +# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request +# A read-only token cannot post comments, so on fork PRs the previous +# single-workflow design failed with HTTP 403. We can't simply broaden the +# trigger here either: cargo-semver-checks compiles PR code (build.rs, proc +# macros), so granting this job a write token would expose it to any code +# in the PR. And ASF infra policy independently forbids `pull_request_target` +# for any workflow that exposes GITHUB_TOKEN +# (https://infra.apache.org/github-actions-policy.html). The companion +# `workflow_run` workflow runs in the base-repo context with write access +# and never executes PR code. name: "Detect breaking changes" @@ -37,11 +53,6 @@ jobs: check-semver: name: Check semver runs-on: ubuntu-latest - outputs: - logs: ${{ steps.check_semver.outputs.logs }} - # Default to "success" so the comment job clears any stale comment - # when the check step is skipped (e.g. no published crates changed). - result: ${{ steps.check_semver.outputs.result || 'success' }} steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -66,6 +77,15 @@ jobs: echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" echo "Changed crates: $PACKAGES" + # `datafusion-substrait` (and crates that depend on it via sqllogictest) + # have a build script that calls protoc, which is not preinstalled on + # ubuntu-latest runners. + - name: Install Protobuf Compiler + if: steps.changed_crates.outputs.packages != '' + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler + - name: Install cargo-semver-checks if: steps.changed_crates.outputs.packages != '' uses: taiki-e/install-action@94cb46f8d6e437890146ffbd78a778b78e623fb2 # v2.74.0 @@ -85,11 +105,6 @@ jobs: ci/scripts/changed_crates.sh semver-check "origin/${BASE_REF}" $PACKAGES \ 2>&1 | tee /tmp/semver-output.txt EXIT_CODE=${PIPESTATUS[0]} - { - echo "logs<> "$GITHUB_OUTPUT" # Pass the result through an output instead of failing the job: # a detected breaking change should surface as a PR comment, not a # red check, so PR authors aren't confused by an intentional break. @@ -99,28 +114,29 @@ jobs: echo "result=failure" >> "$GITHUB_OUTPUT" fi - # Post or remove a sticky comment on the PR based on the semver check result. - comment-on-pr: - name: Comment on pull request - runs-on: ubuntu-latest - needs: check-semver - if: always() - permissions: - contents: read - pull-requests: write - steps: - - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - sparse-checkout: ci/scripts - - - name: Update PR comment + # Stage the data the companion comment workflow needs into a single + # directory. We default the result to "success" so the comment + # workflow clears any stale comment when the check step is skipped + # (e.g. no published crates changed). + - name: Stage artifact for comment workflow + if: always() env: - GH_TOKEN: ${{ github.token }} - REPO: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} - CHECK_RESULT: ${{ needs.check-semver.outputs.result }} - SEMVER_LOGS: ${{ needs.check-semver.outputs.logs }} + CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} run: | - ci/scripts/changed_crates.sh comment \ - "$REPO" "$PR_NUMBER" "$CHECK_RESULT" "$SEMVER_LOGS" + mkdir -p semver-artifact + echo "$PR_NUMBER" > semver-artifact/pr_number + echo "$CHECK_RESULT" > semver-artifact/result + if [ -f /tmp/semver-output.txt ]; then + sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs + else + : > semver-artifact/logs + fi + + - name: Upload artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: semver-check-result + path: semver-artifact/ + retention-days: 1 diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml new file mode 100644 index 0000000000000..8e79426082557 --- /dev/null +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -0,0 +1,132 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Companion to `breaking_changes_detector.yml`. Posts the sticky PR comment. +# +# Why this workflow exists: +# "The GITHUB_TOKEN has read-only permissions in pull requests from forked +# repositories." +# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request +# That is why the upstream `pull_request` workflow cannot post the comment +# itself when the PR comes from a fork. +# +# Why not `pull_request_target`? ASF infra policy forbids it: +# "You MUST NOT use `pull_request_target` as a trigger on ANY action that +# exports ANY confidential credentials or tokens such as GITHUB_TOKEN or +# NPM_TOKEN." +# https://infra.apache.org/github-actions-policy.html +# `workflow_run` is the supported alternative: it runs in the base +# repository's context regardless of where the upstream run was triggered +# from, so the GITHUB_TOKEN here can be granted `pull-requests: write`. See: +# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#workflow_run +# +# Security note: this workflow MUST NOT check out or execute any code from +# the PR. The artifact's contents originate from a workflow run that may +# have compiled fork-controlled code, so PR_NUMBER and CHECK_RESULT are +# validated against strict patterns before being passed to any action. + +name: "Detect breaking changes - Comment" + +on: + workflow_run: + workflows: ["Detect breaking changes"] + types: + - completed + +permissions: + contents: read + +jobs: + comment-on-pr: + name: Comment on pull request + if: github.event.workflow_run.event == 'pull_request' + runs-on: ubuntu-latest + # Scoped to the minimum needed to upsert/delete the sticky comment. + permissions: + actions: read + pull-requests: write + steps: + - name: Download semver-check artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: semver-check-result + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ github.token }} + path: ./semver-artifact + + - name: Read and validate artifact + id: read + run: | + set -euo pipefail + # Validate every field: the artifact comes from a workflow run + # that compiled fork-controlled code, so its contents are untrusted. + PR_NUMBER=$(cat ./semver-artifact/pr_number) + if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "Invalid PR number: $PR_NUMBER" >&2 + exit 1 + fi + CHECK_RESULT=$(cat ./semver-artifact/result) + if [[ "$CHECK_RESULT" != "success" && "$CHECK_RESULT" != "failure" ]]; then + echo "Invalid check result: $CHECK_RESULT" >&2 + exit 1 + fi + echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT" + echo "result=$CHECK_RESULT" >> "$GITHUB_OUTPUT" + + # Multi-line output: random delimiter so a malicious log line can't + # close the heredoc and inject extra output keys. See: + # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings + DELIM="EOF_$(openssl rand -hex 16)" + { + echo "logs<<${DELIM}" + cat ./semver-artifact/logs + echo "${DELIM}" + } >> "$GITHUB_OUTPUT" + + # The marker `` is what makes the comment + # "sticky": maintain-one-comment uses it to find and replace (or + # delete) the existing comment instead of stacking new ones. + - name: Upsert sticky comment + if: steps.read.outputs.result != 'success' + uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.read.outputs.pr_number }} + body-include: '' + body: | + + Thank you for opening this pull request! + + Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch). + +
+ Details + + ``` + ${{ steps.read.outputs.logs }} + ``` + +
+ + - name: Delete sticky comment + if: steps.read.outputs.result == 'success' + uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.read.outputs.pr_number }} + body-include: '' + delete: true diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh index 2ee76ad010e97..6d014a9492632 100755 --- a/ci/scripts/changed_crates.sh +++ b/ci/scripts/changed_crates.sh @@ -28,17 +28,9 @@ # Run cargo-semver-checks for the given packages against base_ref. # Output and exit code are passed through unchanged; the caller is # responsible for capturing/formatting them. -# -# comment [logs] -# Upsert or delete a sticky PR comment based on check_result. -# check_result: "success" deletes any existing comment, -# anything else upserts the comment with the provided logs. -# Requires GH_TOKEN to be set. set -euo pipefail -MARKER="" - # ── changed-crates ────────────────────────────────────────────────── cmd_changed_crates() { local base_ref="${1:?Usage: changed_crates.sh changed-crates }" @@ -80,62 +72,12 @@ cmd_semver_check() { cargo semver-checks --baseline-rev "$base_ref" "${args[@]}" } -# ── comment ───────────────────────────────────────────────────────── -cmd_comment() { - local repo="${1:?Usage: changed_crates.sh comment [logs]}" - local pr_number="${2:?}" - local check_result="${3:?}" - local logs="${4:-}" - - # Find existing comment with our marker - local comment_id - comment_id=$(gh api "repos/${repo}/issues/${pr_number}/comments" \ - --jq ".[] | select(.body | contains(\"${MARKER}\")) | .id" | head -1) - - echo "existing breaking change comment id $comment_id" - - if [ "$check_result" = "success" ]; then - # Delete the comment if one exists - if [ -n "$comment_id" ]; then - echo "result is success, so deleting breaking change comment" - gh api "repos/${repo}/issues/comments/${comment_id}" --method DELETE - else - echo "result is success and no previous comment to delete" - fi - else - local body="${MARKER} -Thank you for opening this pull request! - -Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch). - -
-Details - -\`\`\` -${logs} -\`\`\` - -
" - - if [ -n "$comment_id" ]; then - echo "comment already exists, updating content" - gh api "repos/${repo}/issues/comments/${comment_id}" \ - --method PATCH --field body="$body" - else - echo "no comment with breaking changes, creating a new one" - gh api "repos/${repo}/issues/${pr_number}/comments" \ - --method POST --field body="$body" - fi - fi -} - # ── main ──────────────────────────────────────────────────────────── -cmd="${1:?Usage: changed_crates.sh [args...]}" +cmd="${1:?Usage: changed_crates.sh [args...]}" shift case "$cmd" in changed-crates) cmd_changed_crates "$@" ;; semver-check) cmd_semver_check "$@" ;; - comment) cmd_comment "$@" ;; *) echo "Unknown command: $cmd" >&2; exit 1 ;; esac