diff --git a/.github/actions/run-semver-check/action.yml b/.github/actions/run-semver-check/action.yml
new file mode 100644
index 0000000000000..4fc1ac729d879
--- /dev/null
+++ b/.github/actions/run-semver-check/action.yml
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Run semver check
+description: Run cargo-semver-checks, preserve logs, and expose success/failure without failing the job.
+inputs:
+ baseline_ref:
+ description: Baseline git ref or tag for cargo-semver-checks.
+ required: true
+ packages:
+ description: Space-separated package names to check.
+ required: true
+ log_path:
+ description: Path where semver output should be copied.
+ required: true
+outputs:
+ result:
+ description: "Semver check result: success or failure."
+ value: ${{ steps.check.outputs.result }}
+runs:
+ using: composite
+ steps:
+ - name: Run cargo-semver-checks
+ id: check
+ shell: bash
+ env:
+ BASELINE_REF: ${{ inputs.baseline_ref }}
+ PACKAGES: ${{ inputs.packages }}
+ LOG_PATH: ${{ inputs.log_path }}
+ run: |
+ set +e
+ # `tee` lets cargo's output stream live into the Actions log
+ # while we also keep a copy for the PR comment.
+ ci/scripts/changed_crates.sh semver-check "$BASELINE_REF" $PACKAGES \
+ 2>&1 | tee "$LOG_PATH"
+ EXIT_CODE=${PIPESTATUS[0]}
+ # Pass the result through an output instead of failing the job:
+ # semver breakage should surface as a comment/label signal while this
+ # CI job status stays green.
+ if [ "$EXIT_CODE" -eq 0 ]; then
+ echo "result=success" >> "$GITHUB_OUTPUT"
+ else
+ echo "result=failure" >> "$GITHUB_OUTPUT"
+ fi
diff --git a/.github/actions/setup-semver-check/action.yml b/.github/actions/setup-semver-check/action.yml
new file mode 100644
index 0000000000000..c414214807c46
--- /dev/null
+++ b/.github/actions/setup-semver-check/action.yml
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Setup semver check
+description: Fetch PR base, determine changed crates, and install tools required by cargo-semver-checks.
+inputs:
+ base_ref:
+ description: Pull request base branch ref.
+ required: true
+ repository:
+ description: Repository to fetch the base branch from.
+ required: true
+outputs:
+ packages:
+ description: Space-separated changed publishable crate names.
+ value: ${{ steps.changed_crates.outputs.packages }}
+runs:
+ using: composite
+ steps:
+ # `origin` may point at a fork (when a contributor runs this locally) or
+ # at a stale ref. Fetch the base branch from the PR's upstream repo into
+ # a dedicated `apache/` ref so the baseline is unambiguous and the
+ # same ref name works locally (`git remote add apache ...`) and in CI.
+ - name: Fetch base branch
+ shell: bash
+ env:
+ BASE_REF: ${{ inputs.base_ref }}
+ REPO: ${{ inputs.repository }}
+ run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}"
+
+ - name: Determine changed crates
+ id: changed_crates
+ shell: bash
+ env:
+ BASE_REF: ${{ inputs.base_ref }}
+ run: |
+ PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}")
+ echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT"
+ echo "Changed crates: $PACKAGES"
+
+ # `datafusion-substrait` (and crates that depend on it via sqllogictest)
+ # have a build script that calls protoc, which is not preinstalled on
+ # ubuntu-latest runners.
+ - name: Install Protobuf Compiler
+ if: steps.changed_crates.outputs.packages != ''
+ shell: bash
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y protobuf-compiler
+
+ - name: Install cargo-semver-checks
+ if: steps.changed_crates.outputs.packages != ''
+ uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
+ with:
+ tool: cargo-semver-checks
diff --git a/.github/actions/stage-semver-artifact/action.yml b/.github/actions/stage-semver-artifact/action.yml
new file mode 100644
index 0000000000000..3e6e4b1d80542
--- /dev/null
+++ b/.github/actions/stage-semver-artifact/action.yml
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Stage semver artifact
+description: Stage semver result fields and sanitized logs for the comment workflow.
+inputs:
+ artifact_dir:
+ description: Directory to write artifact files into.
+ required: true
+ pr_number:
+ description: Pull request number.
+ required: true
+ result:
+ description: "Semver result: success or failure."
+ required: true
+ baseline_ref:
+ description: Baseline git ref or tag used by the semver check.
+ required: true
+ log_path:
+ description: Path to raw semver output log.
+ required: true
+ latest_release_tag:
+ description: Latest release tag, for blocking release-baseline checks.
+ required: false
+ default: ""
+runs:
+ using: composite
+ steps:
+ - name: Stage artifact files
+ shell: bash
+ env:
+ ARTIFACT_DIR: ${{ inputs.artifact_dir }}
+ PR_NUMBER: ${{ inputs.pr_number }}
+ RESULT: ${{ inputs.result }}
+ BASELINE_REF: ${{ inputs.baseline_ref }}
+ LOG_PATH: ${{ inputs.log_path }}
+ LATEST_RELEASE_TAG: ${{ inputs.latest_release_tag }}
+ run: |
+ mkdir -p "$ARTIFACT_DIR"
+ echo "$PR_NUMBER" > "$ARTIFACT_DIR/pr_number"
+ echo "$RESULT" > "$ARTIFACT_DIR/result"
+ echo "$BASELINE_REF" > "$ARTIFACT_DIR/baseline_ref"
+ if [ -n "$LATEST_RELEASE_TAG" ]; then
+ echo "$LATEST_RELEASE_TAG" > "$ARTIFACT_DIR/latest_release_tag"
+ fi
+ if [ -f "$LOG_PATH" ]; then
+ sed 's/\x1b\[[0-9;]*m//g' "$LOG_PATH" > "$ARTIFACT_DIR/logs"
+ else
+ : > "$ARTIFACT_DIR/logs"
+ fi
diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml
index 67ab985228b47..41340391d8dfa 100644
--- a/.github/workflows/breaking_changes_detector.yml
+++ b/.github/workflows/breaking_changes_detector.yml
@@ -50,8 +50,8 @@ permissions:
contents: read
jobs:
- check-semver:
- name: Check semver
+ check-semver-advisory:
+ name: Check semver - advisory PR-local
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -59,83 +59,112 @@ jobs:
with:
fetch-depth: 0
- # `origin` may point at a fork (when a contributor runs this locally) or
- # at a stale ref. Fetch the base branch from the PR's upstream repo into
- # a dedicated `apache/` ref so the baseline is unambiguous and the
- # same ref name works locally (`git remote add apache ...`) and in CI.
- - name: Fetch base branch
- env:
- BASE_REF: ${{ github.base_ref }}
- REPO: ${{ github.repository }}
- run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}"
-
- - name: Determine changed crates
- id: changed_crates
- env:
- BASE_REF: ${{ github.base_ref }}
- run: |
- PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}")
- echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT"
- echo "Changed crates: $PACKAGES"
-
- # `datafusion-substrait` (and crates that depend on it via sqllogictest)
- # have a build script that calls protoc, which is not preinstalled on
- # ubuntu-latest runners.
- - name: Install Protobuf Compiler
- if: steps.changed_crates.outputs.packages != ''
- run: |
- sudo apt-get update
- sudo apt-get install -y protobuf-compiler
-
- - name: Install cargo-semver-checks
- if: steps.changed_crates.outputs.packages != ''
- uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
+ - name: Setup semver check
+ id: semver_setup
+ uses: ./.github/actions/setup-semver-check
with:
- tool: cargo-semver-checks
+ base_ref: ${{ github.base_ref }}
+ repository: ${{ github.repository }}
- - name: Run cargo-semver-checks
+ - name: Run cargo-semver-checks against PR base branch
id: check_semver
- if: steps.changed_crates.outputs.packages != ''
- env:
- BASE_REF: ${{ github.base_ref }}
- PACKAGES: ${{ steps.changed_crates.outputs.packages }}
- run: |
- set +e
- # `tee` lets cargo's output stream live into the Actions log
- # while we also keep a copy for the PR comment.
- # Using `apache` remote here to point to the repository the pull request is against
- ci/scripts/changed_crates.sh semver-check "apache/${BASE_REF}" $PACKAGES \
- 2>&1 | tee /tmp/semver-output.txt
- EXIT_CODE=${PIPESTATUS[0]}
- # Pass the result through an output instead of failing the job:
- # a detected breaking change should surface as a PR comment, not a
- # red check, so PR authors aren't confused by an intentional break.
- if [ "$EXIT_CODE" -eq 0 ]; then
- echo "result=success" >> "$GITHUB_OUTPUT"
- else
- echo "result=failure" >> "$GITHUB_OUTPUT"
- fi
+ if: steps.semver_setup.outputs.packages != ''
+ uses: ./.github/actions/run-semver-check
+ with:
+ baseline_ref: apache/${{ github.base_ref }}
+ packages: ${{ steps.semver_setup.outputs.packages }}
+ log_path: /tmp/advisory-semver-output.txt
# Stage the data the companion comment workflow needs into a single
# directory. We default the result to "success" so the comment
# workflow clears any stale comment when the check step is skipped
# (e.g. no published crates changed).
- - name: Stage artifact for comment workflow
+ - name: Stage advisory artifact for comment workflow
if: always()
- env:
- PR_NUMBER: ${{ github.event.pull_request.number }}
- CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }}
+ uses: ./.github/actions/stage-semver-artifact
+ with:
+ artifact_dir: semver-advisory-artifact
+ pr_number: ${{ github.event.pull_request.number }}
+ result: ${{ steps.check_semver.outputs.result || 'success' }}
+ baseline_ref: apache/${{ github.base_ref }}
+ log_path: /tmp/advisory-semver-output.txt
+
+ - name: Upload advisory artifact
+ if: always()
+ uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+ with:
+ name: semver-advisory-check-result
+ path: semver-advisory-artifact/
+ retention-days: 1
+
+ check-semver-blocking:
+ name: Check semver - blocking latest release
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ fetch-depth: 0
+
+ - name: Setup semver check
+ id: semver_setup
+ uses: ./.github/actions/setup-semver-check
+ with:
+ base_ref: ${{ github.base_ref }}
+ repository: ${{ github.repository }}
+
+ - name: Determine latest stable release tag
+ id: latest_release
run: |
- mkdir -p semver-artifact
- echo "$PR_NUMBER" > semver-artifact/pr_number
- echo "$CHECK_RESULT" > semver-artifact/result
- if [ -f /tmp/semver-output.txt ]; then
- sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs
- else
- : > semver-artifact/logs
- fi
-
- - name: Upload artifact
+ git fetch --tags --force
+ LATEST_RELEASE_TAG=$(ci/scripts/changed_crates.sh latest-release-tag)
+ echo "tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT"
+ echo "Latest stable release tag: $LATEST_RELEASE_TAG"
+
+ - name: Run cargo-semver-checks against latest stable release
+ id: check_semver
+ if: steps.semver_setup.outputs.packages != ''
+ uses: ./.github/actions/run-semver-check
+ with:
+ baseline_ref: ${{ steps.latest_release.outputs.tag }}
+ packages: ${{ steps.semver_setup.outputs.packages }}
+ log_path: /tmp/blocking-semver-output.txt
+
+ - name: Stage blocking artifact for comment workflow
+ if: always()
+ uses: ./.github/actions/stage-semver-artifact
+ with:
+ artifact_dir: semver-blocking-artifact
+ pr_number: ${{ github.event.pull_request.number }}
+ result: ${{ steps.check_semver.outputs.result || 'success' }}
+ baseline_ref: ${{ steps.latest_release.outputs.tag }}
+ latest_release_tag: ${{ steps.latest_release.outputs.tag }}
+ log_path: /tmp/blocking-semver-output.txt
+
+ - name: Upload blocking artifact
+ if: always()
+ uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+ with:
+ name: semver-blocking-check-result
+ path: semver-blocking-artifact/
+ retention-days: 1
+
+ # workflow_run workflows execute from the default branch. While this PR is
+ # under review, the default-branch comment workflow still expects the old
+ # single-artifact contract. Upload a legacy-compatible copy sourced from
+ # the blocking latest-release signal so in-flight PR checks keep working.
+ - name: Stage legacy artifact for default-branch comment workflow
+ if: always()
+ uses: ./.github/actions/stage-semver-artifact
+ with:
+ artifact_dir: semver-artifact
+ pr_number: ${{ github.event.pull_request.number }}
+ result: ${{ steps.check_semver.outputs.result || 'success' }}
+ baseline_ref: ${{ steps.latest_release.outputs.tag }}
+ latest_release_tag: ${{ steps.latest_release.outputs.tag }}
+ log_path: /tmp/blocking-semver-output.txt
+
+ - name: Upload legacy artifact
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml
index 579c61cb9d5c7..d2178036ead68 100644
--- a/.github/workflows/breaking_changes_detector_comment.yml
+++ b/.github/workflows/breaking_changes_detector_comment.yml
@@ -53,7 +53,8 @@ permissions:
# A dedicated label, separate from the existing `api change` label.
# `api change` may be applied manually for behavioral changes that aren't
# strictly API changes, so we can't safely auto-remove it when this check
-# passes. This auto-managed label is fully owned by the workflow.
+# passes. This auto-managed label is fully owned by the workflow and tracks
+# only the blocking latest-release semver signal.
env:
BREAKING_CHANGE_LABEL: "auto detected api change"
@@ -62,53 +63,113 @@ jobs:
name: Comment on pull request
if: github.event.workflow_run.event == 'pull_request'
runs-on: ubuntu-latest
- # Scoped to the minimum needed to upsert/delete the sticky comment.
+ # Scoped to the minimum needed to upsert/delete the sticky comment and label.
permissions:
actions: read
+ issues: write
pull-requests: write
steps:
- - name: Download semver-check artifact
+ - name: Download advisory semver-check artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
- name: semver-check-result
+ name: semver-advisory-check-result
run-id: ${{ github.event.workflow_run.id }}
github-token: ${{ github.token }}
- path: ./semver-artifact
+ path: ./semver-advisory-artifact
- - name: Read and validate artifact
+ - name: Download blocking semver-check artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: semver-blocking-check-result
+ run-id: ${{ github.event.workflow_run.id }}
+ github-token: ${{ github.token }}
+ path: ./semver-blocking-artifact
+
+ - name: Read and validate artifacts
id: read
run: |
set -euo pipefail
- # Validate every field: the artifact comes from a workflow run
- # that compiled fork-controlled code, so its contents are untrusted.
- PR_NUMBER=$(cat ./semver-artifact/pr_number)
- if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
- echo "Invalid PR number: $PR_NUMBER" >&2
+ require_regex() {
+ local name=$1
+ local value=$2
+ local regex=$3
+ if ! [[ "$value" =~ $regex ]]; then
+ echo "Invalid $name: $value" >&2
+ exit 1
+ fi
+ }
+
+ require_result() {
+ local name=$1
+ local value=$2
+ if [[ "$value" != "success" && "$value" != "failure" ]]; then
+ echo "Invalid $name check result: $value" >&2
+ exit 1
+ fi
+ }
+
+ write_output() {
+ local key=$1
+ local value=$2
+ echo "$key=$value" >> "$GITHUB_OUTPUT"
+ }
+
+ write_multiline_output() {
+ local key=$1
+ local file=$2
+ # Random delimiter so a malicious log line can't close the heredoc
+ # and inject extra output keys. See:
+ # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings
+ local delim
+ delim="EOF_$(openssl rand -hex 16)"
+ {
+ echo "${key}<<${delim}"
+ cat "$file"
+ echo "$delim"
+ } >> "$GITHUB_OUTPUT"
+ }
+
+ # Validate every field: the artifacts come from workflow runs
+ # that compiled fork-controlled code, so their contents are untrusted.
+ ADVISORY_PR_NUMBER=$(cat ./semver-advisory-artifact/pr_number)
+ BLOCKING_PR_NUMBER=$(cat ./semver-blocking-artifact/pr_number)
+ require_regex "advisory PR number" "$ADVISORY_PR_NUMBER" '^[0-9]+$'
+ require_regex "blocking PR number" "$BLOCKING_PR_NUMBER" '^[0-9]+$'
+ if [ "$ADVISORY_PR_NUMBER" != "$BLOCKING_PR_NUMBER" ]; then
+ echo "Mismatched PR numbers: $ADVISORY_PR_NUMBER != $BLOCKING_PR_NUMBER" >&2
exit 1
fi
- CHECK_RESULT=$(cat ./semver-artifact/result)
- if [[ "$CHECK_RESULT" != "success" && "$CHECK_RESULT" != "failure" ]]; then
- echo "Invalid check result: $CHECK_RESULT" >&2
+
+ ADVISORY_RESULT=$(cat ./semver-advisory-artifact/result)
+ BLOCKING_RESULT=$(cat ./semver-blocking-artifact/result)
+ require_result "advisory" "$ADVISORY_RESULT"
+ require_result "blocking" "$BLOCKING_RESULT"
+
+ ADVISORY_BASELINE=$(cat ./semver-advisory-artifact/baseline_ref)
+ BLOCKING_BASELINE=$(cat ./semver-blocking-artifact/baseline_ref)
+ LATEST_RELEASE_TAG=$(cat ./semver-blocking-artifact/latest_release_tag)
+ require_regex "advisory baseline" "$ADVISORY_BASELINE" '^[A-Za-z0-9._/-]+$'
+ require_regex "blocking baseline" "$BLOCKING_BASELINE" '^[A-Za-z0-9._/-]+$'
+ require_regex "latest release tag" "$LATEST_RELEASE_TAG" '^[0-9]+\.[0-9]+\.[0-9]+$'
+ if [ "$BLOCKING_BASELINE" != "$LATEST_RELEASE_TAG" ]; then
+ echo "Mismatched blocking baseline and latest release tag: $BLOCKING_BASELINE != $LATEST_RELEASE_TAG" >&2
exit 1
fi
- echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
- echo "result=$CHECK_RESULT" >> "$GITHUB_OUTPUT"
-
- # Multi-line output: random delimiter so a malicious log line can't
- # close the heredoc and inject extra output keys. See:
- # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings
- DELIM="EOF_$(openssl rand -hex 16)"
- {
- echo "logs<<${DELIM}"
- cat ./semver-artifact/logs
- echo "${DELIM}"
- } >> "$GITHUB_OUTPUT"
+
+ write_output "pr_number" "$ADVISORY_PR_NUMBER"
+ write_output "advisory_result" "$ADVISORY_RESULT"
+ write_output "blocking_result" "$BLOCKING_RESULT"
+ write_output "advisory_baseline" "$ADVISORY_BASELINE"
+ write_output "blocking_baseline" "$BLOCKING_BASELINE"
+ write_output "latest_release_tag" "$LATEST_RELEASE_TAG"
+ write_multiline_output "advisory_logs" ./semver-advisory-artifact/logs
+ write_multiline_output "blocking_logs" ./semver-blocking-artifact/logs
# The marker `` is what makes the comment
# "sticky": maintain-one-comment uses it to find and replace (or
# delete) the existing comment instead of stacking new ones.
- name: Upsert sticky comment
- if: steps.read.outputs.result != 'success'
+ if: steps.read.outputs.advisory_result != 'success' || steps.read.outputs.blocking_result != 'success'
uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -118,19 +179,52 @@ jobs:
Thank you for opening this pull request!
- Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch).
+ [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported semver compatibility warnings for changed published crates.
+
+ DataFusion runs two SemVer checks:
+
+ - **Latest release compatibility** compares this PR with the latest stable release tag. This is the blocking user-facing compatibility signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label when it fails.
+ - **Base branch compatibility** compares this PR with its base branch. This is advisory reviewer information for API churn on unreleased `main`.
+
+ Interpretation guide:
+
+ - **Base branch warning, latest release passes**: advisory only. The PR changes API that exists on unreleased `main`, but not in the latest release. Reviewers should confirm the unreleased API change is intentional.
+ - **Latest release warning, base branch passes**: blocking. The base branch may already contain an unreleased breaking API change, but users upgrading from the latest release can still be affected.
+
+ ## Latest release compatibility — blocking
+
+ Result: `${{ steps.read.outputs.blocking_result }}`
+ Latest release tag: `${{ steps.read.outputs.latest_release_tag }}`
+
+ A `failure` here means changed published crates are not SemVer-compatible with the latest stable release tag.
+
+
+ Latest release compatibility details
+
+ ```
+ ${{ steps.read.outputs.blocking_logs }}
+ ```
+
+
+
+ ## Base branch compatibility — advisory
+
+ Result: `${{ steps.read.outputs.advisory_result }}`
+ Baseline: `${{ steps.read.outputs.advisory_baseline }}`
+
+ A `failure` here means changed published crates are not SemVer-compatible with the PR base branch. This is advisory review information only.
- Details
+ Base branch compatibility details
```
- ${{ steps.read.outputs.logs }}
+ ${{ steps.read.outputs.advisory_logs }}
```
- name: Delete sticky comment
- if: steps.read.outputs.result == 'success'
+ if: steps.read.outputs.advisory_result == 'success' && steps.read.outputs.blocking_result == 'success'
uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -139,7 +233,7 @@ jobs:
delete: true
- name: Add "auto detected api change" label
- if: steps.read.outputs.result != 'success'
+ if: steps.read.outputs.blocking_result != 'success'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
@@ -149,7 +243,7 @@ jobs:
--add-label "$BREAKING_CHANGE_LABEL"
- name: Remove "auto detected api change" label
- if: steps.read.outputs.result == 'success'
+ if: steps.read.outputs.blocking_result == 'success'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh
index 6d014a9492632..17f148e3349a3 100755
--- a/ci/scripts/changed_crates.sh
+++ b/ci/scripts/changed_crates.sh
@@ -24,10 +24,15 @@
# Only published workspace members (those without `publish = false`) are
# considered.
#
-# semver-check
-# Run cargo-semver-checks for the given packages against base_ref.
-# Output and exit code are passed through unchanged; the caller is
-# responsible for capturing/formatting them.
+# latest-release-tag
+# Print the latest stable release tag. RC and other pre-release tags are
+# ignored. Tags must be plain semver values like `53.1.0`.
+#
+# semver-check
+# Run cargo-semver-checks for the given packages against baseline_ref.
+# baseline_ref can be a tag or any git ref. Output and exit code are
+# passed through unchanged; the caller is responsible for capturing and
+# formatting them.
set -euo pipefail
@@ -59,9 +64,25 @@ cmd_changed_crates() {
done <<<"$crates" | xargs
}
+# ── latest-release-tag ──────────────────────────────────────────────
+cmd_latest_release_tag() {
+ local latest_release_tag
+ latest_release_tag=$(git tag --list \
+ | grep -E '^[0-9]+\.[0-9]+\.[0-9]+$' \
+ | sort -V \
+ | tail -n1 || true)
+
+ if [ -z "$latest_release_tag" ]; then
+ echo "No stable release tags found" >&2
+ return 1
+ fi
+
+ echo "$latest_release_tag"
+}
+
# ── semver-check ────────────────────────────────────────────────────
cmd_semver_check() {
- local base_ref="${1:?Usage: changed_crates.sh semver-check }"
+ local base_ref="${1:?Usage: changed_crates.sh semver-check }"
shift
local args=()
@@ -73,11 +94,12 @@ cmd_semver_check() {
}
# ── main ────────────────────────────────────────────────────────────
-cmd="${1:?Usage: changed_crates.sh [args...]}"
+cmd="${1:?Usage: changed_crates.sh [args...]}"
shift
case "$cmd" in
- changed-crates) cmd_changed_crates "$@" ;;
- semver-check) cmd_semver_check "$@" ;;
+ changed-crates) cmd_changed_crates "$@" ;;
+ latest-release-tag) cmd_latest_release_tag "$@" ;;
+ semver-check) cmd_semver_check "$@" ;;
*) echo "Unknown command: $cmd" >&2; exit 1 ;;
esac
diff --git a/ci/scripts/test_changed_crates.sh b/ci/scripts/test_changed_crates.sh
new file mode 100755
index 0000000000000..6d3d5886f5221
--- /dev/null
+++ b/ci/scripts/test_changed_crates.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+CHANGED_CRATES_SH="$SCRIPT_DIR/changed_crates.sh"
+TMP_ROOT=$(mktemp -d)
+trap 'rm -rf "$TMP_ROOT"' EXIT
+
+setup_git_repo() {
+ local repo_dir=$1
+ git -C "$repo_dir" init --quiet
+ git -C "$repo_dir" config user.email test@example.com
+ git -C "$repo_dir" config user.name test
+ git -C "$repo_dir" commit --quiet --allow-empty -m init
+}
+
+new_git_repo() {
+ local repo_dir
+ repo_dir=$(mktemp -d "$TMP_ROOT/repo.XXXXXX")
+ setup_git_repo "$repo_dir"
+ echo "$repo_dir"
+}
+
+run_latest_release_tag() {
+ local repo_dir=$1
+ (cd "$repo_dir" && "$CHANGED_CRATES_SH" latest-release-tag)
+}
+
+assert_eq() {
+ local expected=$1
+ local actual=$2
+ local message=$3
+ if [ "$actual" != "$expected" ]; then
+ echo "FAIL: $message" >&2
+ echo "expected: $expected" >&2
+ echo "actual: $actual" >&2
+ exit 1
+ fi
+}
+
+tag_repo() {
+ local repo_dir=$1
+ shift
+
+ for tag in "$@"; do
+ git -C "$repo_dir" tag "$tag"
+ done
+}
+
+assert_latest_release_tag() {
+ local test_name=$1
+ local expected=$2
+ shift 2
+
+ local repo_dir
+ repo_dir=$(new_git_repo)
+ tag_repo "$repo_dir" "$@"
+
+ local actual
+ actual=$(run_latest_release_tag "$repo_dir")
+ assert_eq "$expected" "$actual" "$test_name"
+}
+
+assert_latest_release_tag_fails() {
+ local test_name=$1
+ shift
+
+ local repo_dir
+ repo_dir=$(new_git_repo)
+ tag_repo "$repo_dir" "$@"
+
+ if run_latest_release_tag "$repo_dir" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then
+ echo "FAIL: $test_name" >&2
+ exit 1
+ fi
+ assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "$test_name"
+}
+
+assert_latest_release_tag "stable tag wins over newer RC" \
+ "53.1.0" \
+ "53.0.0" "53.1.0-rc1" "53.1.0" "54.0.0-rc1"
+
+assert_latest_release_tag "semver sort handles double-digit versions" \
+ "10.0.0" \
+ "9.9.9" "10.0.0" "10.0.1-rc1"
+
+assert_latest_release_tag "malformed and namespaced tags are ignored" \
+ "2.0.0" \
+ "ballista-9.0.0" "python-99.0.0" "2.0" "2.0.0" "3.0.0-alpha1"
+
+assert_latest_release_tag_fails "no tags error"
+
+assert_latest_release_tag_fails "only RC tags error" \
+ "53.1.0-rc1" "54.0.0-rc1"
+
+echo "changed_crates.sh tests passed"
diff --git a/datafusion/expr/src/higher_order_function.rs b/datafusion/expr/src/higher_order_function.rs
index 3dc143b8e5211..a14ff61813f37 100644
--- a/datafusion/expr/src/higher_order_function.rs
+++ b/datafusion/expr/src/higher_order_function.rs
@@ -87,8 +87,6 @@ pub struct HigherOrderSignature {
pub type_signature: HigherOrderTypeSignature,
/// The volatility of the function. See [Volatility] for more information.
pub volatility: Volatility,
- /// Whether [HigherOrderUDF::coerce_values_for_lambdas] should be called
- pub coerce_values_for_lambdas: bool,
/// The max number of times to call [HigherOrderUDF::lambda_parameters] before raising an error.
/// Used to guard against implementations that causes an infinite loop by endlessly returning
/// [LambdaParametersProgress::Partial]. Defaults to 256
@@ -103,7 +101,6 @@ impl HigherOrderSignature {
HigherOrderSignature {
type_signature,
volatility,
- coerce_values_for_lambdas: false,
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
}
}
@@ -113,7 +110,6 @@ impl HigherOrderSignature {
Self {
type_signature: HigherOrderTypeSignature::UserDefined,
volatility,
- coerce_values_for_lambdas: false,
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
}
}
@@ -123,7 +119,6 @@ impl HigherOrderSignature {
Self {
type_signature: HigherOrderTypeSignature::VariadicAny,
volatility,
- coerce_values_for_lambdas: false,
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
}
}
@@ -133,18 +128,9 @@ impl HigherOrderSignature {
Self {
type_signature: HigherOrderTypeSignature::Any(arg_count),
volatility,
- coerce_values_for_lambdas: false,
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
}
}
-
- /// Set [Self::coerce_values_for_lambdas] to true to indicate that [HigherOrderUDF::coerce_values_for_lambdas]
- /// should be called
- pub fn with_coerce_values_for_lambdas(mut self) -> Self {
- self.coerce_values_for_lambdas = true;
-
- self
- }
}
impl PartialEq for dyn HigherOrderUDF {
@@ -621,12 +607,12 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
///
/// assert_eq!(
/// coerce_to,
- /// vec![
+ /// Some(vec![
/// // return the same type for the array being reduced
/// DataType::new_list(DataType::Float32, true),
/// // coerce the initial value to the output of the merge lambda
/// DataType::Float32,
- /// ]
+ /// ])
/// );
///
/// ```
@@ -636,7 +622,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
///
/// The implementation can assume that some other part of the code has coerced
/// the actual argument types to match [`Self::signature`], except the coercion defined by
- /// [Self::coerce_values_for_lambdas], if applicable.
+ /// [Self::coerce_values_for_lambdas].
///
/// [`HigherOrderFunction`]: crate::expr::HigherOrderFunction
/// [`HigherOrderFunction::lambda_parameters`]: crate::expr::HigherOrderFunction::lambda_parameters
@@ -649,8 +635,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
/// Coerce value arguments of a function call to types that the function can evaluate also taking into
/// account the *output type of it's lambdas*. This differs from [HigherOrderUDF::coerce_value_types]
/// that only has access to the type of it's value arguments because it's called before the output type
- /// of lambdas are known. So that this method is called, the function must have it's
- /// [HigherOrderSignature::coerce_values_for_lambdas] set to true
+ /// of lambdas are known.
///
/// See the [type coercion module](crate::type_coercion)
/// documentation for more details on type coercion
@@ -659,29 +644,27 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
/// * `fields`: The argument types of the value arguments of this function, or the output type of lambdas
///
/// # Return value
- /// A Vec with the same number of [ValueOrLambda::Value] in `fields`. DataFusion will `CAST` the
- /// function call arguments to these specific types.
+ /// If `Some`, contains a Vec with the same number of [ValueOrLambda::Value] in `fields`.
+ /// DataFusion will `CAST` the function call arguments to these specific types. If `None`, no
+ /// coercion will be applied beyond the one defined by the function signature.
///
/// For example, a flexible array_reduce implementation (see [Self::lambda_parameters] docs), when working
/// with the expression below, may want to coerce it's initial value argument, the *integer* `0`,
- /// to match the output it's merge function, which is a *float*:
+ /// to match the output of it's merge function, which is a *float*:
///
/// `array_reduce([1.2, 2.1], 0, (acc, v) -> acc + v + 1.5, v -> v > 2.0)`
fn coerce_values_for_lambdas(
&self,
_fields: &[ValueOrLambda],
- ) -> Result> {
- not_impl_err!(
- "{} coerce_values_for_lambdas is not implemented",
- self.name()
- )
+ ) -> Result