diff --git a/.github/actions/run-semver-check/action.yml b/.github/actions/run-semver-check/action.yml new file mode 100644 index 0000000000000..4fc1ac729d879 --- /dev/null +++ b/.github/actions/run-semver-check/action.yml @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Run semver check +description: Run cargo-semver-checks, preserve logs, and expose success/failure without failing the job. +inputs: + baseline_ref: + description: Baseline git ref or tag for cargo-semver-checks. + required: true + packages: + description: Space-separated package names to check. + required: true + log_path: + description: Path where semver output should be copied. + required: true +outputs: + result: + description: "Semver check result: success or failure." + value: ${{ steps.check.outputs.result }} +runs: + using: composite + steps: + - name: Run cargo-semver-checks + id: check + shell: bash + env: + BASELINE_REF: ${{ inputs.baseline_ref }} + PACKAGES: ${{ inputs.packages }} + LOG_PATH: ${{ inputs.log_path }} + run: | + set +e + # `tee` lets cargo's output stream live into the Actions log + # while we also keep a copy for the PR comment. + ci/scripts/changed_crates.sh semver-check "$BASELINE_REF" $PACKAGES \ + 2>&1 | tee "$LOG_PATH" + EXIT_CODE=${PIPESTATUS[0]} + # Pass the result through an output instead of failing the job: + # semver breakage should surface as a comment/label signal while this + # CI job status stays green. + if [ "$EXIT_CODE" -eq 0 ]; then + echo "result=success" >> "$GITHUB_OUTPUT" + else + echo "result=failure" >> "$GITHUB_OUTPUT" + fi diff --git a/.github/actions/setup-semver-check/action.yml b/.github/actions/setup-semver-check/action.yml new file mode 100644 index 0000000000000..c414214807c46 --- /dev/null +++ b/.github/actions/setup-semver-check/action.yml @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Setup semver check +description: Fetch PR base, determine changed crates, and install tools required by cargo-semver-checks. +inputs: + base_ref: + description: Pull request base branch ref. + required: true + repository: + description: Repository to fetch the base branch from. + required: true +outputs: + packages: + description: Space-separated changed publishable crate names. + value: ${{ steps.changed_crates.outputs.packages }} +runs: + using: composite + steps: + # `origin` may point at a fork (when a contributor runs this locally) or + # at a stale ref. Fetch the base branch from the PR's upstream repo into + # a dedicated `apache/` ref so the baseline is unambiguous and the + # same ref name works locally (`git remote add apache ...`) and in CI. + - name: Fetch base branch + shell: bash + env: + BASE_REF: ${{ inputs.base_ref }} + REPO: ${{ inputs.repository }} + run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}" + + - name: Determine changed crates + id: changed_crates + shell: bash + env: + BASE_REF: ${{ inputs.base_ref }} + run: | + PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}") + echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" + echo "Changed crates: $PACKAGES" + + # `datafusion-substrait` (and crates that depend on it via sqllogictest) + # have a build script that calls protoc, which is not preinstalled on + # ubuntu-latest runners. + - name: Install Protobuf Compiler + if: steps.changed_crates.outputs.packages != '' + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler + + - name: Install cargo-semver-checks + if: steps.changed_crates.outputs.packages != '' + uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 + with: + tool: cargo-semver-checks diff --git a/.github/actions/stage-semver-artifact/action.yml b/.github/actions/stage-semver-artifact/action.yml new file mode 100644 index 0000000000000..3e6e4b1d80542 --- /dev/null +++ b/.github/actions/stage-semver-artifact/action.yml @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Stage semver artifact +description: Stage semver result fields and sanitized logs for the comment workflow. +inputs: + artifact_dir: + description: Directory to write artifact files into. + required: true + pr_number: + description: Pull request number. + required: true + result: + description: "Semver result: success or failure." + required: true + baseline_ref: + description: Baseline git ref or tag used by the semver check. + required: true + log_path: + description: Path to raw semver output log. + required: true + latest_release_tag: + description: Latest release tag, for blocking release-baseline checks. + required: false + default: "" +runs: + using: composite + steps: + - name: Stage artifact files + shell: bash + env: + ARTIFACT_DIR: ${{ inputs.artifact_dir }} + PR_NUMBER: ${{ inputs.pr_number }} + RESULT: ${{ inputs.result }} + BASELINE_REF: ${{ inputs.baseline_ref }} + LOG_PATH: ${{ inputs.log_path }} + LATEST_RELEASE_TAG: ${{ inputs.latest_release_tag }} + run: | + mkdir -p "$ARTIFACT_DIR" + echo "$PR_NUMBER" > "$ARTIFACT_DIR/pr_number" + echo "$RESULT" > "$ARTIFACT_DIR/result" + echo "$BASELINE_REF" > "$ARTIFACT_DIR/baseline_ref" + if [ -n "$LATEST_RELEASE_TAG" ]; then + echo "$LATEST_RELEASE_TAG" > "$ARTIFACT_DIR/latest_release_tag" + fi + if [ -f "$LOG_PATH" ]; then + sed 's/\x1b\[[0-9;]*m//g' "$LOG_PATH" > "$ARTIFACT_DIR/logs" + else + : > "$ARTIFACT_DIR/logs" + fi diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 67ab985228b47..41340391d8dfa 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -50,8 +50,8 @@ permissions: contents: read jobs: - check-semver: - name: Check semver + check-semver-advisory: + name: Check semver - advisory PR-local runs-on: ubuntu-latest steps: - name: Checkout @@ -59,83 +59,112 @@ jobs: with: fetch-depth: 0 - # `origin` may point at a fork (when a contributor runs this locally) or - # at a stale ref. Fetch the base branch from the PR's upstream repo into - # a dedicated `apache/` ref so the baseline is unambiguous and the - # same ref name works locally (`git remote add apache ...`) and in CI. - - name: Fetch base branch - env: - BASE_REF: ${{ github.base_ref }} - REPO: ${{ github.repository }} - run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}" - - - name: Determine changed crates - id: changed_crates - env: - BASE_REF: ${{ github.base_ref }} - run: | - PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}") - echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" - echo "Changed crates: $PACKAGES" - - # `datafusion-substrait` (and crates that depend on it via sqllogictest) - # have a build script that calls protoc, which is not preinstalled on - # ubuntu-latest runners. - - name: Install Protobuf Compiler - if: steps.changed_crates.outputs.packages != '' - run: | - sudo apt-get update - sudo apt-get install -y protobuf-compiler - - - name: Install cargo-semver-checks - if: steps.changed_crates.outputs.packages != '' - uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 + - name: Setup semver check + id: semver_setup + uses: ./.github/actions/setup-semver-check with: - tool: cargo-semver-checks + base_ref: ${{ github.base_ref }} + repository: ${{ github.repository }} - - name: Run cargo-semver-checks + - name: Run cargo-semver-checks against PR base branch id: check_semver - if: steps.changed_crates.outputs.packages != '' - env: - BASE_REF: ${{ github.base_ref }} - PACKAGES: ${{ steps.changed_crates.outputs.packages }} - run: | - set +e - # `tee` lets cargo's output stream live into the Actions log - # while we also keep a copy for the PR comment. - # Using `apache` remote here to point to the repository the pull request is against - ci/scripts/changed_crates.sh semver-check "apache/${BASE_REF}" $PACKAGES \ - 2>&1 | tee /tmp/semver-output.txt - EXIT_CODE=${PIPESTATUS[0]} - # Pass the result through an output instead of failing the job: - # a detected breaking change should surface as a PR comment, not a - # red check, so PR authors aren't confused by an intentional break. - if [ "$EXIT_CODE" -eq 0 ]; then - echo "result=success" >> "$GITHUB_OUTPUT" - else - echo "result=failure" >> "$GITHUB_OUTPUT" - fi + if: steps.semver_setup.outputs.packages != '' + uses: ./.github/actions/run-semver-check + with: + baseline_ref: apache/${{ github.base_ref }} + packages: ${{ steps.semver_setup.outputs.packages }} + log_path: /tmp/advisory-semver-output.txt # Stage the data the companion comment workflow needs into a single # directory. We default the result to "success" so the comment # workflow clears any stale comment when the check step is skipped # (e.g. no published crates changed). - - name: Stage artifact for comment workflow + - name: Stage advisory artifact for comment workflow if: always() - env: - PR_NUMBER: ${{ github.event.pull_request.number }} - CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} + uses: ./.github/actions/stage-semver-artifact + with: + artifact_dir: semver-advisory-artifact + pr_number: ${{ github.event.pull_request.number }} + result: ${{ steps.check_semver.outputs.result || 'success' }} + baseline_ref: apache/${{ github.base_ref }} + log_path: /tmp/advisory-semver-output.txt + + - name: Upload advisory artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: semver-advisory-check-result + path: semver-advisory-artifact/ + retention-days: 1 + + check-semver-blocking: + name: Check semver - blocking latest release + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Setup semver check + id: semver_setup + uses: ./.github/actions/setup-semver-check + with: + base_ref: ${{ github.base_ref }} + repository: ${{ github.repository }} + + - name: Determine latest stable release tag + id: latest_release run: | - mkdir -p semver-artifact - echo "$PR_NUMBER" > semver-artifact/pr_number - echo "$CHECK_RESULT" > semver-artifact/result - if [ -f /tmp/semver-output.txt ]; then - sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs - else - : > semver-artifact/logs - fi - - - name: Upload artifact + git fetch --tags --force + LATEST_RELEASE_TAG=$(ci/scripts/changed_crates.sh latest-release-tag) + echo "tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT" + echo "Latest stable release tag: $LATEST_RELEASE_TAG" + + - name: Run cargo-semver-checks against latest stable release + id: check_semver + if: steps.semver_setup.outputs.packages != '' + uses: ./.github/actions/run-semver-check + with: + baseline_ref: ${{ steps.latest_release.outputs.tag }} + packages: ${{ steps.semver_setup.outputs.packages }} + log_path: /tmp/blocking-semver-output.txt + + - name: Stage blocking artifact for comment workflow + if: always() + uses: ./.github/actions/stage-semver-artifact + with: + artifact_dir: semver-blocking-artifact + pr_number: ${{ github.event.pull_request.number }} + result: ${{ steps.check_semver.outputs.result || 'success' }} + baseline_ref: ${{ steps.latest_release.outputs.tag }} + latest_release_tag: ${{ steps.latest_release.outputs.tag }} + log_path: /tmp/blocking-semver-output.txt + + - name: Upload blocking artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: semver-blocking-check-result + path: semver-blocking-artifact/ + retention-days: 1 + + # workflow_run workflows execute from the default branch. While this PR is + # under review, the default-branch comment workflow still expects the old + # single-artifact contract. Upload a legacy-compatible copy sourced from + # the blocking latest-release signal so in-flight PR checks keep working. + - name: Stage legacy artifact for default-branch comment workflow + if: always() + uses: ./.github/actions/stage-semver-artifact + with: + artifact_dir: semver-artifact + pr_number: ${{ github.event.pull_request.number }} + result: ${{ steps.check_semver.outputs.result || 'success' }} + baseline_ref: ${{ steps.latest_release.outputs.tag }} + latest_release_tag: ${{ steps.latest_release.outputs.tag }} + log_path: /tmp/blocking-semver-output.txt + + - name: Upload legacy artifact if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index 579c61cb9d5c7..d2178036ead68 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -53,7 +53,8 @@ permissions: # A dedicated label, separate from the existing `api change` label. # `api change` may be applied manually for behavioral changes that aren't # strictly API changes, so we can't safely auto-remove it when this check -# passes. This auto-managed label is fully owned by the workflow. +# passes. This auto-managed label is fully owned by the workflow and tracks +# only the blocking latest-release semver signal. env: BREAKING_CHANGE_LABEL: "auto detected api change" @@ -62,53 +63,113 @@ jobs: name: Comment on pull request if: github.event.workflow_run.event == 'pull_request' runs-on: ubuntu-latest - # Scoped to the minimum needed to upsert/delete the sticky comment. + # Scoped to the minimum needed to upsert/delete the sticky comment and label. permissions: actions: read + issues: write pull-requests: write steps: - - name: Download semver-check artifact + - name: Download advisory semver-check artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: - name: semver-check-result + name: semver-advisory-check-result run-id: ${{ github.event.workflow_run.id }} github-token: ${{ github.token }} - path: ./semver-artifact + path: ./semver-advisory-artifact - - name: Read and validate artifact + - name: Download blocking semver-check artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: semver-blocking-check-result + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ github.token }} + path: ./semver-blocking-artifact + + - name: Read and validate artifacts id: read run: | set -euo pipefail - # Validate every field: the artifact comes from a workflow run - # that compiled fork-controlled code, so its contents are untrusted. - PR_NUMBER=$(cat ./semver-artifact/pr_number) - if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Invalid PR number: $PR_NUMBER" >&2 + require_regex() { + local name=$1 + local value=$2 + local regex=$3 + if ! [[ "$value" =~ $regex ]]; then + echo "Invalid $name: $value" >&2 + exit 1 + fi + } + + require_result() { + local name=$1 + local value=$2 + if [[ "$value" != "success" && "$value" != "failure" ]]; then + echo "Invalid $name check result: $value" >&2 + exit 1 + fi + } + + write_output() { + local key=$1 + local value=$2 + echo "$key=$value" >> "$GITHUB_OUTPUT" + } + + write_multiline_output() { + local key=$1 + local file=$2 + # Random delimiter so a malicious log line can't close the heredoc + # and inject extra output keys. See: + # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings + local delim + delim="EOF_$(openssl rand -hex 16)" + { + echo "${key}<<${delim}" + cat "$file" + echo "$delim" + } >> "$GITHUB_OUTPUT" + } + + # Validate every field: the artifacts come from workflow runs + # that compiled fork-controlled code, so their contents are untrusted. + ADVISORY_PR_NUMBER=$(cat ./semver-advisory-artifact/pr_number) + BLOCKING_PR_NUMBER=$(cat ./semver-blocking-artifact/pr_number) + require_regex "advisory PR number" "$ADVISORY_PR_NUMBER" '^[0-9]+$' + require_regex "blocking PR number" "$BLOCKING_PR_NUMBER" '^[0-9]+$' + if [ "$ADVISORY_PR_NUMBER" != "$BLOCKING_PR_NUMBER" ]; then + echo "Mismatched PR numbers: $ADVISORY_PR_NUMBER != $BLOCKING_PR_NUMBER" >&2 exit 1 fi - CHECK_RESULT=$(cat ./semver-artifact/result) - if [[ "$CHECK_RESULT" != "success" && "$CHECK_RESULT" != "failure" ]]; then - echo "Invalid check result: $CHECK_RESULT" >&2 + + ADVISORY_RESULT=$(cat ./semver-advisory-artifact/result) + BLOCKING_RESULT=$(cat ./semver-blocking-artifact/result) + require_result "advisory" "$ADVISORY_RESULT" + require_result "blocking" "$BLOCKING_RESULT" + + ADVISORY_BASELINE=$(cat ./semver-advisory-artifact/baseline_ref) + BLOCKING_BASELINE=$(cat ./semver-blocking-artifact/baseline_ref) + LATEST_RELEASE_TAG=$(cat ./semver-blocking-artifact/latest_release_tag) + require_regex "advisory baseline" "$ADVISORY_BASELINE" '^[A-Za-z0-9._/-]+$' + require_regex "blocking baseline" "$BLOCKING_BASELINE" '^[A-Za-z0-9._/-]+$' + require_regex "latest release tag" "$LATEST_RELEASE_TAG" '^[0-9]+\.[0-9]+\.[0-9]+$' + if [ "$BLOCKING_BASELINE" != "$LATEST_RELEASE_TAG" ]; then + echo "Mismatched blocking baseline and latest release tag: $BLOCKING_BASELINE != $LATEST_RELEASE_TAG" >&2 exit 1 fi - echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT" - echo "result=$CHECK_RESULT" >> "$GITHUB_OUTPUT" - - # Multi-line output: random delimiter so a malicious log line can't - # close the heredoc and inject extra output keys. See: - # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings - DELIM="EOF_$(openssl rand -hex 16)" - { - echo "logs<<${DELIM}" - cat ./semver-artifact/logs - echo "${DELIM}" - } >> "$GITHUB_OUTPUT" + + write_output "pr_number" "$ADVISORY_PR_NUMBER" + write_output "advisory_result" "$ADVISORY_RESULT" + write_output "blocking_result" "$BLOCKING_RESULT" + write_output "advisory_baseline" "$ADVISORY_BASELINE" + write_output "blocking_baseline" "$BLOCKING_BASELINE" + write_output "latest_release_tag" "$LATEST_RELEASE_TAG" + write_multiline_output "advisory_logs" ./semver-advisory-artifact/logs + write_multiline_output "blocking_logs" ./semver-blocking-artifact/logs # The marker `` is what makes the comment # "sticky": maintain-one-comment uses it to find and replace (or # delete) the existing comment instead of stacking new ones. - name: Upsert sticky comment - if: steps.read.outputs.result != 'success' + if: steps.read.outputs.advisory_result != 'success' || steps.read.outputs.blocking_result != 'success' uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -118,19 +179,52 @@ jobs: Thank you for opening this pull request! - Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch). + [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported semver compatibility warnings for changed published crates. + + DataFusion runs two SemVer checks: + + - **Latest release compatibility** compares this PR with the latest stable release tag. This is the blocking user-facing compatibility signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label when it fails. + - **Base branch compatibility** compares this PR with its base branch. This is advisory reviewer information for API churn on unreleased `main`. + + Interpretation guide: + + - **Base branch warning, latest release passes**: advisory only. The PR changes API that exists on unreleased `main`, but not in the latest release. Reviewers should confirm the unreleased API change is intentional. + - **Latest release warning, base branch passes**: blocking. The base branch may already contain an unreleased breaking API change, but users upgrading from the latest release can still be affected. + + ## Latest release compatibility — blocking + + Result: `${{ steps.read.outputs.blocking_result }}` + Latest release tag: `${{ steps.read.outputs.latest_release_tag }}` + + A `failure` here means changed published crates are not SemVer-compatible with the latest stable release tag. + +
+ Latest release compatibility details + + ``` + ${{ steps.read.outputs.blocking_logs }} + ``` + +
+ + ## Base branch compatibility — advisory + + Result: `${{ steps.read.outputs.advisory_result }}` + Baseline: `${{ steps.read.outputs.advisory_baseline }}` + + A `failure` here means changed published crates are not SemVer-compatible with the PR base branch. This is advisory review information only.
- Details + Base branch compatibility details ``` - ${{ steps.read.outputs.logs }} + ${{ steps.read.outputs.advisory_logs }} ```
- name: Delete sticky comment - if: steps.read.outputs.result == 'success' + if: steps.read.outputs.advisory_result == 'success' && steps.read.outputs.blocking_result == 'success' uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -139,7 +233,7 @@ jobs: delete: true - name: Add "auto detected api change" label - if: steps.read.outputs.result != 'success' + if: steps.read.outputs.blocking_result != 'success' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO: ${{ github.repository }} @@ -149,7 +243,7 @@ jobs: --add-label "$BREAKING_CHANGE_LABEL" - name: Remove "auto detected api change" label - if: steps.read.outputs.result == 'success' + if: steps.read.outputs.blocking_result == 'success' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO: ${{ github.repository }} diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh index 6d014a9492632..17f148e3349a3 100755 --- a/ci/scripts/changed_crates.sh +++ b/ci/scripts/changed_crates.sh @@ -24,10 +24,15 @@ # Only published workspace members (those without `publish = false`) are # considered. # -# semver-check -# Run cargo-semver-checks for the given packages against base_ref. -# Output and exit code are passed through unchanged; the caller is -# responsible for capturing/formatting them. +# latest-release-tag +# Print the latest stable release tag. RC and other pre-release tags are +# ignored. Tags must be plain semver values like `53.1.0`. +# +# semver-check +# Run cargo-semver-checks for the given packages against baseline_ref. +# baseline_ref can be a tag or any git ref. Output and exit code are +# passed through unchanged; the caller is responsible for capturing and +# formatting them. set -euo pipefail @@ -59,9 +64,25 @@ cmd_changed_crates() { done <<<"$crates" | xargs } +# ── latest-release-tag ────────────────────────────────────────────── +cmd_latest_release_tag() { + local latest_release_tag + latest_release_tag=$(git tag --list \ + | grep -E '^[0-9]+\.[0-9]+\.[0-9]+$' \ + | sort -V \ + | tail -n1 || true) + + if [ -z "$latest_release_tag" ]; then + echo "No stable release tags found" >&2 + return 1 + fi + + echo "$latest_release_tag" +} + # ── semver-check ──────────────────────────────────────────────────── cmd_semver_check() { - local base_ref="${1:?Usage: changed_crates.sh semver-check }" + local base_ref="${1:?Usage: changed_crates.sh semver-check }" shift local args=() @@ -73,11 +94,12 @@ cmd_semver_check() { } # ── main ──────────────────────────────────────────────────────────── -cmd="${1:?Usage: changed_crates.sh [args...]}" +cmd="${1:?Usage: changed_crates.sh [args...]}" shift case "$cmd" in - changed-crates) cmd_changed_crates "$@" ;; - semver-check) cmd_semver_check "$@" ;; + changed-crates) cmd_changed_crates "$@" ;; + latest-release-tag) cmd_latest_release_tag "$@" ;; + semver-check) cmd_semver_check "$@" ;; *) echo "Unknown command: $cmd" >&2; exit 1 ;; esac diff --git a/ci/scripts/test_changed_crates.sh b/ci/scripts/test_changed_crates.sh new file mode 100755 index 0000000000000..6d3d5886f5221 --- /dev/null +++ b/ci/scripts/test_changed_crates.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +CHANGED_CRATES_SH="$SCRIPT_DIR/changed_crates.sh" +TMP_ROOT=$(mktemp -d) +trap 'rm -rf "$TMP_ROOT"' EXIT + +setup_git_repo() { + local repo_dir=$1 + git -C "$repo_dir" init --quiet + git -C "$repo_dir" config user.email test@example.com + git -C "$repo_dir" config user.name test + git -C "$repo_dir" commit --quiet --allow-empty -m init +} + +new_git_repo() { + local repo_dir + repo_dir=$(mktemp -d "$TMP_ROOT/repo.XXXXXX") + setup_git_repo "$repo_dir" + echo "$repo_dir" +} + +run_latest_release_tag() { + local repo_dir=$1 + (cd "$repo_dir" && "$CHANGED_CRATES_SH" latest-release-tag) +} + +assert_eq() { + local expected=$1 + local actual=$2 + local message=$3 + if [ "$actual" != "$expected" ]; then + echo "FAIL: $message" >&2 + echo "expected: $expected" >&2 + echo "actual: $actual" >&2 + exit 1 + fi +} + +tag_repo() { + local repo_dir=$1 + shift + + for tag in "$@"; do + git -C "$repo_dir" tag "$tag" + done +} + +assert_latest_release_tag() { + local test_name=$1 + local expected=$2 + shift 2 + + local repo_dir + repo_dir=$(new_git_repo) + tag_repo "$repo_dir" "$@" + + local actual + actual=$(run_latest_release_tag "$repo_dir") + assert_eq "$expected" "$actual" "$test_name" +} + +assert_latest_release_tag_fails() { + local test_name=$1 + shift + + local repo_dir + repo_dir=$(new_git_repo) + tag_repo "$repo_dir" "$@" + + if run_latest_release_tag "$repo_dir" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then + echo "FAIL: $test_name" >&2 + exit 1 + fi + assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "$test_name" +} + +assert_latest_release_tag "stable tag wins over newer RC" \ + "53.1.0" \ + "53.0.0" "53.1.0-rc1" "53.1.0" "54.0.0-rc1" + +assert_latest_release_tag "semver sort handles double-digit versions" \ + "10.0.0" \ + "9.9.9" "10.0.0" "10.0.1-rc1" + +assert_latest_release_tag "malformed and namespaced tags are ignored" \ + "2.0.0" \ + "ballista-9.0.0" "python-99.0.0" "2.0" "2.0.0" "3.0.0-alpha1" + +assert_latest_release_tag_fails "no tags error" + +assert_latest_release_tag_fails "only RC tags error" \ + "53.1.0-rc1" "54.0.0-rc1" + +echo "changed_crates.sh tests passed" diff --git a/datafusion/expr/src/higher_order_function.rs b/datafusion/expr/src/higher_order_function.rs index 3dc143b8e5211..a14ff61813f37 100644 --- a/datafusion/expr/src/higher_order_function.rs +++ b/datafusion/expr/src/higher_order_function.rs @@ -87,8 +87,6 @@ pub struct HigherOrderSignature { pub type_signature: HigherOrderTypeSignature, /// The volatility of the function. See [Volatility] for more information. pub volatility: Volatility, - /// Whether [HigherOrderUDF::coerce_values_for_lambdas] should be called - pub coerce_values_for_lambdas: bool, /// The max number of times to call [HigherOrderUDF::lambda_parameters] before raising an error. /// Used to guard against implementations that causes an infinite loop by endlessly returning /// [LambdaParametersProgress::Partial]. Defaults to 256 @@ -103,7 +101,6 @@ impl HigherOrderSignature { HigherOrderSignature { type_signature, volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } @@ -113,7 +110,6 @@ impl HigherOrderSignature { Self { type_signature: HigherOrderTypeSignature::UserDefined, volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } @@ -123,7 +119,6 @@ impl HigherOrderSignature { Self { type_signature: HigherOrderTypeSignature::VariadicAny, volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } @@ -133,18 +128,9 @@ impl HigherOrderSignature { Self { type_signature: HigherOrderTypeSignature::Any(arg_count), volatility, - coerce_values_for_lambdas: false, lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS, } } - - /// Set [Self::coerce_values_for_lambdas] to true to indicate that [HigherOrderUDF::coerce_values_for_lambdas] - /// should be called - pub fn with_coerce_values_for_lambdas(mut self) -> Self { - self.coerce_values_for_lambdas = true; - - self - } } impl PartialEq for dyn HigherOrderUDF { @@ -621,12 +607,12 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// /// assert_eq!( /// coerce_to, - /// vec![ + /// Some(vec![ /// // return the same type for the array being reduced /// DataType::new_list(DataType::Float32, true), /// // coerce the initial value to the output of the merge lambda /// DataType::Float32, - /// ] + /// ]) /// ); /// /// ``` @@ -636,7 +622,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// /// The implementation can assume that some other part of the code has coerced /// the actual argument types to match [`Self::signature`], except the coercion defined by - /// [Self::coerce_values_for_lambdas], if applicable. + /// [Self::coerce_values_for_lambdas]. /// /// [`HigherOrderFunction`]: crate::expr::HigherOrderFunction /// [`HigherOrderFunction::lambda_parameters`]: crate::expr::HigherOrderFunction::lambda_parameters @@ -649,8 +635,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// Coerce value arguments of a function call to types that the function can evaluate also taking into /// account the *output type of it's lambdas*. This differs from [HigherOrderUDF::coerce_value_types] /// that only has access to the type of it's value arguments because it's called before the output type - /// of lambdas are known. So that this method is called, the function must have it's - /// [HigherOrderSignature::coerce_values_for_lambdas] set to true + /// of lambdas are known. /// /// See the [type coercion module](crate::type_coercion) /// documentation for more details on type coercion @@ -659,29 +644,27 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any { /// * `fields`: The argument types of the value arguments of this function, or the output type of lambdas /// /// # Return value - /// A Vec with the same number of [ValueOrLambda::Value] in `fields`. DataFusion will `CAST` the - /// function call arguments to these specific types. + /// If `Some`, contains a Vec with the same number of [ValueOrLambda::Value] in `fields`. + /// DataFusion will `CAST` the function call arguments to these specific types. If `None`, no + /// coercion will be applied beyond the one defined by the function signature. /// /// For example, a flexible array_reduce implementation (see [Self::lambda_parameters] docs), when working /// with the expression below, may want to coerce it's initial value argument, the *integer* `0`, - /// to match the output it's merge function, which is a *float*: + /// to match the output of it's merge function, which is a *float*: /// /// `array_reduce([1.2, 2.1], 0, (acc, v) -> acc + v + 1.5, v -> v > 2.0)` fn coerce_values_for_lambdas( &self, _fields: &[ValueOrLambda], - ) -> Result> { - not_impl_err!( - "{} coerce_values_for_lambdas is not implemented", - self.name() - ) + ) -> Result>> { + Ok(None) } /// What type will be returned by this function, given the arguments? /// /// The implementation can assume that some other part of the code has coerced /// the actual argument types to match [`Self::signature`], including the coercion - /// defined by [Self::coerce_values_for_lambdas], if applicable. + /// defined by [Self::coerce_values_for_lambdas]. /// /// # Example creating `Field` /// diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 86616daf08c73..8c26f23dafe81 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -158,9 +158,9 @@ pub fn fields_with_udf( /// argument must be coerced to match `signature`. /// For lambda arguments, returns a clone of the associated data /// -/// Note this does not invokes [HigherOrderUDF::coerce_values_for_lambdas] -/// if requested by the function signature. If that's required, use -/// [value_fields_with_higher_order_udf_and_lambdas] instead +/// Note this does not invokes [HigherOrderUDF::coerce_values_for_lambdas]. +/// If that's required, use [value_fields_with_higher_order_udf_and_lambdas] +/// instead /// /// For more details on coercion in general, please see the /// [`type_coercion`](crate::type_coercion) module. @@ -235,8 +235,8 @@ pub fn value_fields_with_higher_order_udf( /// Performs type coercion for higher order function arguments, /// including those defined by [HigherOrderUDF::coerce_values_for_lambdas], -/// if defined by the signature. Note that compared to -/// [value_fields_with_higher_order_udf], this function requires +/// if it returns `Some(...)` instead of the default `None`. Note that +/// compared to [value_fields_with_higher_order_udf], this function requires /// the [ValueOrLambda::Lambda] variant to contain the output field of the lambda. /// /// For value arguments, returns the field to which each @@ -251,16 +251,16 @@ pub fn value_fields_with_higher_order_udf_and_lambdas( ) -> Result>> { let mut new_fields = value_fields_with_higher_order_udf(current_fields, func)?; - if func.signature().coerce_values_for_lambdas { - let new_types = new_fields - .iter() - .map(|f| match f { - ValueOrLambda::Value(f) => ValueOrLambda::Value(f.data_type().clone()), - ValueOrLambda::Lambda(f) => ValueOrLambda::Lambda(f.data_type().clone()), - }) - .collect::>(); + let new_types = new_fields + .iter() + .map(|f| match f { + ValueOrLambda::Value(f) => ValueOrLambda::Value(f.data_type().clone()), + ValueOrLambda::Lambda(f) => ValueOrLambda::Lambda(f.data_type().clone()), + }) + .collect::>(); - let mut new_value_types = func.coerce_values_for_lambdas(&new_types)?.into_iter(); + if let Some(new_value_types) = func.coerce_values_for_lambdas(&new_types)? { + let mut new_value_types = new_value_types.into_iter(); let value_types_count = new_types .iter() @@ -1851,7 +1851,7 @@ mod tests { fn coerce_values_for_lambdas( &self, fields: &[ValueOrLambda], - ) -> Result> { + ) -> Result>> { // thoerical impl of array_reduce without finish let [ ValueOrLambda::Value(list), @@ -1862,7 +1862,7 @@ mod tests { unreachable!() }; - Ok(vec![list.clone(), merge.clone()]) + Ok(Some(vec![list.clone(), merge.clone()])) } fn lambda_parameters( @@ -1925,8 +1925,7 @@ mod tests { #[test] fn test_higher_order_function_coerce_values_for_lambdas() { let fun = MockHigherOrderUDF { - signature: HigherOrderSignature::variadic_any(Volatility::Immutable) - .with_coerce_values_for_lambdas(), + signature: HigherOrderSignature::variadic_any(Volatility::Immutable), coerced_value_types: vec![], };