From 8ae2e0ccff2d3e0e97f61537315606c5ae8ed02a Mon Sep 17 00:00:00 2001 From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 1 May 2026 00:33:25 -0700 Subject: [PATCH] ci: merge binary license checks into build workflow (#4597) ### What changes were proposed in this PR? This PR merges the standalone binary dependency license checks into the existing `Build` workflow. Instead of running a separate `Check binary dependency licenses` workflow with four extra jobs, the license checks now run inside the existing build jobs: - npm license validation runs in the Linux `frontend` job after the production build. - JVM license validation runs in the `scala` job, immediately after the database-creation step and before the `Create texera_db_for_test_cases` step. - Python license validation runs in the Python 3.12 matrix entry. - agent-service license validation runs in the Linux `agent-service` job. The dedicated `.github/workflows/check-binary-licenses.yml` workflow is removed so PR checks are less noisy while keeping the underlying validations. ### Any related issues, documentation, discussions? Closes #4596 ### How was this PR tested? Not run locally. This change only updates GitHub Actions workflow configuration, and the expected validation path is to let GitHub Actions execute the updated workflow on the PR. ### Was this PR authored or co-authored using generative AI tooling? Generated-by: Codex GPT-5 --- .github/workflows/check-binary-licenses.yml | 180 -------------------- .github/workflows/github-action-build.yml | 61 ++++++- 2 files changed, 58 insertions(+), 183 deletions(-) delete mode 100644 .github/workflows/check-binary-licenses.yml diff --git a/.github/workflows/check-binary-licenses.yml b/.github/workflows/check-binary-licenses.yml deleted file mode 100644 index ed033c4f76b..00000000000 --- a/.github/workflows/check-binary-licenses.yml +++ /dev/null @@ -1,180 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Check binary dependency licenses - -on: - pull_request: - workflow_dispatch: - -permissions: - contents: read - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - -jobs: - check-jvm-deps: - name: Check JVM dependencies - runs-on: ubuntu-22.04 - env: - JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8 - JVM_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8 - services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: postgres - ports: - - 5432:5432 - options: >- - --health-cmd="pg_isready -U postgres" - --health-interval=10s - --health-timeout=5s - --health-retries=5 - steps: - - uses: actions/checkout@v5 - - uses: actions/setup-java@v5 - with: - distribution: temurin - java-version: 11 - - uses: actions/setup-python@v6 - with: - python-version: '3.12' - - uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22 - - uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 # v8.1.0 - with: - extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", "project/build.properties" ]' - - name: Create databases (required by sbt dist wiring) - run: | - psql -h localhost -U postgres -f sql/texera_ddl.sql - psql 
-h localhost -U postgres -f sql/iceberg_postgres_catalog.sql - psql -h localhost -U postgres -f sql/texera_lakefs.sql - env: - PGPASSWORD: postgres - - name: Build distributable bundles - # Build every dist-producing module so the union of bundled jars can - # be diffed against LICENSE-binary. - run: sbt 'clean; ConfigService/dist; AccessControlService/dist; FileService/dist; ComputingUnitManagingService/dist; WorkflowCompilingService/dist; WorkflowExecutionService/dist' - - name: Unzip each dist - run: | - mkdir -p /tmp/dists - for zip in \ - config-service/target/universal/config-service-*.zip \ - access-control-service/target/universal/access-control-service-*.zip \ - file-service/target/universal/file-service-*.zip \ - computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip \ - workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \ - amber/target/universal/amber-*.zip; do - unzip -q "$zip" -d /tmp/dists/ - done - - name: Check bundled jars against LICENSE-binary - run: | - ./bin/licensing/check_binary_deps.py jar \ - /tmp/dists/config-service-*/lib \ - /tmp/dists/access-control-service-*/lib \ - /tmp/dists/file-service-*/lib \ - /tmp/dists/computing-unit-managing-service-*/lib \ - /tmp/dists/workflow-compiling-service-*/lib \ - /tmp/dists/amber-*/lib - - - name: Audit per-dep license preservation (advisory) - # Surface jars whose upstream LICENSE preserves embedded third-party - # copyrights so reviewers can confirm a per-dep licenses/LICENSE-*.txt - # file is in place. Advisory: prints findings, does not fail the job. 
- if: always() - run: | - ./bin/licensing/audit_jar_licenses.py \ - /tmp/dists/config-service-*/lib \ - /tmp/dists/access-control-service-*/lib \ - /tmp/dists/file-service-*/lib \ - /tmp/dists/computing-unit-managing-service-*/lib \ - /tmp/dists/workflow-compiling-service-*/lib \ - /tmp/dists/amber-*/lib - - check-npm-deps: - name: Check npm dependencies - # Mirrors the frontend job in .github/workflows/github-action-build.yml. - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - uses: actions/setup-node@v5 - with: - node-version: 20.19.0 - architecture: x64 - - uses: actions/cache@v4 - with: - path: frontend/.yarn/cache - key: ${{ runner.os }}-x64-20.19.0-yarn-cache-v4-${{ hashFiles('**/yarn.lock') }} - restore-keys: | - ${{ runner.os }}-x64-20.19.0-yarn-cache-v4- - - name: Prepare Yarn 4.14.1 - run: corepack enable && corepack prepare yarn@4.14.1 --activate - - uses: actions/setup-python@v6 - with: - python-version: '3.12' - - name: Install frontend dependencies - timeout-minutes: 20 - run: yarn --cwd frontend install --immutable --inline-builds --network-timeout=100000 - - name: Production build (emits 3rdpartylicenses.json) - run: yarn --cwd frontend run build:ci - - name: Check bundled npm packages against LICENSE-binary - run: ./bin/licensing/check_binary_deps.py npm frontend/dist/3rdpartylicenses.json - - check-python-deps: - name: Check Python dependencies - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - uses: actions/setup-python@v6 - with: - python-version: '3.12' - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install -r amber/requirements.txt - pip install -r amber/operator-requirements.txt - pip install pip-licenses - - name: Generate pip-licenses manifest - run: pip-licenses --format=csv --ignore-packages pip-licenses prettytable wcwidth > /tmp/pip-licenses.csv - - name: Check installed Python packages against LICENSE-binary - run: ./bin/licensing/check_binary_deps.py python 
/tmp/pip-licenses.csv - - check-agent-service-deps: - name: Check agent-service dependencies - # Mirrors the agent-service job in .github/workflows/github-action-build.yml. - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - name: Setup Bun - run: | - curl -fsSL https://bun.sh/install | bash -s -- bun-v1.3.3 - echo "$HOME/.bun/bin" >> $GITHUB_PATH - - uses: actions/setup-python@v6 - with: - python-version: '3.12' - - name: Install agent-service production dependencies - working-directory: agent-service - run: bun install --production --frozen-lockfile - - name: Generate license manifest - working-directory: agent-service - run: | - mkdir -p dist - bun run bin/collect-licenses.ts > dist/3rdpartylicenses.json - - name: Check bundled agent-service packages against LICENSE-binary - run: ./bin/licensing/check_binary_deps.py agent-npm agent-service/dist/3rdpartylicenses.json diff --git a/.github/workflows/github-action-build.yml b/.github/workflows/github-action-build.yml index f7a72b8da5a..a0782f71aa8 100644 --- a/.github/workflows/github-action-build.yml +++ b/.github/workflows/github-action-build.yml @@ -73,10 +73,13 @@ jobs: run: yarn --cwd frontend install --immutable --inline-builds --network-timeout=100000 - name: Lint with Prettier & ESLint run: yarn --cwd frontend format:ci - - name: Run frontend unit tests - run: yarn --cwd frontend run test:ci - name: Prod build run: yarn --cwd frontend run build:ci + - name: Check bundled npm packages against LICENSE-binary + if: matrix.os == 'ubuntu-latest' + run: ./bin/licensing/check_binary_deps.py npm frontend/dist/3rdpartylicenses.json + - name: Run frontend unit tests + run: yarn --cwd frontend run test:ci scala: strategy: @@ -134,6 +137,41 @@ jobs: psql -h localhost -U postgres -f sql/texera_lakefs.sql env: PGPASSWORD: postgres + - name: Build distributable bundles for license check + # Build every dist-producing module so the union of bundled jars can + # be diffed against LICENSE-binary. 
+ run: sbt 'clean; ConfigService/dist; AccessControlService/dist; FileService/dist; ComputingUnitManagingService/dist; WorkflowCompilingService/dist; WorkflowExecutionService/dist' + - name: Unzip JVM distributable bundles + run: | + mkdir -p /tmp/dists + for zip in \ + config-service/target/universal/config-service-*.zip \ + access-control-service/target/universal/access-control-service-*.zip \ + file-service/target/universal/file-service-*.zip \ + computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip \ + workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \ + amber/target/universal/amber-*.zip; do + unzip -q "$zip" -d /tmp/dists/ + done + - name: Check bundled jars against LICENSE-binary + run: | + ./bin/licensing/check_binary_deps.py jar \ + /tmp/dists/config-service-*/lib \ + /tmp/dists/access-control-service-*/lib \ + /tmp/dists/file-service-*/lib \ + /tmp/dists/computing-unit-managing-service-*/lib \ + /tmp/dists/workflow-compiling-service-*/lib \ + /tmp/dists/amber-*/lib + - name: Audit per-dep license preservation (advisory) + if: always() + run: | + ./bin/licensing/audit_jar_licenses.py \ + /tmp/dists/config-service-*/lib \ + /tmp/dists/access-control-service-*/lib \ + /tmp/dists/file-service-*/lib \ + /tmp/dists/computing-unit-managing-service-*/lib \ + /tmp/dists/workflow-compiling-service-*/lib \ + /tmp/dists/amber-*/lib - name: Create texera_db_for_test_cases run: psql -h localhost -U postgres -v DB_NAME=texera_db_for_test_cases -f sql/texera_ddl.sql env: @@ -167,6 +205,13 @@ jobs: python -m pip install --upgrade pip if [ -f amber/requirements.txt ]; then pip install -r amber/requirements.txt; fi if [ -f amber/operator-requirements.txt ]; then pip install -r amber/operator-requirements.txt; fi + if [ "${{ matrix.python-version }}" = "3.12" ]; then pip install pip-licenses; fi + - name: Generate pip-licenses manifest + if: matrix.python-version == '3.12' + run: pip-licenses --format=csv 
--ignore-packages pip-licenses prettytable wcwidth > /tmp/pip-licenses.csv + - name: Check installed Python packages against LICENSE-binary + if: matrix.python-version == '3.12' + run: ./bin/licensing/check_binary_deps.py python /tmp/pip-licenses.csv - name: Install PostgreSQL run: sudo apt-get update && sudo apt-get install -y postgresql - name: Start PostgreSQL Service @@ -198,7 +243,17 @@ jobs: run: | curl -fsSL https://bun.sh/install | bash -s -- bun-v${{ matrix.bun-version }} echo "$HOME/.bun/bin" >> $GITHUB_PATH - - name: Install dependencies + - name: Install production dependencies + run: bun install --production --frozen-lockfile + - name: Generate agent-service license manifest + if: matrix.os == 'ubuntu-latest' + run: | + mkdir -p dist + bun run bin/collect-licenses.ts > dist/3rdpartylicenses.json + - name: Check bundled agent-service packages against LICENSE-binary + if: matrix.os == 'ubuntu-latest' + run: ../bin/licensing/check_binary_deps.py agent-npm dist/3rdpartylicenses.json + - name: Install development dependencies run: bun install --frozen-lockfile - name: Lint with Prettier run: bun run format:check