diff --git a/.github/workflows/check-binary-licenses.yml b/.github/workflows/check-binary-licenses.yml
deleted file mode 100644
index ed033c4f76b..00000000000
--- a/.github/workflows/check-binary-licenses.yml
+++ /dev/null
@@ -1,180 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Check binary dependency licenses
-
-on:
-  pull_request:
-  workflow_dispatch:
-
-permissions:
-  contents: read
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
-
-jobs:
-  check-jvm-deps:
-    name: Check JVM dependencies
-    runs-on: ubuntu-22.04
-    env:
-      JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8
-      JVM_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8
-    services:
-      postgres:
-        image: postgres
-        env:
-          POSTGRES_PASSWORD: postgres
-        ports:
-          - 5432:5432
-        options: >-
-          --health-cmd="pg_isready -U postgres"
-          --health-interval=10s
-          --health-timeout=5s
-          --health-retries=5
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-java@v5
-        with:
-          distribution: temurin
-          java-version: 11
-      - uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-      - uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
-      - uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 # v8.1.0
-        with:
-          extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", "project/build.properties" ]'
-      - name: Create databases (required by sbt dist wiring)
-        run: |
-          psql -h localhost -U postgres -f sql/texera_ddl.sql
-          psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
-          psql -h localhost -U postgres -f sql/texera_lakefs.sql
-        env:
-          PGPASSWORD: postgres
-      - name: Build distributable bundles
-        # Build every dist-producing module so the union of bundled jars can
-        # be diffed against LICENSE-binary.
-        run: sbt 'clean; ConfigService/dist; AccessControlService/dist; FileService/dist; ComputingUnitManagingService/dist; WorkflowCompilingService/dist; WorkflowExecutionService/dist'
-      - name: Unzip each dist
-        run: |
-          mkdir -p /tmp/dists
-          for zip in \
-            config-service/target/universal/config-service-*.zip \
-            access-control-service/target/universal/access-control-service-*.zip \
-            file-service/target/universal/file-service-*.zip \
-            computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip \
-            workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
-            amber/target/universal/amber-*.zip; do
-            unzip -q "$zip" -d /tmp/dists/
-          done
-      - name: Check bundled jars against LICENSE-binary
-        run: |
-          ./bin/licensing/check_binary_deps.py jar \
-            /tmp/dists/config-service-*/lib \
-            /tmp/dists/access-control-service-*/lib \
-            /tmp/dists/file-service-*/lib \
-            /tmp/dists/computing-unit-managing-service-*/lib \
-            /tmp/dists/workflow-compiling-service-*/lib \
-            /tmp/dists/amber-*/lib
-
-      - name: Audit per-dep license preservation (advisory)
-        # Surface jars whose upstream LICENSE preserves embedded third-party
-        # copyrights so reviewers can confirm a per-dep licenses/LICENSE-*.txt
-        # file is in place. Advisory: prints findings, does not fail the job.
-        if: always()
-        run: |
-          ./bin/licensing/audit_jar_licenses.py \
-            /tmp/dists/config-service-*/lib \
-            /tmp/dists/access-control-service-*/lib \
-            /tmp/dists/file-service-*/lib \
-            /tmp/dists/computing-unit-managing-service-*/lib \
-            /tmp/dists/workflow-compiling-service-*/lib \
-            /tmp/dists/amber-*/lib
-
-  check-npm-deps:
-    name: Check npm dependencies
-    # Mirrors the frontend job in .github/workflows/github-action-build.yml.
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v5
-        with:
-          node-version: 20.19.0
-          architecture: x64
-      - uses: actions/cache@v4
-        with:
-          path: frontend/.yarn/cache
-          key: ${{ runner.os }}-x64-20.19.0-yarn-cache-v4-${{ hashFiles('**/yarn.lock') }}
-          restore-keys: |
-            ${{ runner.os }}-x64-20.19.0-yarn-cache-v4-
-      - name: Prepare Yarn 4.14.1
-        run: corepack enable && corepack prepare yarn@4.14.1 --activate
-      - uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-      - name: Install frontend dependencies
-        timeout-minutes: 20
-        run: yarn --cwd frontend install --immutable --inline-builds --network-timeout=100000
-      - name: Production build (emits 3rdpartylicenses.json)
-        run: yarn --cwd frontend run build:ci
-      - name: Check bundled npm packages against LICENSE-binary
-        run: ./bin/licensing/check_binary_deps.py npm frontend/dist/3rdpartylicenses.json
-
-  check-python-deps:
-    name: Check Python dependencies
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-      - name: Install Python dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r amber/requirements.txt
-          pip install -r amber/operator-requirements.txt
-          pip install pip-licenses
-      - name: Generate pip-licenses manifest
-        run: pip-licenses --format=csv --ignore-packages pip-licenses prettytable wcwidth > /tmp/pip-licenses.csv
-      - name: Check installed Python packages against LICENSE-binary
-        run: ./bin/licensing/check_binary_deps.py python /tmp/pip-licenses.csv
-
-  check-agent-service-deps:
-    name: Check agent-service dependencies
-    # Mirrors the agent-service job in .github/workflows/github-action-build.yml.
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - name: Setup Bun
-        run: |
-          curl -fsSL https://bun.sh/install | bash -s -- bun-v1.3.3
-          echo "$HOME/.bun/bin" >> $GITHUB_PATH
-      - uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-      - name: Install agent-service production dependencies
-        working-directory: agent-service
-        run: bun install --production --frozen-lockfile
-      - name: Generate license manifest
-        working-directory: agent-service
-        run: |
-          mkdir -p dist
-          bun run bin/collect-licenses.ts > dist/3rdpartylicenses.json
-      - name: Check bundled agent-service packages against LICENSE-binary
-        run: ./bin/licensing/check_binary_deps.py agent-npm agent-service/dist/3rdpartylicenses.json
diff --git a/.github/workflows/github-action-build.yml b/.github/workflows/github-action-build.yml
index f7a72b8da5a..a0782f71aa8 100644
--- a/.github/workflows/github-action-build.yml
+++ b/.github/workflows/github-action-build.yml
@@ -73,10 +73,13 @@ jobs:
         run: yarn --cwd frontend install --immutable --inline-builds --network-timeout=100000
       - name: Lint with Prettier & ESLint
         run: yarn --cwd frontend format:ci
-      - name: Run frontend unit tests
-        run: yarn --cwd frontend run test:ci
       - name: Prod build
         run: yarn --cwd frontend run build:ci
+      - name: Check bundled npm packages against LICENSE-binary
+        if: matrix.os == 'ubuntu-latest'
+        run: ./bin/licensing/check_binary_deps.py npm frontend/dist/3rdpartylicenses.json
+      - name: Run frontend unit tests
+        run: yarn --cwd frontend run test:ci
 
   scala:
     strategy:
@@ -134,6 +137,46 @@ jobs:
           psql -h localhost -U postgres -f sql/texera_lakefs.sql
         env:
           PGPASSWORD: postgres
+      - name: Build distributable bundles for license check
+        # Build every dist-producing module so the union of bundled jars can
+        # be diffed against LICENSE-binary.
+        run: sbt 'clean; ConfigService/dist; AccessControlService/dist; FileService/dist; ComputingUnitManagingService/dist; WorkflowCompilingService/dist; WorkflowExecutionService/dist'
+      - name: Unzip JVM distributable bundles
+        run: |
+          mkdir -p /tmp/dists
+          for zip in \
+            config-service/target/universal/config-service-*.zip \
+            access-control-service/target/universal/access-control-service-*.zip \
+            file-service/target/universal/file-service-*.zip \
+            computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip \
+            workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
+            amber/target/universal/amber-*.zip; do
+            unzip -q "$zip" -d /tmp/dists/
+          done
+      - name: Check bundled jars against LICENSE-binary
+        run: |
+          ./bin/licensing/check_binary_deps.py jar \
+            /tmp/dists/config-service-*/lib \
+            /tmp/dists/access-control-service-*/lib \
+            /tmp/dists/file-service-*/lib \
+            /tmp/dists/computing-unit-managing-service-*/lib \
+            /tmp/dists/workflow-compiling-service-*/lib \
+            /tmp/dists/amber-*/lib
+      - name: Audit per-dep license preservation (advisory)
+        # Surface jars whose upstream LICENSE preserves embedded third-party
+        # copyrights so reviewers can confirm a per-dep licenses/LICENSE-*.txt
+        # file is in place. Advisory only: continue-on-error keeps a non-zero
+        # exit of the audit script from failing the job or skipping the tests.
+        if: always()
+        continue-on-error: true
+        run: |
+          ./bin/licensing/audit_jar_licenses.py \
+            /tmp/dists/config-service-*/lib \
+            /tmp/dists/access-control-service-*/lib \
+            /tmp/dists/file-service-*/lib \
+            /tmp/dists/computing-unit-managing-service-*/lib \
+            /tmp/dists/workflow-compiling-service-*/lib \
+            /tmp/dists/amber-*/lib
       - name: Create texera_db_for_test_cases
         run: psql -h localhost -U postgres -v DB_NAME=texera_db_for_test_cases -f sql/texera_ddl.sql
         env:
@@ -167,6 +210,13 @@ jobs:
           python -m pip install --upgrade pip
           if [ -f amber/requirements.txt ]; then pip install -r amber/requirements.txt; fi
           if [ -f amber/operator-requirements.txt ]; then pip install -r amber/operator-requirements.txt; fi
+          if [ "${{ matrix.python-version }}" = "3.12" ]; then pip install pip-licenses; fi
+      - name: Generate pip-licenses manifest
+        if: matrix.python-version == '3.12'
+        run: pip-licenses --format=csv --ignore-packages pip-licenses prettytable wcwidth > /tmp/pip-licenses.csv
+      - name: Check installed Python packages against LICENSE-binary
+        if: matrix.python-version == '3.12'
+        run: ./bin/licensing/check_binary_deps.py python /tmp/pip-licenses.csv
       - name: Install PostgreSQL
         run: sudo apt-get update && sudo apt-get install -y postgresql
       - name: Start PostgreSQL Service
@@ -198,7 +248,17 @@ jobs:
         run: |
           curl -fsSL https://bun.sh/install | bash -s -- bun-v${{ matrix.bun-version }}
           echo "$HOME/.bun/bin" >> $GITHUB_PATH
-      - name: Install dependencies
+      - name: Install production dependencies
+        run: bun install --production --frozen-lockfile
+      - name: Generate agent-service license manifest
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          mkdir -p dist
+          bun run bin/collect-licenses.ts > dist/3rdpartylicenses.json
+      - name: Check bundled agent-service packages against LICENSE-binary
+        if: matrix.os == 'ubuntu-latest'
+        run: ../bin/licensing/check_binary_deps.py agent-npm dist/3rdpartylicenses.json
+      - name: Install development dependencies
         run: bun install --frozen-lockfile
       - name: Lint with Prettier
         run: bun run format:check