# Workflow file for the "build" workflow (captured from run "build #1885").

# Display name of the workflow.
name: build

# Triggers: every pull request, pushes to main, a daily cron run, and manual
# dispatch with optional overrides for dataset, DVC revisions and cloud runs.
on:
  pull_request: {}
  push:
    branches:
      - main
  schedule:
    # Once a day at minute 0 of hour 0.
    - cron: '0 0 * * *'
  workflow_dispatch:
    inputs:
      # Dataset the benchmarks run against (exported to jobs as DATASET).
      dataset:
        type: choice
        description: "Dataset Size"
        required: false
        default: "small"
        options:
          - tiny
          - small
          - large
          - mnist
          - openorca
          - used-cars
          - youtube-trending
          - meta-kaggle
          - chest-xray-pneumonia
      # DVC revisions to benchmark (exported to jobs as REVS); an empty value
      # falls back to the default list defined in `env`.
      revs:
        type: string
        description: "Comma-separated list of DVC revisions"
        required: false
        default: ""
      # Opt-in switch for the s3/gs/azure benchmark jobs.
      clouds:
        type: boolean
        description: "Run s3/gs/azure benchmarks"
        required: false
        default: false
# Environment variables shared by every job in this workflow.
env:
  DVC_TEST: "true"
  FORCE_COLOR: "1"
  # Scheduled runs always use 'mnist'; otherwise the manually selected dataset
  # is used, falling back to 'small' when no input is present.
  DATASET: >-
    ${{ (github.event_name == 'schedule' && 'mnist')
    || github.event.inputs.dataset
    || 'small' }}
  # Manually supplied revisions, or the default list when none are given.
  REVS: ${{ github.event.inputs.revs || 'main,3.10.0,2.58.2' }}

# Workflow-level token permissions; jobs that declare their own `permissions`
# block replace these.
permissions:
  contents: read
jobs:
# Checks out the repository, sets up Python 3.10 and installs the project
# requirements. No linter command is invoked by the steps visible here.
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: install requirements
      run: |
        pip install -U pip
        pip install wheel
        pip install -r requirements.txt
# Produces the JSON test matrix that the `build` job expands with fromJson().
gen:
  runs-on: ubuntu-latest
  outputs:
    # Output of scripts/ci/list_tests.sh, exposed to dependent jobs.
    tests: ${{ steps.tests.outputs.tests }}
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: install reqs
      run: |
        pip install -U pip
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
    - id: tests
      # The redirect target is quoted so the path is never word-split.
      run: echo "tests=$(./scripts/ci/list_tests.sh)" >> "$GITHUB_OUTPUT"
# Runs the benchmarks once per entry of the matrix produced by `gen` and
# uploads the raw results from `.benchmarks`.
build:
  needs: [gen]
  name: run ${{ matrix.test.name }}
  runs-on: ${{ matrix.os }}
  timeout-minutes: 180
  strategy:
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
    matrix:
      os: [ubuntu-latest]
      test: ${{fromJson(needs.gen.outputs.tests)}}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - uses: actions/checkout@v4
    - name: install requirements
      run: |
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
    - name: run benchmarks
      shell: bash
      env:
        DVC_BENCH_AZURE_CONN_STR: ${{ secrets.DVC_BENCH_AZURE_CONN_STR }}
      # REVS can be free-form workflow_dispatch input; REVS and DATASET are
      # quoted so the shell never word-splits or glob-expands them.
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${REVS}"
        --pyargs ${{ matrix.test.path }}
        --dataset "${DATASET}"
    # NOTE(review): actions/upload-artifact@v3 has been deprecated by GitHub.
    # Moving to v4 needs a unique artifact name per matrix entry and a matching
    # change to the download step of the `compare` job — confirm before bumping.
    - name: upload raw results
      uses: actions/upload-artifact@v3
      with:
        name: .benchmarks
        path: .benchmarks
# Produces the JSON matrix of S3 benchmark tests expanded by `build_s3`.
gen_s3:
  # `build_s3`, the only consumer of this job's output, runs solely on the
  # schedule or when cloud benchmarks are requested, so the matrix is not
  # generated for any other event.
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  runs-on: ubuntu-latest
  outputs:
    # Output of scripts/ci/list_tests_cloud.sh for the s3 remote.
    tests: ${{ steps.tests.outputs.tests }}
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: install reqs
      run: |
        pip install -U pip
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
        pip install "dvc-s3[tests] @ git+https://github.com/iterative/dvc-s3"
    - id: tests
      # The redirect target is quoted so the path is never word-split.
      run: echo "tests=$(./scripts/ci/list_tests_cloud.sh s3)" >> "$GITHUB_OUTPUT"
# Runs the S3 benchmarks for each entry of the matrix produced by `gen_s3`.
# Only runs on the schedule or when cloud benchmarks are requested manually;
# the AWS role and bucket are configured on scheduled runs only.
build_s3:
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen_s3]
  name: run ${{ matrix.test.name }}
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failure in this job does not fail the workflow run.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      test: ${{fromJson(needs.gen_s3.outputs.tests)}}
  permissions:
    # Needed to request the OIDC token used to assume the AWS role.
    id-token: write
    # A job-level block sets every unlisted permission to none, so read access
    # for actions/checkout is granted explicitly.
    contents: read
  steps:
    - name: configure AWS credentials
      if: ${{ github.event_name == 'schedule' }}
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: arn:aws:iam::342840881361:role/dvc-bench-gha
        # 28800 s = 8 h, the same length as the 480-minute job timeout.
        role-duration-seconds: 28800
        aws-region: us-east-1
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - uses: actions/checkout@v4
    - name: install requirements
      run: |
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
        pip install "dvc-s3[tests] @ git+https://github.com/iterative/dvc-s3"
    - name: configure real S3 DVC env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_AWS_REPO_BUCKET=dvc-bench-ci" >> "$GITHUB_ENV"
    - name: run benchmarks
      shell: bash
      # REVS can be free-form workflow_dispatch input; REVS and DATASET are
      # quoted so the shell never word-splits or glob-expands them.
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${REVS}"
        --dvc-install-deps s3
        --pyargs ${{ matrix.test.path }}
        --dataset "${DATASET}"
    # NOTE(review): actions/upload-artifact@v3 has been deprecated by GitHub.
    # Moving to v4 needs a unique artifact name per matrix entry and a matching
    # change to the download step of the `compare` job — confirm before bumping.
    - name: upload raw results
      uses: actions/upload-artifact@v3
      with:
        name: .benchmarks
        path: .benchmarks
# Produces the JSON matrix of Azure benchmark tests expanded by `build_azure`.
gen_azure:
  # `build_azure`, the only consumer of this job's output, runs solely on the
  # schedule or when cloud benchmarks are requested, so the matrix is not
  # generated for any other event.
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  runs-on: ubuntu-latest
  outputs:
    # Output of scripts/ci/list_tests_cloud.sh for the azure remote.
    tests: ${{ steps.tests.outputs.tests }}
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: install reqs
      run: |
        pip install -U pip
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
        pip install "dvc-azure[tests] @ git+https://github.com/iterative/dvc-azure"
    - id: tests
      # The redirect target is quoted so the path is never word-split.
      run: echo "tests=$(./scripts/ci/list_tests_cloud.sh azure)" >> "$GITHUB_OUTPUT"
# Runs the Azure benchmarks for each entry of the matrix produced by
# `gen_azure`. Only runs on the schedule or when cloud benchmarks are requested
# manually; the Azure login and storage settings apply to scheduled runs only.
build_azure:
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen_azure]
  name: run ${{ matrix.test.name }}
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failure in this job does not fail the workflow run.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      test: ${{fromJson(needs.gen_azure.outputs.tests)}}
  permissions:
    # Needed to request the OIDC token used by azure/login.
    id-token: write
    # A job-level block sets every unlisted permission to none, so read access
    # for actions/checkout is granted explicitly.
    contents: read
  # The `github-actions` environment is attached on scheduled runs only.
  environment: ${{ github.event_name == 'schedule' && 'github-actions' || ''}}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - uses: actions/checkout@v4
    - name: 'Az CLI login'
      if: ${{ github.event_name == 'schedule' }}
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
    - name: 'Set connection string'
      if: ${{ github.event_name == 'schedule' }}
      id: set-conn-str
      # `jq -r` prints the raw string. Without it jq emits the JSON-encoded
      # value, so the exported variable would carry literal double quotes.
      # The value is masked before being written so it cannot leak into logs.
      run: |
        connection_string=$(az storage account show-connection-string -g dvc-bench-ci -n dvcbenchci | jq -r '.connectionString')
        echo "::add-mask::$connection_string"
        echo "DVC_TEST_AZURE_CONNECTION_STRING=$connection_string" >> "$GITHUB_ENV"
    - name: install requirements
      run: |
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
        pip install "dvc-azure[tests] @ git+https://github.com/iterative/dvc-azure"
    - name: setup env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_AZURE_PATH=az://dvc-bench-ci" >> "$GITHUB_ENV"
    - name: run benchmarks
      shell: bash
      # REVS can be free-form workflow_dispatch input; REVS and DATASET are
      # quoted so the shell never word-splits or glob-expands them.
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${REVS}"
        --dvc-install-deps azure
        --pyargs ${{ matrix.test.path }}
        --dataset "${DATASET}"
    # NOTE(review): actions/upload-artifact@v3 has been deprecated by GitHub.
    # Moving to v4 needs a unique artifact name per matrix entry and a matching
    # change to the download step of the `compare` job — confirm before bumping.
    - name: upload raw results
      uses: actions/upload-artifact@v3
      with:
        name: .benchmarks
        path: .benchmarks
# Produces the JSON matrix of GS benchmark tests expanded by `build_gs`.
gen_gs:
  # `build_gs`, the only consumer of this job's output, runs solely on the
  # schedule or when cloud benchmarks are requested, so the matrix is not
  # generated for any other event.
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  runs-on: ubuntu-latest
  outputs:
    # Output of scripts/ci/list_tests_cloud.sh for the gs remote.
    tests: ${{ steps.tests.outputs.tests }}
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: install reqs
      run: |
        pip install -U pip
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
        pip install "dvc-gs[tests] @ git+https://github.com/iterative/dvc-gs"
    - id: tests
      # The redirect target is quoted so the path is never word-split.
      run: echo "tests=$(./scripts/ci/list_tests_cloud.sh gs)" >> "$GITHUB_OUTPUT"
# Runs the GS benchmarks for each entry of the matrix produced by `gen_gs`.
# Only runs on the schedule or when cloud benchmarks are requested manually;
# GCP authentication and the bucket are configured on scheduled runs only.
build_gs:
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen_gs]
  name: run ${{ matrix.test.name }}
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failure in this job does not fail the workflow run.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      test: ${{fromJson(needs.gen_gs.outputs.tests)}}
  permissions:
    # Needed to request the OIDC token used for workload identity federation.
    id-token: write
    # A job-level block sets every unlisted permission to none, so read access
    # for actions/checkout is granted explicitly.
    contents: read
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - uses: actions/checkout@v4
    - id: 'auth'
      if: ${{ github.event_name == 'schedule' }}
      name: 'Authenticate to GCP'
      uses: 'google-github-actions/auth@v2.1.3'
      with:
        create_credentials_file: true
        workload_identity_provider: 'projects/385088528371/locations/global/workloadIdentityPools/iterative-sandbox/providers/github'
        service_account: 'dvc-bench@iterative-sandbox.iam.gserviceaccount.com'
    - name: install requirements
      run: |
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
        pip install "dvc-gs[tests] @ git+https://github.com/iterative/dvc-gs"
    - name: configure real GS DVC env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_GS_BUCKET=dvc-bench" >> "$GITHUB_ENV"
    - name: run benchmarks
      shell: bash
      # REVS can be free-form workflow_dispatch input; REVS and DATASET are
      # quoted so the shell never word-splits or glob-expands them.
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${REVS}"
        --dvc-install-deps gs
        --pyargs ${{ matrix.test.path }}
        --dataset "${DATASET}"
    # NOTE(review): actions/upload-artifact@v3 has been deprecated by GitHub.
    # Moving to v4 needs a unique artifact name per matrix entry and a matching
    # change to the download step of the `compare` job — confirm before bumping.
    - name: upload raw results
      uses: actions/upload-artifact@v3
      with:
        name: .benchmarks
        path: .benchmarks
# Posts a Slack message when a needed benchmark job fails on main. Manually
# dispatched runs never notify.
notify:
  needs: [build, build_s3, build_azure, build_gs]
  if: github.event_name != 'workflow_dispatch' && github.ref == 'refs/heads/main' && failure()
  runs-on: ubuntu-latest
  steps:
    - name: Slack Notification
      uses: rtCamp/action-slack-notify@v2.3.0
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        SLACK_COLOR: ${{ job.status }}
        SLACK_MESSAGE: 'Benchmarks failed on main :confused_dog:'
        SLACK_TITLE: ":dvc:"
        SLACK_USERNAME: dvc-bench
# Downloads the raw results uploaded by the benchmark jobs, runs
# scripts/ci/gen_html.sh, and publishes the contents of `html/`: to S3 and
# GitHub Pages on scheduled/manual runs, and as a PR comment on non-fork
# pull requests.
compare:
  name: join results and publish
  needs: [build, build_s3, build_azure, build_gs]
  # Runs regardless of upstream status, unless a needed job reports 'failure'.
  if: always() && !contains(needs.*.result, 'failure')
  runs-on: ubuntu-latest
  environment: aws
  permissions:
    pages: write
    pull-requests: write
    contents: write
    id-token: write
  steps:
    - uses: iterative/setup-cml@v3
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - uses: actions/checkout@v4
    - name: install requirements
      run: |
        pip install -U pip
        pip install -r requirements.txt
        pip install "dvc[testing] @ git+https://github.com/iterative/dvc"
    # NOTE(review): actions/download-artifact@v3 has been deprecated by GitHub.
    # It has to be bumped together with the upload steps of the benchmark
    # jobs — confirm before changing.
    - name: download ubuntu results
      uses: actions/download-artifact@v3
    - name: compare results
      shell: bash
      run: ./scripts/ci/gen_html.sh
    - name: configure AWS credentials
      if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: arn:aws:iam::260760892802:role/dvc-bench
        aws-region: us-east-2
    # Store this run's report under pages/<event>/<run id>_<attempt>.html.
    - name: upload to s3
      if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
      run: aws s3 cp html/index.html s3://dvc-bench/pages/${{ github.event_name }}/${{ github.run_id }}_${{ github.run_attempt }}.html
    # Rebuild the site: the current report becomes latest.html, then the first
    # 30 entries of each reverse-sorted S3 listing are copied in and the first
    # entry of each listing is duplicated as that folder's latest.html.
    - name: generate pages
      if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
      run: |
        mv html/index.html html/latest.html
        mkdir html/workflow_dispatch html/schedule
        LATEST_SCHEDULE="$(aws s3 ls s3://dvc-bench/pages/schedule/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
        LATEST_DISPATCH="$(aws s3 ls s3://dvc-bench/pages/workflow_dispatch/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
        echo "$LATEST_SCHEDULE" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/schedule
        echo "$LATEST_DISPATCH" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/workflow_dispatch
        cp html/schedule/$(echo "$LATEST_SCHEDULE" | head -1 | xargs basename) html/schedule/latest.html
        cp html/workflow_dispatch/$(echo "$LATEST_DISPATCH" | head -1 | xargs basename) html/workflow_dispatch/latest.html
    - name: generate listings
      uses: jayanta525/github-pages-directory-listing@v4.0.0
      with:
        FOLDER: html
    # Only for pull requests whose head repository is not a fork.
    - name: post comment
      if: ${{ github.event_name == 'pull_request' && ! github.event.pull_request.head.repo.fork }}
      env:
        REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        cml comment update --watermark-title='dvc-bench report' report.md
    - name: Setup Pages
      id: pages
      uses: actions/configure-pages@v5
    - name: Upload artifact
      uses: actions/upload-pages-artifact@v3
      with:
        path: ./html
# Deploys the Pages artifact uploaded by `compare` to GitHub Pages. Limited to
# scheduled and manually dispatched runs in which `compare` did not fail.
deploy:
  needs: compare
  if: always() && !contains(needs.*.result, 'failure') && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
  runs-on: ubuntu-latest
  permissions:
    pages: write
    id-token: write
  environment:
    name: github-pages
    # URL reported by the deployment step below.
    url: ${{ steps.deployment.outputs.page_url }}
  steps:
    - name: Deploy to GitHub Pages
      id: deployment
      uses: actions/deploy-pages@v4