Refactor performance regression workflow (#283)
popematt committed Aug 22, 2023
1 parent c73cf2a commit 3354034
Showing 2 changed files with 101 additions and 142 deletions.
29 changes: 0 additions & 29 deletions .github/workflows/PR-content-check.yml

This file was deleted.

214 changes: 101 additions & 113 deletions .github/workflows/performance-regression.yml
@@ -1,152 +1,140 @@
-name: Performance Regression Detector
+name: Performance Regression
 
 on:
   pull_request:
     branches: [ master ]
     paths:
       # Workflow will only run if something in this path is changed
       - 'amazon/**'
 
-jobs:
-  pr-content-check:
-    uses: ./.github/workflows/PR-content-check.yml
+env:
+  report_statistics: 'file_size,time_mean,time_error,ops/s_mean,ops/s_error,memory_usage_peak'
+  compare_statistics: 'file_size,time_mean'
+  data_size: '100'
+  spec_defaults: '{warmups:100,iterations:100}'
+  specs: '{command:read,format:ion_text} {command:write,format:ion_text} {command:read,format:ion_binary} {command:write,format:ion_binary}'
+  test_data_id: 'generated-test-data'
+  run_cli: 'python amazon/ionbenchmark/ion_benchmark_cli.py'
 
-  detect-regression:
-    name: Detect Regression
-    # commented out while working on workflow itself
-    # todo: re-enable when skipping is valued above running all the time
-    # needs: PR-Content-Check
-    # if: ${{ needs.PR-Content-Check.outputs.result == 'pass' }}
-    runs-on: macos-latest
-    strategy:
-      matrix:
-        python-version: ['3.9', '3.10', 'pypy-3.7', 'pypy-3.8']
-      fail-fast: false
-    steps:
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-
+jobs:
+  generate-test-data:
+    name: Generate Data
+    runs-on: ubuntu-latest
+    steps:
       # Generates data used for benchmarking
       - name: Checkout ion-data-generator
         uses: actions/checkout@v3
         with:
           repository: amazon-ion/ion-data-generator
           ref: main
-          path: ion-data-generator
 
       - name: Build ion-data-generator
-        run: cd ion-data-generator && mvn clean install
-
+        run: mvn clean install
       - name: Generate test Ion Data
         env:
-          jar_file: ion-data-generator/target/ion-data-generator-1.0-SNAPSHOT.jar
-          schema_dir: ion-data-generator/tst/com/amazon/ion/workflow
+          jar_file: target/ion-data-generator-1.0-SNAPSHOT.jar
+          schema_dir: tst/com/amazon/ion/workflow
         run: |
           mkdir -p testData
           for test in nestedStruct nestedList sexp realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03
           do
-            java -jar $jar_file generate -S 50000 --input-ion-schema $schema_dir/${test}.isl testData/${test}.10n
+            java -jar $jar_file generate -S ${{env.data_size}} --input-ion-schema $schema_dir/${test}.isl testData/${test}.10n
           done
       - name: Upload test Ion Data to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
-          name: test Ion Data
+          name: ${{env.test_data_id}}
           path: testData
 
-      # Generates performance results for the current commit
-      - name: Checkout the current commit of the Ion Python
-        uses: actions/checkout@v3
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          path: ion-python-current
-
-      - name: Create a virtual environment and setup the package
-        working-directory: ./ion-python-current
-        run: |
-          git submodule init && git submodule update && python3 -m venv ./venv && . venv/bin/activate
+  prepopulate-pip-cache:
+    # Since all the "Check" jobs can run in parallel, caching _could_ have basically no effect. In order to speed things
+    # up, this step can run in parallel with the "Generate Data" job, pre-caching all the dependencies.
+    name: Setup PIP Cache
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+          cache-dependency-path: '**/requirements.txt'
+      - run: |
           pip install -r requirements.txt
           pip install -e .
-      - name: Running performance benchmark
-        working-directory: ./ion-python-current
-        env:
-          warmups: 100
-          iterations: 1000
-          cli_script: amazon/ionbenchmark/ion_benchmark_cli.py
-          runner_dir: /Users/runner/work/ion-python/ion-python
-        run: |
-          . venv/bin/activate
-          for testset in nestedStruct nestedList sexp realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03
-          do
-            python $cli_script read -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_read/new --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-            python $cli_script write -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_write/new --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-          done
-      # Generates performance results for the previous commit
-      - name: Checkout the master branch of the Ion Python
+          # TODO: See if there's a way to cache the ion-c build output if it hasn't changed
+
+  detect-regression:
+    name: Check
+    runs-on: ubuntu-latest
+    needs: [generate-test-data, prepopulate-pip-cache]
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.11', 'pypy-3.8', 'pypy-3.10']
+        test-data: ['nestedStruct', 'nestedList', 'sexp', 'realWorldDataSchema01', 'realWorldDataSchema02', 'realWorldDataSchema03']
+      fail-fast: false
+    steps:
+      - name: Checkout the base of the PR
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.base_ref }}
+          submodules: recursive
+          path: baseline
+
+      - name: Checkout the head of the PR
         uses: actions/checkout@v3
         with:
-          ref: master
-          path: ion-python-master
+          ref: ${{ github.head_ref }}
+          submodules: recursive
+          path: new
 
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+          cache-dependency-path: '**/requirements.txt'
+
+      - name: Download Test Data
+        id: 'download'
+        uses: actions/download-artifact@v3
+        with:
+          name: ${{env.test_data_id}}
+
+      # Generates performance results for the previous commit
       - name: Create a virtual environment
-        working-directory: ./ion-python-master
+        working-directory: ./baseline
         run: |
-          git submodule init && git submodule update && python3 -m venv ./venv && . venv/bin/activate
           pip install -r requirements.txt
-          pip install -e .
-      - name: Running performance benchmark
-        working-directory: ./ion-python-master
-        env:
-          warmups: 100
-          iterations: 1000
-          cli_script: amazon/ionbenchmark/ion_benchmark_cli.py
-          runner_dir: /Users/runner/work/ion-python/ion-python
+          pip install .
+      - name: Run baseline performance benchmark
+        id: 'baseline'
+        working-directory: ./baseline
         run: |
-          . venv/bin/activate
-          for testset in nestedStruct nestedList sexp realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03
-          do
-            python $cli_script read -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_read/previous --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-            python $cli_script write -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_write/previous --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-          done
-      # Upload resource, results and report
-      - name: Upload new benchmark results directory to artifacts
-        uses: actions/upload-artifact@v2
-        with:
-          name: Benchmark result
-          path: benchmarkResults
-
-      - name: Upload test Ion Data to artifacts
-        uses: actions/upload-artifact@v2
-        with:
-          name: test Ion Data
-          path: testData
-
+          ${{env.run_cli}} spec '${{env.specs}}' -d '${{env.spec_defaults}}' \
+             -O '{input_file:"${{steps.download.outputs.download-path}}/${{ matrix.test-data }}.10n"}' \
+             -o "$PWD/report.ion" -r '${{env.report_statistics}}'
+          echo "::group::Ion Report"
+          echo "$(<report.ion)"
+          echo "::endgroup::"
+          echo "report=$PWD/report.ion" >> "$GITHUB_OUTPUT"
+      # Generates performance results for the current commit
+      - name: Create a virtual environment and setup the package
+        working-directory: ./new
+        run: |
+          pip install -r requirements.txt
+          pip install .
+      - name: Run new performance benchmark
+        id: 'new'
+        working-directory: ./new
+        run: |
+          ${{env.run_cli}} spec '${{env.specs}}' -d '${{env.spec_defaults}}' \
+             -O '{input_file:"${{steps.download.outputs.download-path}}/${{ matrix.test-data }}.10n"}' \
+             -o "$PWD/report.ion" -r '${{env.report_statistics}}'
+          echo "::group::Ion Report"
+          echo "$(<report.ion)"
+          echo "::endgroup::"
+          echo "report=$PWD/report.ion" >> "$GITHUB_OUTPUT"
       # Compare results and identify regression
       - name: Detect performance regression
-        id: regression_result
-        run: |
-          . /Users/runner/work/ion-python/ion-python/ion-python-current/venv/bin/activate
-          result=true
-          cd benchmarkResults && for FILE in *; do message=$(python /Users/runner/work/ion-python/ion-python/ion-python-current/amazon/ionbenchmark/ion_benchmark_cli.py compare --benchmark-result-previous $FILE/previous --benchmark-result-new $FILE/new $FILE/report | tee /dev/stderr) && if [ "$message" != "no regression detected" ]; then result=false; fi; done
-          echo "::set-output name=regression-result::$result"
-          if [ "$result" = "true" ]; then echo "No regression detected!" >> $GITHUB_STEP_SUMMARY; fi
-      - name: Upload comparison reports to the benchmark results directory
-        uses: actions/upload-artifact@v2
-        with:
-          name: Benchmark result
-          path: benchmarkResults
-
-      - name: Fail the workflow if regression happened
-        env:
-          regression_detect: ${{steps.regression_result.outputs.regression-result}}
-        if: ${{ env.regression_detect == 'false' }}
-        run: |
-          . /Users/runner/work/ion-python/ion-python/ion-python-current/venv/bin/activate
-          cd benchmarkResults && echo "**Below files are detected performance regression, please download the benchmark result(s) to see detail:**" >> $GITHUB_STEP_SUMMARY && for FILE in *; do regressionDetection=$(python /Users/runner/work/ion-python/ion-python/ion-python-current/amazon/ionbenchmark/ion_benchmark_cli.py compare --benchmark-result-previous $FILE/previous --benchmark-result-new $FILE/new $FILE/report | tee /dev/stderr) && if [ "$regressionDetection" != "no regression detected" ]; then echo "$FILE" >> $GITHUB_STEP_SUMMARY; fi; done
-          exit 1
+        working-directory: ./new
+        run: ${{env.run_cli}} compare --fail ${{steps.baseline.outputs.report}} ${{steps.new.outputs.report}} -c '${{env.compare_statistics}}'
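
For context, here is roughly what the matrixed benchmark step runs once GitHub Actions substitutes the ${{env.*}} and matrix placeholders, shown for a single matrix cell. This is a sketch, not a pinned invocation: INPUT_DIR stands in for steps.download.outputs.download-path, which is only known at runtime, while the flags and values come from the workflow's env block above.

# One expanded "Run baseline/new performance benchmark" invocation
# (matrix cell: python-version 3.9, test-data nestedStruct).
INPUT_DIR=/home/runner/work/ion-python/ion-python   # assumed artifact download path
python amazon/ionbenchmark/ion_benchmark_cli.py spec \
    '{command:read,format:ion_text} {command:write,format:ion_text} {command:read,format:ion_binary} {command:write,format:ion_binary}' \
    -d '{warmups:100,iterations:100}' \
    -O "{input_file:\"$INPUT_DIR/nestedStruct.10n\"}" \
    -o "$PWD/report.ion" \
    -r 'file_size,time_mean,time_error,ops/s_mean,ops/s_error,memory_usage_peak'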

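The final step then compares the two reports; judging by the --fail flag and the step's name, the CLI exits nonzero when the compared statistics regress, which fails the job. Expanded the same way, with the report paths as placeholders for steps.baseline.outputs.report and steps.new.outputs.report:

# Compare the baseline report against the new report on the configured statistics.
python amazon/ionbenchmark/ion_benchmark_cli.py compare --fail \
    baseline/report.ion new/report.ion \
    -c 'file_size,time_mean'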