Refactor performance regression workflow (#283)
popematt committed Aug 22, 2023
1 parent c73cf2a commit 3354034
Showing 2 changed files with 101 additions and 142 deletions.
29 changes: 0 additions & 29 deletions .github/workflows/PR-content-check.yml

This file was deleted.

214 changes: 101 additions & 113 deletions .github/workflows/performance-regression.yml
@@ -1,152 +1,140 @@
-name: Performance Regression Detector
+name: Performance Regression
 
 on:
   pull_request:
     branches: [ master ]
     paths:
       # Workflow will only run if something in this path is changed
       - 'amazon/**'
 
-jobs:
-  pr-content-check:
-    uses: ./.github/workflows/PR-content-check.yml
+env:
+  report_statistics: 'file_size,time_mean,time_error,ops/s_mean,ops/s_error,memory_usage_peak'
+  compare_statistics: 'file_size,time_mean'
+  data_size: '100'
+  spec_defaults: '{warmups:100,iterations:100}'
+  specs: '{command:read,format:ion_text} {command:write,format:ion_text} {command:read,format:ion_binary} {command:write,format:ion_binary}'
+  test_data_id: 'generated-test-data'
+  run_cli: 'python amazon/ionbenchmark/ion_benchmark_cli.py'
 
-  detect-regression:
-    name: Detect Regression
-    # commented out while working on workflow itself
-    # todo: re-enable when skipping is valued above running all the time
-    # needs: PR-Content-Check
-    # if: ${{ needs.PR-Content-Check.outputs.result == 'pass' }}
-    runs-on: macos-latest
-    strategy:
-      matrix:
-        python-version: ['3.9', '3.10', 'pypy-3.7', 'pypy-3.8']
-      fail-fast: false
-    steps:
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-
+jobs:
+  generate-test-data:
+    name: Generate Data
+    runs-on: ubuntu-latest
+    steps:
       # Generates data used for benchmarking
       - name: Checkout ion-data-generator
         uses: actions/checkout@v3
         with:
           repository: amazon-ion/ion-data-generator
           ref: main
-          path: ion-data-generator
 
       - name: Build ion-data-generator
-        run: cd ion-data-generator && mvn clean install
-
+        run: mvn clean install
       - name: Generate test Ion Data
         env:
-          jar_file: ion-data-generator/target/ion-data-generator-1.0-SNAPSHOT.jar
-          schema_dir: ion-data-generator/tst/com/amazon/ion/workflow
+          jar_file: target/ion-data-generator-1.0-SNAPSHOT.jar
+          schema_dir: tst/com/amazon/ion/workflow
         run: |
           mkdir -p testData
           for test in nestedStruct nestedList sexp realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03
           do
-            java -jar $jar_file generate -S 50000 --input-ion-schema $schema_dir/${test}.isl testData/${test}.10n
+            java -jar $jar_file generate -S ${{env.data_size}} --input-ion-schema $schema_dir/${test}.isl testData/${test}.10n
           done
       - name: Upload test Ion Data to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
-          name: test Ion Data
+          name: ${{env.test_data_id}}
           path: testData
 
-      # Generates performance results for the current commit
-      - name: Checkout the current commit of the Ion Python
-        uses: actions/checkout@v3
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          path: ion-python-current
-
-      - name: Create a virtual environment and setup the package
-        working-directory: ./ion-python-current
-        run: |
-          git submodule init && git submodule update && python3 -m venv ./venv && . venv/bin/activate
+  prepopulate-pip-cache:
+    # Since all the "Check" jobs can run in parallel, caching _could_ have basically no effect. In order to speed things
+    # up, this step can run in parallel with the "Generate Data" job, pre-caching all the dependencies.
+    name: Setup PIP Cache
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+          cache-dependency-path: '**/requirements.txt'
+      - run: |
           pip install -r requirements.txt
           pip install -e .
-      - name: Running performance benchmark
-        working-directory: ./ion-python-current
-        env:
-          warmups: 100
-          iterations: 1000
-          cli_script: amazon/ionbenchmark/ion_benchmark_cli.py
-          runner_dir: /Users/runner/work/ion-python/ion-python
-        run: |
-          . venv/bin/activate
-          for testset in nestedStruct nestedList sexp realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03
-          do
-            python $cli_script read -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_read/new --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-            python $cli_script write -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_write/new --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-          done
-      # Generates performance results for the previous commit
-      - name: Checkout the master branch of the Ion Python
+          # TODO: See if there's a way to cache the ion-c build output if it hasn't changed
+
+  detect-regression:
+    name: Check
+    runs-on: ubuntu-latest
+    needs: [generate-test-data, prepopulate-pip-cache]
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.11', 'pypy-3.8', 'pypy-3.10']
+        test-data: ['nestedStruct', 'nestedList', 'sexp', 'realWorldDataSchema01', 'realWorldDataSchema02', 'realWorldDataSchema03']
+      fail-fast: false
+    steps:
+      - name: Checkout the base of the PR
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.base_ref }}
+          submodules: recursive
+          path: baseline
+
+      - name: Checkout the head of the PR
         uses: actions/checkout@v3
         with:
-          ref: master
-          path: ion-python-master
+          ref: ${{ github.head_ref }}
+          submodules: recursive
+          path: new
 
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+          cache-dependency-path: '**/requirements.txt'
+
+      - name: Download Test Data
+        id: 'download'
+        uses: actions/download-artifact@v3
+        with:
+          name: ${{env.test_data_id}}
+
+      # Generates performance results for the previous commit
       - name: Create a virtual environment
-        working-directory: ./ion-python-master
+        working-directory: ./baseline
         run: |
-          git submodule init && git submodule update && python3 -m venv ./venv && . venv/bin/activate
           pip install -r requirements.txt
-          pip install -e .
-      - name: Running performance benchmark
-        working-directory: ./ion-python-master
-        env:
-          warmups: 100
-          iterations: 1000
-          cli_script: amazon/ionbenchmark/ion_benchmark_cli.py
-          runner_dir: /Users/runner/work/ion-python/ion-python
+          pip install .
+      - name: Run baseline performance benchmark
+        id: 'baseline'
+        working-directory: ./baseline
         run: |
-          . venv/bin/activate
-          for testset in nestedStruct nestedList sexp realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03
-          do
-            python $cli_script read -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_read/previous --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-            python $cli_script write -w $warmups --iterations $iterations -o $runner_dir/benchmarkResults/${testset}_write/previous --io-type buffer --format ion_text --format ion_binary --api load_dump $runner_dir/testData/${testset}.10n
-          done
-      # Upload resource, results and report
-      - name: Upload new benchmark results directory to artifacts
-        uses: actions/upload-artifact@v2
-        with:
-          name: Benchmark result
-          path: benchmarkResults
-
-      - name: Upload test Ion Data to artifacts
-        uses: actions/upload-artifact@v2
-        with:
-          name: test Ion Data
-          path: testData
-
+          ${{env.run_cli}} spec '${{env.specs}}' -d '${{env.spec_defaults}}' \
+             -O '{input_file:"${{steps.download.outputs.download-path}}/${{ matrix.test-data }}.10n"}' \
+             -o "$PWD/report.ion" -r '${{env.report_statistics}}'
+          echo "::group::Ion Report"
+          echo "$(<report.ion)"
+          echo "::endgroup::"
+          echo "report=$PWD/report.ion" >> "$GITHUB_OUTPUT"
+      # Generates performance results for the current commit
+      - name: Create a virtual environment and setup the package
+        working-directory: ./new
+        run: |
+          pip install -r requirements.txt
+          pip install .
+      - name: Run new performance benchmark
+        id: 'new'
+        working-directory: ./new
+        run: |
+          ${{env.run_cli}} spec '${{env.specs}}' -d '${{env.spec_defaults}}' \
+             -O '{input_file:"${{steps.download.outputs.download-path}}/${{ matrix.test-data }}.10n"}' \
+             -o "$PWD/report.ion" -r '${{env.report_statistics}}'
+          echo "::group::Ion Report"
+          echo "$(<report.ion)"
+          echo "::endgroup::"
+          echo "report=$PWD/report.ion" >> "$GITHUB_OUTPUT"
       # Compare results and identify regression
       - name: Detect performance regression
-        id: regression_result
-        run: |
-          . /Users/runner/work/ion-python/ion-python/ion-python-current/venv/bin/activate
-          result=true
-          cd benchmarkResults && for FILE in *; do message=$(python /Users/runner/work/ion-python/ion-python/ion-python-current/amazon/ionbenchmark/ion_benchmark_cli.py compare --benchmark-result-previous $FILE/previous --benchmark-result-new $FILE/new $FILE/report | tee /dev/stderr) && if [ "$message" != "no regression detected" ]; then result=false; fi; done
-          echo "::set-output name=regression-result::$result"
-          if [ "$result" = "true" ]; then echo "No regression detected!" >> $GITHUB_STEP_SUMMARY; fi
-      - name: Upload comparison reports to the benchmark results directory
-        uses: actions/upload-artifact@v2
-        with:
-          name: Benchmark result
-          path: benchmarkResults
-
-      - name: Fail the workflow if regression happened
-        env:
-          regression_detect: ${{steps.regression_result.outputs.regression-result}}
-        if: ${{ env.regression_detect == 'false' }}
-        run: |
-          . /Users/runner/work/ion-python/ion-python/ion-python-current/venv/bin/activate
-          cd benchmarkResults && echo "**Below files are detected performance regression, please download the benchmark result(s) to see detail:**" >> $GITHUB_STEP_SUMMARY && for FILE in *; do regressionDetection=$(python /Users/runner/work/ion-python/ion-python/ion-python-current/amazon/ionbenchmark/ion_benchmark_cli.py compare --benchmark-result-previous $FILE/previous --benchmark-result-new $FILE/new $FILE/report | tee /dev/stderr) && if [ "$regressionDetection" != "no regression detected" ]; then echo "$FILE" >> $GITHUB_STEP_SUMMARY; fi; done
-          exit 1
+        working-directory: ./new
+        run: ${{env.run_cli}} compare --fail ${{steps.baseline.outputs.report}} ${{steps.new.outputs.report}} -c '${{env.compare_statistics}}'
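
For context, here is roughly what the matrixed benchmark step runs once GitHub Actions substitutes the ${{env.*}} and matrix placeholders, shown for a single matrix cell. This is a sketch, not a pinned invocation: INPUT_DIR stands in for steps.download.outputs.download-path, which is only known at runtime, while the flags and values come from the workflow's env block above.

# One expanded "Run baseline/new performance benchmark" invocation
# (matrix cell: python-version 3.9, test-data nestedStruct).
INPUT_DIR=/home/runner/work/ion-python/ion-python   # assumed artifact download path
python amazon/ionbenchmark/ion_benchmark_cli.py spec \
    '{command:read,format:ion_text} {command:write,format:ion_text} {command:read,format:ion_binary} {command:write,format:ion_binary}' \
    -d '{warmups:100,iterations:100}' \
    -O "{input_file:\"$INPUT_DIR/nestedStruct.10n\"}" \
    -o "$PWD/report.ion" \
    -r 'file_size,time_mean,time_error,ops/s_mean,ops/s_error,memory_usage_peak'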

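The final step then compares the two reports; judging by the --fail flag and the step's name, the CLI exits nonzero when the compared statistics regress, which fails the job. Expanded the same way, with the report paths as placeholders for steps.baseline.outputs.report and steps.new.outputs.report:

# Compare the baseline report against the new report on the configured statistics.
python amazon/ionbenchmark/ion_benchmark_cli.py compare --fail \
    baseline/report.ion new/report.ion \
    -c 'file_size,time_mean'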