diff --git a/.github/workflows/benchmark-pr.yaml b/.github/workflows/benchmark-pr.yaml
new file mode 100644
index 000000000..be6ee99b6
--- /dev/null
+++ b/.github/workflows/benchmark-pr.yaml
@@ -0,0 +1,59 @@
+---
+# Runs the Go benchmarks for every pull request, compares them against the
+# base of the pull request with benchstat, publishes the comparison in the job
+# summary, and fails when a geomean differs by more than the accepted limit.
+name: Benchmarks on AMD64
+permissions: read-all
+on: [pull_request]
+jobs:
+  benchmark-pull-request:
+    runs-on: ubuntu-latest-8-cores
+    steps:
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
+        with:
+          # Fetch the full history, so that the ref to compare with can be checked out.
+          fetch-depth: 0
+      - id: goversion
+        run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
+      - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
+        with:
+          go-version: ${{ steps.goversion.outputs.goversion }}
+      - name: Run Benchmarks
+        # REF is the ref the checked-out code is compared with: the base of the
+        # pull request. benchstat writes the comparison to result.txt.
+        run: |
+          BENCHSTAT_OUTPUT_FILE=result.txt make test-benchmark-compare REF=${{ github.event.pull_request.base.sha }}
+      # Publish the benchstat report, followed by a short reading guide, in the job summary.
+      - run: |
+          echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+          cat result.txt >> "$GITHUB_STEP_SUMMARY"
+          echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+          cat <<EOL >> "$GITHUB_STEP_SUMMARY"
+
+
+          This section contains three tables generated by benchstat:
+
+          1. Seconds per operation.
+          2. Bytes per operation.
+          3. Allocations per operation.
+
+          The tables show the median and 75% confidence interval (CI) summaries for each benchmark comparing the HEAD and the BASE of the Pull Request, and an A/B comparison under "vs base". The last column shows the statistical p-value with three runs (n=3).
+
+          The last row has the Geometric Mean (geomean) for the given rows in the table.
+
+          Refer to [benchstat's documentation](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat) for more help.
+          EOL
+      - name: Validate results under acceptable limit
+        run: |
+          export MAX_ACCEPTABLE_DIFFERENCE=5
+          while IFS= read -r line; do
+            # Get fourth value, which is the comparison with the base.
+            value="$(echo "$line" | awk '{print $4}')"
+            if [[ "$value" = +* ]] || [[ "$value" = -* ]]; then
+              # Keep only digits and dots, so the magnitude is compared with the limit.
+              if (( $(echo "${value//[^0-9.]/}"'>'"$MAX_ACCEPTABLE_DIFFERENCE" | bc -l) )); then
+                echo "::error::$value is above the maximum acceptable difference ($MAX_ACCEPTABLE_DIFFERENCE)"
+                exit 1
+              fi
+            fi
+          done < <(grep geomean result.txt)
diff --git a/Makefile b/Makefile
index b0d019802..2e0c09fa8 100644
--- a/Makefile
+++ b/Makefile
@@ -94,3 +94,14 @@ test-failpoint:
 test-robustness: gofail-enable build
 	sudo env PATH=$$PATH go test -v ${TESTFLAGS} ./tests/dmflakey -test.root
 	sudo env PATH=$(PWD)/bin:$$PATH go test -v ${TESTFLAGS} ${ROBUSTNESS_TESTFLAGS} ./tests/robustness -test.root
+
+.PHONY: test-benchmark-compare
+# Runs benchmark tests on the current git ref and the given REF, and compares
+# the two.
+test-benchmark-compare: install-benchstat
+	@git fetch
+	./scripts/compare_benchmarks.sh $(REF)
+
+.PHONY: install-benchstat
+install-benchstat:
+	go install golang.org/x/perf/cmd/benchstat@latest
diff --git a/scripts/compare_benchmarks.sh b/scripts/compare_benchmarks.sh
new file mode 100755
index 000000000..f0e68c258
--- /dev/null
+++ b/scripts/compare_benchmarks.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# https://github.com/kubernetes/kube-state-metrics/blob/main/tests/compare_benchmarks.sh (originally written by mxinden)
+#
+# Runs the Go benchmarks on the currently checked-out git ref and on the ref
+# given as the only argument, then compares both results with benchstat.
+#
+# Usage: compare_benchmarks.sh REF
+#
+# Environment variables:
+#   TIMEOUT                     value of go test -timeout (default: 30m)
+#   BENCH_COUNT                 value of go test -count (default: 3)
+#   BENCHSTAT_CONFIDENCE_LEVEL  value of benchstat -confidence (default: 0.75)
+#   BENCHSTAT_FORMAT            "text" or "csv" (default: text)
+#   BENCHSTAT_OUTPUT_FILE       file that receives the benchstat report; required
+#                               for csv, optional for text (default: stdout)
+
+# exit immediately when a command fails
+set -e
+# only exit with zero if all commands of the pipeline exit successfully
+set -o pipefail
+# error on unset variables
+set -u
+
+if [[ "$#" -ne 1 ]]; then
+  echo "One argument required, $# provided." >&2
+  exit 1
+fi
+
+REF_CURRENT="$(git rev-parse --abbrev-ref HEAD)"
+BASE_TO_COMPARE=$1
+
+# Refs may contain slashes (e.g. origin/main), so they must not be part of the
+# result file names. Both files live in a private directory removed on exit.
+RESULT_DIR="$(mktemp -d)"
+trap 'rm -rf -- "${RESULT_DIR}"' EXIT
+RESULT_CURRENT="${RESULT_DIR}/current"
+RESULT_TO_COMPARE="${RESULT_DIR}/to-compare"
+
+TIMEOUT=${TIMEOUT:-30m}
+BENCH_COUNT=${BENCH_COUNT:-3}
+BENCHSTAT_CONFIDENCE_LEVEL=${BENCHSTAT_CONFIDENCE_LEVEL:-0.75}
+BENCHSTAT_FORMAT=${BENCHSTAT_FORMAT:-"text"}
+# Default to empty, so the checks below do not trip over "set -u".
+BENCHSTAT_OUTPUT_FILE=${BENCHSTAT_OUTPUT_FILE:-""}
+
+if [[ "${BENCHSTAT_FORMAT}" == "csv" ]] && [[ -z "${BENCHSTAT_OUTPUT_FILE}" ]]; then
+  echo "BENCHSTAT_FORMAT is set to csv, but BENCHSTAT_OUTPUT_FILE is not set." >&2
+  exit 1
+fi
+
+# Benchmarks the current ref, then the ref to compare, switches back, and
+# reports the benchstat comparison (base = ref to compare, head = current ref).
+function main() {
+  echo ""
+  echo "### Testing ${REF_CURRENT}"
+
+  go test -timeout="${TIMEOUT}" -count="${BENCH_COUNT}" -benchmem -run=NONE -bench=. ./... | tee "${RESULT_CURRENT}"
+
+  # Filter benchmark lines, so benchstat can parse the output.
+  grep ^Benchmark "${RESULT_CURRENT}" > "${RESULT_CURRENT}".tmp && mv "${RESULT_CURRENT}".tmp "${RESULT_CURRENT}"
+
+  echo ""
+  echo "### Done testing ${REF_CURRENT}"
+
+  echo ""
+  echo "### Testing ${BASE_TO_COMPARE}"
+
+  git checkout "${BASE_TO_COMPARE}"
+
+  go test -timeout="${TIMEOUT}" -count="${BENCH_COUNT}" -benchmem -run=NONE -bench=. ./... | tee "${RESULT_TO_COMPARE}"
+
+  # Filter benchmark lines, so benchstat can parse the output.
+  grep ^Benchmark "${RESULT_TO_COMPARE}" > "${RESULT_TO_COMPARE}".tmp && mv "${RESULT_TO_COMPARE}".tmp "${RESULT_TO_COMPARE}"
+
+  echo ""
+  echo "### Done testing ${BASE_TO_COMPARE}"
+
+  git checkout -
+
+  echo ""
+  echo "### Result"
+  echo "base=${BASE_TO_COMPARE} head=${REF_CURRENT}"
+
+  if [[ "${BENCHSTAT_FORMAT}" == "csv" ]]; then
+    benchstat -format=csv -confidence="${BENCHSTAT_CONFIDENCE_LEVEL}" BASE="${RESULT_TO_COMPARE}" HEAD="${RESULT_CURRENT}" 2>/dev/null 1>"${BENCHSTAT_OUTPUT_FILE}"
+  else
+    if [[ -z "${BENCHSTAT_OUTPUT_FILE}" ]]; then
+      benchstat -confidence="${BENCHSTAT_CONFIDENCE_LEVEL}" BASE="${RESULT_TO_COMPARE}" HEAD="${RESULT_CURRENT}"
+    else
+      benchstat -confidence="${BENCHSTAT_CONFIDENCE_LEVEL}" BASE="${RESULT_TO_COMPARE}" HEAD="${RESULT_CURRENT}" 1>"${BENCHSTAT_OUTPUT_FILE}"
+    fi
+  fi
+}
+
+main