feat(bench): add historical performance benchmark (aws#4083)

goatgoose · Jul 24, 2023 · 20b0174 · 20b0174
1 parent 5cc827d
commit 20b0174
Show file tree

Hide file tree

Showing 10 changed files with 1,236 additions and 26 deletions.
diff --git a/bindings/rust/bench/.gitignore b/bindings/rust/bench/.gitignore
@@ -0,0 +1,2 @@
+*.svg
+!historical-perf-*.svg
diff --git a/bindings/rust/bench/Cargo.toml b/bindings/rust/bench/Cargo.toml
@@ -3,13 +3,19 @@ name = "bench"
 version = "0.1.0"
 edition = "2021"
 
+[features]
+historical-perf = []
+
 [dependencies]
 s2n-tls = { path = "../s2n-tls" }
 rustls = "0.21"
 rustls-pemfile = "1.0"
 openssl = "0.10"
 errno = "0.3"
 libc = "0.2"
+serde_json = "1.0"
+plotters = "0.3"
+semver = "1.0"
 
 [dev-dependencies]
 criterion = "0.3"

diff --git a/bindings/rust/bench/README.md b/bindings/rust/bench/README.md
@@ -10,6 +10,16 @@ Setup is easy! Just have OpenSSL installed and generate Rust bindings for s2n-tl
 
 The benchmarks can be run with the `cargo bench` command. Criterion will auto-generate an HTML report in `target/criterion/`. 
 
+## Historical benchmarks
+
+To run historical benchmarks, run `historical-perf/bench-past.sh`. This will check out old versions of s2n-tls back to v1.3.16 in `target/` and run benchmarks on those with the `historical-perf` feature, which disables the Rustls and OpenSSL benches.
+
+### Caveats
+
+The oldest version benched is v1.3.16, since before that the s2n-tls Rust bindings had a different API and would thus require a different bench harness to test.
+
+v1.3.30-1.3.37 are not benched because of dependency issues when generating the Rust bindings. However, versions before and after are benched, so the overall trend in performance can still be seen without the data from these versions.
+
 ## Implementation details
 
 We use Rust bindings for s2n-tls and OpenSSL. All of our benchmarks are run in Rust on a single thread for consistency. 
@@ -25,3 +35,20 @@ All certs are stored in `certs/` and can be regenerated using `certs/generate_ce
 ### Negotiation parameters
 
 The cipher suites benchmarked are `TLS_AES_128_GCM_SHA256` and `TLS_AES_256_GCM_SHA384`, and the key exchange methods benchmarked are ECDHE with `secp256r1` and with `x25519`. We also test connections with and without client authentication (mTLS).
+
+## Sample output
+
+### Historical performance
+
+Because these benches take a long time to run (>30 min), we include the results from historical benching (as of v1.3.47) here.
+
+Notes: 
+- Two sets of parameters for the handshake couldn't be benched before 1.3.40, since security policies that negotiated those parameters as their top choice did not exist before then.
+- There is no data from 1.3.30 to 1.3.37 because those versions have a dependency issue that causes the Rust bindings not to build. However, there is data before and after that period, so the performance for those versions can be inferred via interpolation.
+- The improvement in throughput in 1.3.28 was most likely caused by the addition of LTO to the default Rust bindings build. 
+- Since the benches are run over a long time, noise on the machine can cause variability, as seen in the throughput graph.
+- The variability is especially visible in the throughput graph because throughput is calculated as the inverse of the time taken.
+
+![historical-perf-handshake](images/historical-perf-handshake.svg)
+
+![historical-perf-throughput](images/historical-perf-throughput.svg)
diff --git a/bindings/rust/bench/benches/handshake.rs b/bindings/rust/bench/benches/handshake.rs
@@ -10,15 +10,17 @@ use bench::{
 use criterion::{
     criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup, Criterion,
 };
-use std::any::type_name;
 
 pub fn bench_handshake_params(c: &mut Criterion) {
     fn bench_handshake_for_library<T: TlsBenchHarness>(
         bench_group: &mut BenchmarkGroup<WallTime>,
+        name: &str,
         handshake_type: HandshakeType,
         ec_group: ECGroup,
     ) {
-        bench_group.bench_function(type_name::<T>(), |b| {
+        // generate all harnesses (TlsBenchHarness structs) beforehand so that benchmarks
+        // only include negotiation and not config/connection initialization
+        bench_group.bench_function(name, |b| {
             b.iter_batched_ref(
                 || {
                     T::new(
@@ -28,10 +30,14 @@ pub fn bench_handshake_params(c: &mut Criterion) {
                         },
                         handshake_type,
                     )
-                    .unwrap()
                 },
                 |harness| {
-                    harness.handshake().unwrap();
+                    // harnesses with certain parameters fail to initialize for
+                    // some past versions of s2n-tls, but missing data can be
+                    // visually interpolated in the historical performance graph
+                    if let Ok(harness) = harness {
+                        let _ = harness.handshake();
+                    }
                 },
                 BatchSize::SmallInput,
             )
@@ -42,17 +48,28 @@ pub fn bench_handshake_params(c: &mut Criterion) {
         for ec_group in [SECP256R1, X25519] {
             let mut bench_group =
                 c.benchmark_group(format!("handshake-{:?}-{:?}", handshake_type, ec_group));
-            bench_handshake_for_library::<S2NHarness>(&mut bench_group, handshake_type, ec_group);
-            bench_handshake_for_library::<RustlsHarness>(
-                &mut bench_group,
-                handshake_type,
-                ec_group,
-            );
-            bench_handshake_for_library::<OpenSslHarness>(
+
+            bench_handshake_for_library::<S2NHarness>(
                 &mut bench_group,
+                "s2n-tls",
                 handshake_type,
                 ec_group,
             );
+            #[cfg(not(feature = "historical-perf"))]
+            {
+                bench_handshake_for_library::<RustlsHarness>(
+                    &mut bench_group,
+                    "rustls",
+                    handshake_type,
+                    ec_group,
+                );
+                bench_handshake_for_library::<OpenSslHarness>(
+                    &mut bench_group,
+                    "openssl",
+                    handshake_type,
+                    ec_group,
+                );
+            }
         }
     }
 }

diff --git a/bindings/rust/bench/benches/throughput.rs b/bindings/rust/bench/benches/throughput.rs
@@ -9,32 +9,37 @@ use criterion::{
     criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup, Criterion,
     Throughput,
 };
-use std::any::type_name;
 
 pub fn bench_throughput_cipher_suite(c: &mut Criterion) {
     // arbitrarily large to cut across TLS record boundaries
     let mut shared_buf = [0u8; 100000];
 
     fn bench_throughput_for_library<T: TlsBenchHarness>(
         bench_group: &mut BenchmarkGroup<WallTime>,
+        name: &str,
         shared_buf: &mut [u8],
         cipher_suite: CipherSuite,
     ) {
-        bench_group.bench_function(type_name::<T>(), |b| {
+        bench_group.bench_function(name, |b| {
             b.iter_batched_ref(
                 || {
-                    let mut harness = T::new(
+                    T::new(
                         CryptoConfig {
                             cipher_suite,
                             ec_group: Default::default(),
                         },
                         Default::default(),
                     )
-                    .unwrap();
-                    harness.handshake().unwrap();
-                    harness
+                    .map(|mut h| {
+                        let _ = h.handshake();
+                        h
+                    })
+                },
+                |harness| {
+                    if let Ok(harness) = harness {
+                        let _ = harness.round_trip_transfer(shared_buf);
+                    }
                 },
-                |harness| harness.round_trip_transfer(shared_buf).unwrap(),
                 BatchSize::SmallInput,
             )
         });
@@ -43,17 +48,27 @@ pub fn bench_throughput_cipher_suite(c: &mut Criterion) {
     for cipher_suite in [AES_128_GCM_SHA256, AES_256_GCM_SHA384] {
         let mut bench_group = c.benchmark_group(format!("throughput-{:?}", cipher_suite));
         bench_group.throughput(Throughput::Bytes(shared_buf.len() as u64));
-        bench_throughput_for_library::<S2NHarness>(&mut bench_group, &mut shared_buf, cipher_suite);
-        bench_throughput_for_library::<RustlsHarness>(
-            &mut bench_group,
-            &mut shared_buf,
-            cipher_suite,
-        );
-        bench_throughput_for_library::<OpenSslHarness>(
+        bench_throughput_for_library::<S2NHarness>(
             &mut bench_group,
+            "s2n-tls",
             &mut shared_buf,
             cipher_suite,
         );
+        #[cfg(not(feature = "historical-perf"))]
+        {
+            bench_throughput_for_library::<RustlsHarness>(
+                &mut bench_group,
+                "rustls",
+                &mut shared_buf,
+                cipher_suite,
+            );
+            bench_throughput_for_library::<OpenSslHarness>(
+                &mut bench_group,
+                "openssl",
+                &mut shared_buf,
+                cipher_suite,
+            );
+        }
     }
 }
 

diff --git a/bindings/rust/bench/certs/generate_certs.sh b/bindings/rust/bench/certs/generate_certs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0

diff --git a/bindings/rust/bench/historical-perf/bench-past.sh b/bindings/rust/bench/historical-perf/bench-past.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# immediately bail if any command fails
+set -e
+
+# suppress stdout and most cargo warnings
+exec >/dev/null
+export CARGO_TERM_QUIET=true
+export RUSTFLAGS=-Awarnings
+
+# go to s2n-tls/bindings/rust/bench/
+pushd "$(dirname "$0")"/../
+bench_path="$(pwd)"
+
+# delete past runs
+rm -rf target/historical-perf
+
+# make Cargo.toml point s2n-tls to the cloned old version
+sed -i "s|s2n-tls = .*|s2n-tls = { path = \"target/s2n-tls/bindings/rust/s2n-tls\" }|" Cargo.toml 
+
+# ensure Cargo.toml gets changed back on exit; retains original exit status
+trap "{ status=$?; sed -i 's|s2n-tls = .*|s2n-tls = { path = \"../s2n-tls\" }|' $bench_path/Cargo.toml; exit $status; }" EXIT
+
+# clone copy of repo to target/s2n-tls
+echo "cloning repo" >&2
+mkdir -p target
+cd target
+rm -rf s2n-tls
+git clone --quiet https://github.com/aws/s2n-tls
+cd s2n-tls/bindings/rust/
+copied_bindings_path="$(pwd)"
+
+# get list of tags sorted newest to oldest
+sorted_tags="$(git tag -l | sort -rV)"
+
+# last tag we want is v1.3.16, get line number of v1.3.16 in sorted_tags
+line_num_last_tag=$(echo "$sorted_tags" | grep "v1.3.16" --line-number | head -n 1 | cut -d":" -f1)
+
+# loop through all tags in order up to v1.3.16
+for tag in $(echo "$sorted_tags" | head -$line_num_last_tag)
+do
+    (
+        # go to s2n-tls/bindings/rust/ inside copied repo
+        cd $copied_bindings_path
+
+        echo "checkout tag $tag" >&2
+        git checkout $tag --quiet
+
+        echo "generating rust bindings" >&2
+        # if generate.sh fails, exit out of block
+        ./generate.sh || exit 1
+
+        echo "running cargo bench and saving results" >&2
+        cd $bench_path
+        rm -rf target/criterion
+        cargo bench --features historical-perf --no-fail-fast
+
+        # cache criterion outputs from this bench into target/historical-perf
+        for bench_group in $(ls target/criterion | grep -v "report")
+        do
+            mkdir -p target/historical-perf/$bench_group/
+            cp target/criterion/$bench_group/s2n-tls/new/estimates.json target/historical-perf/$bench_group/$tag.json
+        done
+    ) || echo "failed, trying next tag"
+    echo
+done
+
+# graph results
+cd $bench_path
+cargo run --release --bin graph_perf
+
+popd