# wolfSSL Benchmark

In [1]:
import json

import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import weibull_min

from dataset import *

Some constants and loading datasets:

In [2]:
# Percentile handed to Dataset.truncate() when outliers are dropped from the datasets.
# NOTE(review): the notebook text describes the truncation as 2-tailed, so presumably
# this share is cut from both ends of each sample — confirm against Dataset.truncate.
CUTOFF_PERCENTILE = 5  # drop values in this percentile

In [3]:
def load(path: str):
    """Read the JSON document stored at ``path`` and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)

In [4]:
# Client-side measurements: one Dataset per TLS configuration.
client_tls = Dataset(load("data/client-benchmarks.json"))
client_tls_ra = Dataset(load("data/client-benchmarks-ra.json"))
client_tls_ech = Dataset(load("data/client-benchmarks-ech.json"))
client_tls_ech_ra = Dataset(load("data/client-benchmarks-ra-ech.json"))

# Server-side measurements.
# NOTE(review): the second positional argument (False) is interpreted by Dataset,
# which is not visible here — presumably it marks server-side data; confirm.
server_tls = Dataset(load("data/server-benchmarks.json"), False)
server_tls_ra = Dataset(load("data/server-benchmarks-ra.json"), False)

# Legend label -> dataset, grouped by side and by whether ECH was enabled.
client_no_ech = {"TLS": client_tls, "TLS with RA": client_tls_ra}
client_ech = {"TLS with ECH": client_tls_ech, "TLS with ECH,RA": client_tls_ech_ra}
server_no_ech = {"TLS": server_tls, "TLS with RA": server_tls_ra}
# NOTE(review): the second label below reads "RA,ECH" while the client group uses "ECH,RA".
server_ech = {
    "TLS with ECH": Dataset(load("data/server-benchmarks-ech.json"), False),
    "TLS with RA,ECH": Dataset(load("data/server-benchmarks-ra-ech.json"), False),
}

Perform a two-tailed truncation on every dataset to drop outliers:

In [5]:
# Apply the same outlier truncation to every dataset of every group.
# NOTE(review): truncate() is defined in dataset.py; if it modifies the dataset in
# place, re-running this cell would presumably truncate again — confirm.
for group in (client_no_ech, server_no_ech, client_ech, server_ech):
    for ds in group.values():
        ds.truncate(CUTOFF_PERCENTILE)

Some plot-related functions:

In [6]:
def plot_histogram(dataset: dict[str, Dataset], key: str, title: str = None, time_unit: str = "ms", num_bins: int = 100):
    total = None
    bmin = float('inf')
    bmax = float('-inf')
    for label, ds in dataset.items():
        if key in ds.data:
            bmin = min(bmin, ds.data[key][0])
            bmax = max(bmax, ds.data[key][-1])

    match time_unit:
        case "ms":
            bmin /= 1_000_000
            bmax /= 1_000_000
        case "us":
            bmin /= 1_000
            bmax /= 1_000
    
    #bmin -= bmin % num_bins
    #bmax += num_bins - (bmax % num_bins)

    bstep = (bmax - bmin) / num_bins
    bins = np.arange(bmin, bmax, bstep)
    if title is None:
        title = key_to_string(key)

    plt.figure(figsize=(8, 4), layout="tight")
    ax = plt.subplot()
    for label, ds in dataset.items():
        if key in ds.data:
            match time_unit:
                case "ms":
                    data = ds.data[key] / 1_000_000
                    mean = ds.mean[key] / 1_000_000
                case "us":
                    data = ds.data[key] / 1_000
                    mean = ds.mean[key] / 1_000
                    time_unit = "μs"
                case "ns":
                    data = ds.data[key]
                    mean = ds.mean[key]
            _, _, p = ax.hist(data, label=f"{label}", histtype='step', cumulative=False, bins=bins, linewidth=2)
            (r, g, b, _) = p[0].get_facecolor()
            ax.axvline(mean, label=f"mean={mean:.3f} {time_unit}", color=(r, g, b), linestyle=':', linewidth=2)
            
            if total is None:
                total = len(data)
            
    plt.legend()
    #plt.xticks(list(plt.xticks()[0]))
    plt.xlabel(f"Time ({time_unit})")
    plt.ylabel(f"Frequency (total={total})")
    plt.grid(visible=True)
    plt.title(title)
    #plt.xlim(left=bmin - np.fmod(bmin, bstep))
    #plt.xlim(right=bmax + bstep - np.fmod(bmin, bstep))
    #plt.ylim(0, 5)
    plt.savefig(f"./plots/svg/{title.replace(':', '-')}.svg", format="svg")
    plt.savefig(f"./plots//png/{title.replace(':', '-')}.png", format="png")
    plt.close()

In [7]:
def plot_client_bar(datasets: dict[str, Dataset], title: str, median_or_average: bool = True):
    """Show a stacked bar chart of mean client-side timings, one bar per dataset.

    The bars are stacked in three stages: ``CLIENT_KEYS[0:3]`` (ClientHello),
    ``CLIENT_KEYS[3:6]`` (Certificate Verify) and ``CLIENT_KEYS[6]`` (overall
    handshake).  Within a stage each key contributes only the time not already
    covered by the preceding keys of that stage, which relies on the ordering of
    ``CLIENT_KEYS``.  A key missing from a dataset counts as 0.

    Args:
        datasets: Mapping of x-axis label to ``Dataset``.
        title: Figure title.
        median_or_average: Only selects the y-axis label ("mean Time (ns)" when
            true, "Average Time (ns)" otherwise); the plotted values are always
            taken from ``Dataset.mean``.
    """
    # A concrete list instead of a dict view, so matplotlib gets a plain sequence of labels.
    x = list(datasets.keys())
    count = len(datasets)

    fig, ax = plt.subplots(figsize=(7, 20))

    def means_for(key):
        """Mean of ``key`` for every dataset (0 where the key was not recorded)."""
        return np.array([ds.mean[key] if key in ds.mean else 0 for ds in datasets.values()])

    def stack(keys, base):
        """Stack one bar segment per key on top of ``base``; return the stage's total height."""
        running = np.zeros(count)
        for key in keys:
            # The keys are ordered so we can deduct the ones from before to get an overall time slice
            segment = means_for(key) - running
            ax.bar(x, segment, bottom=base + running, label=key)
            running = running + segment
        return running

    # ClientHello msg
    client_hello = stack(CLIENT_KEYS[0:3], np.zeros(count))

    # Certificate Verify msg
    cert_verify = stack(CLIENT_KEYS[3:6], client_hello)

    # Handshake overall: whatever is not covered by the two stages above
    covered = client_hello + cert_verify
    ax.bar(x, means_for(CLIENT_KEYS[6]) - covered, bottom=covered, label=CLIENT_KEYS[6])

    ax.legend()
    ax.set_xlabel("Connection Type")
    ax.set_ylabel("mean Time (ns)" if median_or_average else "Average Time (ns)")
    ax.set_title(title)
    ax.grid(visible=True)
    plt.show()

Plot and save histograms to the [`plots/`](plots) folder.

In [8]:
# Handshake with outliers: a freshly loaded dataset on which truncate() has not been called
client_tls_untruncated = Dataset(load("data/client-benchmarks.json"))
plot_histogram({"TLS": client_tls_untruncated}, "client_handshake", num_bins=100)

In [9]:


# Handshake, ClientHello and ClientHello extensions: one figure without and one with ECH
for key in ("client_handshake", "client_hello", "client_extensions"):
    name = key_to_string(key)
    plot_histogram(client_no_ech, key, f"{name} (no ECH)")
    plot_histogram(client_ech, key, f"{name} (with ECH)")

# Client Certificate Verify, its AttReq part and the AttReq challenge generation
ra_keys = (
    "client_certificate_verify",
    "client_certificate_verify_att_request",
    "client_certificate_verify_att_request_challenge_generation",
)
for key in ra_keys:
    plot_histogram(client_no_ech, key, time_unit="us")

# AttReq encoding (of little use: the clock is not accurate enough for operations this fast)
plot_histogram(client_no_ech, "client_att_request", time_unit="ns")

In [10]:
# Handshake (ms), ServerHello and EncryptedExtensions (both μs): one figure without and one with ECH
server_pairs = (
    ("server_handshake", "ms"),
    ("server_hello", "us"),
    ("server_extensions", "us"),
)
for key, unit in server_pairs:
    name = key_to_string(key)
    plot_histogram(server_no_ech, key, f"{name} (no ECH)", time_unit=unit)
    plot_histogram(server_ech, key, f"{name} (with ECH)", time_unit=unit)

# Server: AttReq generation and AttReq challenge generation
for key in ("server_att_request_generation", "server_att_request_challenge_generation"):
    plot_histogram(server_no_ech, key, time_unit="us")

In [11]:
#plot_client_bar(client_no_ech, "", True)
#plot_client_bar(client_ech, "", False)

In [12]:
# for key in dataset.CLIENT_KEYS:
#     plot_histogram(client_ech, key, f"{key} - with ECH")
# for key in dataset.SERVER_KEYS:
#     plot_histogram(server_ech, key, f"{key} - with ECH")

The difference in the overall handshake time between TLS and TLS-with-RA should match the sum of the time differences of the individual handshake events that involve RA operations.
As we use the means of the overall distributions, slight deviations are to be expected.

In [13]:
# Overhead of RA on the whole handshake (difference of the mean handshake times).
hs_diff = client_tls_ra.mean["client_handshake"] - client_tls.mean["client_handshake"]
print(f"Δ Handshake (ns):                  {hs_diff:.0f}")

# Overhead of RA on the individual handshake steps (client and server side).
ce_diff = client_tls_ra.mean["client_extensions"] - client_tls.mean["client_extensions"]
print(f"Δ ClientExtensions (ns):           {ce_diff:.0f}")
cv_diff = client_tls_ra.mean["client_certificate_verify"] - client_tls.mean["client_certificate_verify"]
print(f"Δ Certificate Verify (ns):         {cv_diff:.3f}")
se_diff = server_tls_ra.mean["server_extensions"] - server_tls.mean["server_extensions"]
print(f"Δ EncryptedExtensions (ns):        {se_diff:.3f}")

# Named `ra_sum` rather than `sum` so the builtin sum() stays usable in every later cell.
ra_sum = ce_diff + cv_diff + se_diff
print(f"Δ Sum (ns):                        {ra_sum:.3f}")

# Part of the handshake overhead that the individual steps do not account for.
diff = hs_diff - ra_sum
print(f"Δ Handshake to RA-operations (ns): {diff:.3f}")

Δ Handshake (ns):                  14729
Δ ClientExtensions (ns):           -381
Δ Certificate Verify (ns):         5237.190
Δ EncryptedExtensions (ns):        7687.885
Δ Sum (ns):                        12544.022
Δ Handshake to RA-operations (ns): 2184.592


## Some statistics
Let $\mu_1, \mu_2$ be the means of the two distributions, e.g., of the TLS and TLS-with-RA handshakes.
We test the null hypothesis that the two means are equal against the one-sided alternative that the first mean is smaller (the test below is run with `alternative="less"`):

$$H_0: \quad \mu_1 = \mu_2$$
$$H_1: \quad \mu_1 < \mu_2$$

If the $p$-value of the test is lower than the significance level $\alpha$ (by default $\alpha = 0.05$, i.e. 5%), we reject $H_0$:

In [14]:
def test_hypothesis_equal_mean(a, b, alpha=0.05):
    h0 = stats.ttest_ind(a=a, b=b, alternative="less", permutations=1000, equal_var=False)
    print(f"p-value: {h0.pvalue}")
    if h0.pvalue < alpha:
        print("We reject H₀ that µ₁ = µ₂")
    else:
        print("Failed to reject H₀ that µ₁ = µ₂")

In [15]:
def confidence_interval(data, alpha):
    """Print the (1 - alpha) Student-t confidence interval of the mean of ``data``.

    Returns:
        A tuple ``(mid, delta)``: the centre of the interval and its half-width.
    """
    lower, upper = stats.t.interval(
        confidence=1-alpha,
        loc=np.mean(data),
        df=len(data)-1,
        scale=stats.sem(data),
    )
    print(f" - ({lower}, {upper}) ns")
    bounds = np.array([lower, upper])
    centre = bounds.mean()
    half_width = (bounds[1]-bounds[0])/2
    return centre, half_width

In [16]:
# Significance level used for the tests and confidence intervals in this cell
# (stricter than the 0.05 default of test_hypothesis_equal_mean).
ALPHA = 0.01

# Client handshake samples without and with RA (no ECH).
tls = client_tls.data["client_handshake"]
tls_ra = client_tls_ra.data["client_handshake"]

print(">> Testing H₀ for the handshake of TLS and TLS-with-RA:")
test_hypothesis_equal_mean(tls, tls_ra, ALPHA)
# confidence_interval() prints the interval and returns its centre and half-width.
mean, pm = confidence_interval(tls, ALPHA)
print("TLS mean:         ", mean, " +/- ", pm, " ns")
mean_ra, pm_ra = confidence_interval(tls_ra, ALPHA)
print("TLS-with-Ra mean: ", mean_ra, " +/- ", pm_ra, " ns")
# Uncertainty of the difference: the two half-widths combined in quadrature.
sigma = np.sqrt(pm**2 + pm_ra**2)
print("Delta mu = ", (mean_ra - mean), " +/- ", sigma)

# Same comparison for the ECH variants.
ech = client_tls_ech.data['client_handshake']
ech_ra = client_tls_ech_ra.data['client_handshake']
print("\n>> Testing H₀ for the handshake of TLS-with-ECH and TLS-with-ECH-RA:")
test_hypothesis_equal_mean(ech, ech_ra, ALPHA)
mean_ech, pm_ech = confidence_interval(ech, ALPHA)
print("TLS-with-ECH mean:    ", mean_ech, " +/- ", pm_ech, " ns")
mean_ech_ra, pm_ech_ra = confidence_interval(ech_ra, ALPHA)
print("TLS-with-ECH,RA mean: ", mean_ech_ra, " +/- ", pm_ech_ra, " ns")
# Uncertainty of the difference: the two half-widths combined in quadrature.
sigma_ech = np.sqrt(pm_ech**2 + pm_ech_ra**2)
print("Delta mu = ", (mean_ech_ra - mean_ech), " +/- ", sigma_ech)

>> Testing H₀ for the handshake of TLS and TLS-with-RA:
p-value: 0.000999000999000999
We reject H₀ that µ₁ = µ₂
 - (5138142.32243908, 5140426.367771447) ns
TLS mean:          5139284.345105263  +/-  1142.0226661833003  ns
 - (5152875.058131397, 5155150.859510709) ns
TLS-with-Ra mean:  5154012.958821053  +/-  1137.900689655915  ns
Delta mu =  14728.61371578928  +/-  1612.1519002860186

>> Testing H₀ for the handshake of TLS-with-ECH and TLS-with-ECH-RA:
p-value: 0.000999000999000999
We reject H₀ that µ₁ = µ₂
 - (5331209.506974604, 5333213.329951711) ns
TLS-with-ECH mean:     5332211.4184631575  +/-  1001.9114885535091  ns
 - (5346530.862880179, 5348598.783225085) ns
TLS-with-ECH,RA mean:  5347564.823052632  +/-  1033.960172452964  ns
Delta mu =  15353.404589474201  +/-  1439.7570173867782


In [17]:
def to_csv(filename, ds, keys):
    """Write a two-column CSV (operation name, time in ms) to ``filename``.

    Args:
        filename: Path of the CSV file; an existing file is overwritten.
        ds: Mapping of key to value; each value is divided by 1_000_000 and
            written with three decimals.
        keys: Keys to export, in output order; keys missing from ``ds`` are skipped.
    """
    with open(filename, "w+") as out:
        out.write("Operation,Mean time taken (ms)\n")
        out.writelines(
            f"{key_to_string(key)},{ds[key] / 1_000_000:.3f}\n"
            for key in keys
            if key in ds
        )

In [18]:
# Client: one CSV of mean timings per TLS configuration
client_exports = {
    "csvs/client/tls.csv": client_tls.mean,
    "csvs/client/tls-ra.csv": client_tls_ra.mean,
    "csvs/client/tls-ech.csv": client_tls_ech.mean,
    "csvs/client/tls-ech-ra.csv": client_tls_ech_ra.mean,
}
for csv_path, means in client_exports.items():
    to_csv(csv_path, means, CLIENT_KEYS)

# Server
server_exports = {
    "csvs/server/tls.csv": server_tls.mean,
    "csvs/server/tls-ra.csv": server_tls_ra.mean,
}
for csv_path, means in server_exports.items():
    to_csv(csv_path, means, SERVER_KEYS)