#### Imports, Setup and Helpers

In [None]:
import subprocess

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend to emit figures in SVG format.
%config InlineBackend.figure_format='svg'

# Apply seaborn's default theme to all subsequent plots.
sns.set_theme()

In [None]:
def add_computed_properties(dataframe):
    """Add derived size, runtime and throughput columns to ``dataframe`` in place.

    Reads the columns ``vec_size``, ``chunk_size``, ``type_size``, ``runtime``
    and ``num_repetitions`` and writes:

    * ``vec_size_bytes``: ``vec_size * type_size``
    * ``chunk_size_bytes``: ``chunk_size * type_size``
    * ``runtime_normalized_seconds``: ``1e-6 * runtime / num_repetitions``
      (presumably ``runtime`` is in microseconds, given the ``1e-6`` factor --
      confirm against the benchmark output)
    * ``bytes_per_second``: ``vec_size_bytes / runtime_normalized_seconds``

    The frame is modified in place; nothing is returned.
    """
    element_size = dataframe["type_size"]

    # Both counts are converted with the element size: the reported chunk_size
    # is given in multiples of the data size, even for the binary chunking
    # method.
    for count_column in ("vec_size", "chunk_size"):
        dataframe[f"{count_column}_bytes"] = dataframe[count_column] * element_size

    seconds_per_repetition = 1e-6 * dataframe["runtime"] / dataframe["num_repetitions"]
    dataframe["runtime_normalized_seconds"] = seconds_per_repetition
    dataframe["bytes_per_second"] = dataframe["vec_size_bytes"] / seconds_per_repetition

# Unconstrained local network

Performance via `localhost`, without any network limiting.

## Baseline

### localhost performance with `iperf`

We can use the `iperf` command line utility to get an upper limit for the performance of the `localhost` connection.

In [None]:
try:
    # Create a server listening on port 3000. We need to use `subprocess` manually to ensure the Jupyter process isn't blocked.
    iperf_server_proc = subprocess.Popen(['iperf', '-s', '-p', '3000'])
    # Create an iperf client
    !iperf -c localhost -p 3000 -f M
finally:
    # Shut down the server
    iperf_server_proc.terminate()

### memcpy performance

As a point of comparison, we check the performance of in-process copying.

In [None]:
df_memcpy = pd.read_csv("memcpy.csv")
# Throughput: total bytes copied (repetitions * size) divided by the elapsed
# time converted from nanoseconds to seconds.
df_memcpy["bytes_per_second"] = (
    df_memcpy["repetitions"]
    .mul(df_memcpy["size_bytes"])
    .div(df_memcpy["duration_nanoseconds"].mul(1e-9))
)

In [None]:
# Scatter of memcpy throughput against copy size, with a log2 x-axis.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(data=df_memcpy, x="size_bytes", y="bytes_per_second", ax=ax)
ax.set_xscale("log", base=2)

## Loading gRPC test results

In [None]:
# Load the synchronous-client measurements and tag every row with its mode.
df_sync = pd.read_csv("sync.csv", skipinitialspace=True).assign(execution_mode="sync")

In [None]:
# Load the threaded-client measurements and tag every row with its mode.
df_threaded = pd.read_csv("threaded_client.csv", skipinitialspace=True).assign(
    execution_mode="threaded"
)

In [None]:
# Combine both execution modes into a single frame. `ignore_index=True` gives
# the result a fresh 0..n-1 index; without it the default row labels of the two
# CSV reads would overlap, leaving duplicate index labels that make label-based
# selection ambiguous and can break index alignment in downstream code.
df = pd.concat([df_sync, df_threaded], ignore_index=True)
df

In [None]:
# Adds the derived byte-size, normalized-runtime and throughput columns to `df`
# in place (the helper returns nothing).
add_computed_properties(df)

## Analysis

### Chunk sizes

For the data analysis, an important point is that the `GetArrayChunked` and `GetArrayBinaryChunked` methods have measurements for different chunk sizes. As such, the average runtimes are not directly comparable. We want to first see how chunk size affects performance.

In [None]:
# Keep only the measurements with a non-zero chunk size, i.e. the methods that
# use chunking.
df_chunked = df.loc[df["chunk_size"].ne(0)]

We show the performance as a function of chunk size, for reasonably large vector sizes.

In [None]:
# One panel per data type (2x2 grid, shared y-axis): throughput against chunk
# size for vectors of at least 1,000,000 bytes, `sync` execution mode only.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 15), sharey=True)
for type_id, ax in zip(df_chunked["type_id"].unique(), axes.flat):
    sns.lineplot(
        data=df_chunked.loc[
            df_chunked["type_id"].eq(type_id)
            & df_chunked["vec_size_bytes"].ge(1000000)
            & df_chunked["execution_mode"].eq("sync")
        ],
        x="chunk_size_bytes",
        y="bytes_per_second",
        hue="method_id",
        ax=ax,
    )
    ax.set_xscale("log", base=2)
    ax.set_title(type_id)

### Best performance

We now pick the best chunk size (the one with the lowest mean runtime) for each combination of parameters.

In [None]:
# Mean normalized runtime for every combination of vector size, data type,
# method, execution mode and chunk size. The aggregation is given as the string
# "mean": passing the NumPy callable `np.mean` to `GroupBy.agg` is deprecated in
# pandas 2.x and emits a FutureWarning.
df_by_chunksize = df.groupby(["vec_size_bytes", "type_id", "method_id", "execution_mode", "chunk_size_bytes"])
df_aggregate = df_by_chunksize.agg({"runtime_normalized_seconds": "mean"}).reset_index()
# Within each parameter combination (chunk size excluded), keep the row whose
# mean runtime is smallest, i.e. the best-performing chunk size.
grouped = df_aggregate.groupby(["vec_size_bytes", "type_id", "method_id", "execution_mode"])
df_best_chunksize = df_aggregate.loc[grouped["runtime_normalized_seconds"].idxmin()]

In [None]:
# Throughput at the best chunk size: vector size in bytes divided by the mean
# normalized runtime in seconds.
df_best_chunksize["bytes_per_second"] = df_best_chunksize["vec_size_bytes"].div(
    df_best_chunksize["runtime_normalized_seconds"]
)

Performance of the different methods at their best chunk size. For readability, we only show two data types (`int32` and `sfixed32`).

In [None]:
# Side-by-side panels (shared y-axis), one per execution mode: throughput at
# the best chunk size against vector size, restricted to int32 and sfixed32.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 6), sharey=True)
for execution_mode, ax in zip(("sync", "threaded"), axes):
    sns.lineplot(
        data=df_best_chunksize.loc[
            df_best_chunksize["execution_mode"].eq(execution_mode)
            & df_best_chunksize["type_id"].isin(["int32", "sfixed32"])
        ],
        x="vec_size_bytes",
        y="bytes_per_second",
        hue="method_id",
        style="type_id",
        ax=ax,
    )
    ax.set_title(f"execution_mode={execution_mode}")
    ax.set_xscale("log", base=2)

Best-performing chunk size for the `sfixed64` data type in `sync` operation.

In [None]:
# Best chunk size against vector size for the chunked methods (non-zero chunk
# size), sfixed64 data in sync mode; both axes use a log2 scale.
fig, ax = plt.subplots(figsize=(12, 10))
sns.lineplot(
    data=df_best_chunksize.loc[
        df_best_chunksize["chunk_size_bytes"].ne(0)
        & df_best_chunksize["type_id"].eq("sfixed64")
        & df_best_chunksize["execution_mode"].eq("sync")
    ],
    x="vec_size_bytes",
    y="chunk_size_bytes",
    hue="method_id",
    ax=ax,
)
ax.set_xscale("log", base=2)
ax.set_yscale("log", base=2)

# Constrained network

## Loading results

In [None]:
# Load the bandwidth-limited measurements and add the derived columns in place.
df_bandwidth_constrained = pd.read_csv(
    "rate_100mbit.csv",
    skipinitialspace=True,
)
add_computed_properties(dataframe=df_bandwidth_constrained)

In [None]:
# Load the latency-limited measurements and add the derived columns in place.
df_latency_constrained = pd.read_csv(
    "delay_1ms.csv",
    skipinitialspace=True,
)
add_computed_properties(dataframe=df_latency_constrained)

## Analysis

### Bandwidth-constrained

In [None]:
# Show which columns are available in the bandwidth-constrained results.
df_bandwidth_constrained.columns

In [None]:
# The 100 Mbit/s limit expressed in bytes per second (8 bits per byte).
bw_constraint = 100 * 1e6 / 8
# One panel per item generator; the dashed black line marks the bandwidth limit.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 8))
for ax, item_generator in zip(axes, df_bandwidth_constrained["item_generator_id"].unique()):
    ax.axhline(bw_constraint, color='k', linestyle='--')
    sns.lineplot(
        data=df_bandwidth_constrained.loc[
            df_bandwidth_constrained["type_id"].isin(["int32", "sfixed32"])
            & df_bandwidth_constrained["item_generator_id"].eq(item_generator)
        ],
        x="vec_size_bytes",
        y="bytes_per_second",
        hue="method_id",
        style="type_id",
        ax=ax,
    )
    ax.set_xscale("log", base=2)
    ax.set_title(item_generator)

### Latency-constrained

In [None]:
# Throughput against vector size under the latency constraint, for all methods
# (colour) and data types (line style).
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(
    data=df_latency_constrained,
    x="vec_size_bytes",
    y="bytes_per_second",
    hue="method_id",
    style="type_id",
    ax=ax,
)
ax.set_xscale("log", base=2)

In [None]:
# Normalized runtime against vector size under the latency constraint. The
# dashed black line marks 1e-3 s (presumably the 1 ms delay the file name
# "delay_1ms.csv" refers to -- confirm); the y-axis is cut at ten times that.
fig, ax = plt.subplots(figsize=(12, 8))
latency_constraint = 1e-3
ax.axhline(latency_constraint, color='k', linestyle='--')
sns.lineplot(
    data=df_latency_constrained,
    x="vec_size_bytes",
    y="runtime_normalized_seconds",
    hue="method_id",
    style="type_id",
    ax=ax,
)
ax.set_ylim(bottom=0, top=10 * latency_constraint)
ax.set_xscale("log", base=2)