In [None]:
# hide

%load_ext nb_black

<IPython.core.display.Javascript object>

In [None]:
# default_exp core

<IPython.core.display.Javascript object>

# module Core

> Core benchmark logic

In [None]:
# export

import os
import math
import hashlib

import pandas as pd

from pathlib import Path
from typing import Optional, Callable, Any

from pydantic import BaseModel

<IPython.core.display.Javascript object>

# Benchmark Server

In [None]:
# export


class BenchmarkServer(BaseModel):
    """Base model for a server that a benchmark is run against.

    Every lifecycle hook is a no-op on the base class; concrete servers
    override the ones they need.  ``Benchmark.run`` calls ``start()`` before
    measuring and ``stop()`` afterwards, so both hooks exist here as well and
    a plain ``BenchmarkServer`` can take part in a run.
    """

    name: str = "base_server"

    def create(self):
        """Hook for provisioning the server; does nothing by default."""

    def remove(self):
        """Hook for tearing the server down; does nothing by default."""

    def start(self):
        """Hook called by ``Benchmark.run`` before clients are measured."""

    def stop(self):
        """Hook called by ``Benchmark.run`` after clients were measured."""

<IPython.core.display.Javascript object>

## Usage

In [None]:
name = "Nginx Docker"
server = BenchmarkServer(name=name)

<IPython.core.display.Javascript object>

## Tests

In [None]:
assert server.name == name

<IPython.core.display.Javascript object>

# Checksum Mixin

In [None]:
# export


class CheckSumMixin:
    """Mixin that gives a class an MD5 checksum helper."""

    def calculate_checksum(self, content):
        """Return the hexadecimal MD5 digest of ``content``."""
        hasher = hashlib.md5(content)
        return hasher.hexdigest()

<IPython.core.display.Javascript object>

## Usage

In [None]:
class FooBar(CheckSumMixin):
    """Minimal example of a class that gains checksum support from the mixin."""

    def some_method_returning_checksum(self, content):
        """Return the checksum the mixin computes for ``content``."""
        checksum = self.calculate_checksum(content)
        return checksum


foobar = FooBar()

<IPython.core.display.Javascript object>

## Tests

In [None]:
test_content = b"foobar"
assert (
    foobar.some_method_returning_checksum(test_content)
    == hashlib.md5(test_content).hexdigest()
)

<IPython.core.display.Javascript object>

# Benchmark Client

In [None]:
# export


class BenchmarkClient(CheckSumMixin, BaseModel):
    """Base model for clients that download benchmark files."""

    name: str = "base_client"

    def verify_checksums(self, benchmark_files, responses):
        """Check that every benchmark file was downloaded unaltered.

        benchmark_files: iterable of objects exposing ``url`` and ``checksum``.
        responses: iterable of objects exposing ``url`` and ``content``.

        Raises:
            AssertionError: if the checksum of a downloaded body differs from
                the file's expected checksum, or a file with a checksum has
                no matching response.
        """
        # Later responses for the same URL overwrite earlier ones.
        checksum_lookup = {
            str(response.url): self.calculate_checksum(response.content)
            for response in responses
        }

        for bf in benchmark_files:
            actual = checksum_lookup.get(bf.url)
            # Raise explicitly instead of using a bare ``assert``: assert
            # statements are removed when Python runs with -O, which would
            # silently turn this verification into a no-op.
            if bf.checksum != actual:
                raise AssertionError(
                    f"checksum mismatch for {bf.url}: "
                    f"expected {bf.checksum!r}, got {actual!r}"
                )

<IPython.core.display.Javascript object>

## Usage

In [None]:
name = "Httpx"
client = BenchmarkClient(name=name)

<IPython.core.display.Javascript object>

## Tests

In [None]:
assert client.name == name

<IPython.core.display.Javascript object>

# Benchmark Files

In [None]:
# export


class FilesystemCreator(CheckSumMixin):
    """Callable that makes sure a file exists on disk and checksums it."""

    def checksum_for_path(self, path):
        """Return the checksum of the complete contents of the file at ``path``."""
        with path.open("rb") as handle:
            return self.calculate_checksum(handle.read())

    def __call__(self, path, size):
        """Ensure ``path`` exists and return the checksum of its contents.

        Missing parent directories are created.  A file that already exists
        is left untouched; otherwise ``size`` random bytes are written to it.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        file_is_missing = not path.exists()
        if file_is_missing:
            with path.open("wb") as handle:
                handle.write(os.urandom(size))
        return self.checksum_for_path(path)


class BenchmarkFile(BaseModel):
    """One file of a benchmark, addressable both on disk and via HTTP."""

    number: int  # index of the file; also used as its file name
    base_path: str  # directory below data_root that holds the file
    size: int  # size handed to the creator
    data_root: str = "data"
    hostname: str = "localhost"
    port: int = 8000
    checksum: Optional[str] = None  # filled in by get_or_create()
    creator: Callable = FilesystemCreator()

    @property
    def host(self):
        """Scheme, hostname and port the file is served from."""
        return f"http://{self.hostname}:{self.port}"

    @property
    def path(self):
        """URL path of the file (no leading slash)."""
        return f"{self.data_root}/{self.base_path}/{self.number}"

    @property
    def url(self):
        """Complete URL of the file: host followed by path."""
        return f"{self.host}/{self.path}"

    @property
    def filesystem_path(self):
        """Location of the file on disk as a ``Path``."""
        root = Path(self.data_root)
        return root / self.base_path / str(self.number)

    def get_or_create(self):
        """Call the creator for this file and remember the checksum it returns."""
        target = self.filesystem_path
        self.checksum = self.creator(target, self.size)

<IPython.core.display.Javascript object>

## Usage

In [None]:
base_path = "3000000_2_12500000"
benchmark_file = BenchmarkFile(number=0, size=10 ** 6 * 3, base_path=base_path)
benchmark_file.url

'http://localhost:8000/data/3000000_2_12500000/0'

<IPython.core.display.Javascript object>

## Tests

In [None]:
assert "localhost" in benchmark_file.url
assert base_path in benchmark_file.url

<IPython.core.display.Javascript object>

In [None]:
file_size = 10 ** 6 * 3


class TestCreator:
    """Creator stub: checks the requested size and returns a fixed checksum."""

    def __call__(self, path, size):
        assert size == file_size
        return "test_md5sum"


benchmark_file = BenchmarkFile(
    number=0, size=file_size, base_path=base_path, creator=TestCreator()
)
benchmark_file.get_or_create()

# get_or_create() has to store whatever the creator returned
assert benchmark_file.checksum == "test_md5sum"

<IPython.core.display.Javascript object>

# Benchmark Rows

In [None]:
# export


class BenchmarkRow(BaseModel):
    """One benchmark configuration: a file size transferred for a duration at a bandwidth."""

    file_size: int  # size of a single file
    duration: int = 30  # in seconds
    bandwidth: int = int(10 ** 9 / 8)  # in bytes per second
    files: list[BenchmarkFile] = []
    file_creator: Callable = FilesystemCreator()
    elapsed: Optional[float] = None
    data_root: str = "data"

    def __str__(self):
        return f"size: {self.file_size} duration: {self.duration} bandwidth: {self.bandwidth}"

    @property
    def base_path(self):
        """Directory name that encodes file size, duration and bandwidth."""
        return f"{self.file_size}_{self.duration}_{self.bandwidth}"

    @property
    def complete_size(self):
        """Total number of bytes for the row: duration times bandwidth."""
        return self.duration * self.bandwidth

    @property
    def number_of_files(self):
        """Number of files needed to reach ``complete_size``, rounded up."""
        return math.ceil(self.complete_size / self.file_size)

    @property
    def number_of_connections(self):
        """Bandwidth divided by file size, rounded up."""
        return math.ceil(self.bandwidth / self.file_size)

    def get_bytes_per_second(self, elapsed):
        """Return the throughput if ``complete_size`` bytes took ``elapsed`` seconds."""
        return self.complete_size / elapsed

    @property
    def bytes_per_second(self):
        """Throughput based on the ``elapsed`` value stored on the row."""
        return self.get_bytes_per_second(self.elapsed)

    def create_files(self):
        """Create the benchmark files of this row once.

        Does nothing when ``files`` is already populated.
        """
        if self.files:
            return
        # Collect into a local list first and publish only after every file
        # was created.  Appending to ``self.files`` one by one would leave a
        # partial list behind if a creator raises, and the guard above would
        # then turn every retry into a no-op with too few files.
        created = []
        for num in range(self.number_of_files):
            benchmark_file = BenchmarkFile(
                number=num,
                base_path=self.base_path,
                size=self.file_size,
                creator=self.file_creator,
                data_root=self.data_root,
            )
            benchmark_file.get_or_create()
            created.append(benchmark_file)
        self.files.extend(created)

<IPython.core.display.Javascript object>

In [None]:
# hide


class DummyCreator:
    """Creator stand-in that returns a fixed checksum without creating anything."""

    def __call__(self, path, size):
        """Ignore ``path`` and ``size`` and return the placeholder checksum."""
        placeholder_checksum = "dummy"
        return placeholder_checksum

<IPython.core.display.Javascript object>

## Usage

In [None]:
byte = 8  # bits per byte
hundred_mbit = 10 ** 8  # 100 Mbit/s in bits per second
# integer division keeps the bandwidth an int, matching the model field
bandwidth = hundred_mbit // byte  # bytes per second
duration = 2  # seconds
file_size = 10 ** 6 * 3  # 3 MB

benchmark_row = BenchmarkRow(
    file_size=file_size,
    duration=duration,
    bandwidth=bandwidth,
    file_creator=DummyCreator(),
)
benchmark_row.create_files()
print(len(benchmark_row.files))

9


<IPython.core.display.Javascript object>

## Tests

In [None]:
byte = 8
gigabit = 10 ** 9

test_params = {
    "file_size": 10 ** 6 * 10,  # 10MB
    "duration": 30,
    "bandwidth": gigabit / byte,
    "file_creator": DummyCreator(),
}

test_benchmark_row = BenchmarkRow(**test_params)

<IPython.core.display.Javascript object>

In [None]:
assert test_benchmark_row.bandwidth == 125000000
assert test_benchmark_row.number_of_files == 375
assert test_benchmark_row.get_bytes_per_second(30.0) == test_benchmark_row.bandwidth
assert test_benchmark_row.number_of_connections == 13

<IPython.core.display.Javascript object>

In [None]:
test_benchmark_row.create_files()
assert len(test_benchmark_row.files) == test_benchmark_row.number_of_files

# assert we don't generate files twice
test_benchmark_row.create_files()
assert len(test_benchmark_row.files) == test_benchmark_row.number_of_files

<IPython.core.display.Javascript object>

# Benchmark Result

In [None]:
# export


def convert_size(size_bytes):
    """Convert a byte count into a ``(value, unit)`` pair using 1024-based units.

    Args:
        size_bytes: non-negative number of bytes (int or float).

    Returns:
        A tuple ``(value, unit)`` where ``value`` is rounded to two decimals
        and ``unit`` is one of "B", "KB", ..., "YB".  Zero yields ``(0, "B")``
        so that the return shape is the same for every input.

    Raises:
        ValueError: if ``size_bytes`` is negative.
    """
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    if size_bytes == 0:
        return 0, "B"
    if size_bytes < 0:
        raise ValueError("size_bytes must not be negative")

    value = float(size_bytes)
    unit_index = 0
    # Step up one unit at a time instead of deriving the index from a
    # logarithm: values below one byte stay in "B" (a negative index would
    # wrap around to "YB") and huge values stop at the largest known unit
    # instead of running past the end of the tuple.
    while value >= 1024 and unit_index < len(size_name) - 1:
        value /= 1024
        unit_index += 1
    return round(value, 2), size_name[unit_index]


class BenchmarkResult(BaseModel):
    """Outcome of measuring one client against one server for one file size."""

    server: str
    client: str
    file_size: int
    elapsed: float
    complete_size: int
    platform: str

    def make_readable(self, size_in_bytes):
        """Format a byte count as value directly followed by unit, e.g. ``31.79MB``."""
        size, unit = convert_size(size_in_bytes)
        return f"{size}{unit}"

    @property
    def readable_file_size(self):
        """``file_size`` in human readable form."""
        return self.make_readable(self.file_size)

    @property
    def bytes_per_second(self):
        """Throughput: ``complete_size`` divided by ``elapsed``."""
        return self.complete_size / self.elapsed

    @property
    def readable_bytes_per_second(self):
        """``bytes_per_second`` in human readable form."""
        return self.make_readable(self.bytes_per_second)

    def dict(self, **kwargs):
        """Return the model fields plus the derived, human readable values.

        Keyword arguments are forwarded to ``BaseModel.dict`` so the override
        keeps accepting the options of the method it replaces.  The derived
        keys ``file_size_h``, ``bytes_per_second`` and ``bytes_per_second_h``
        are always added.
        """
        return {
            **super().dict(**kwargs),
            "file_size_h": self.readable_file_size,
            "bytes_per_second": self.bytes_per_second,
            "bytes_per_second_h": self.readable_bytes_per_second,
        }

<IPython.core.display.Javascript object>

## Usage

In [None]:
file_size = 10 ** 6
complete_size = 100 * file_size
result = BenchmarkResult(
    server="nginx",
    client="httpx",
    file_size=file_size,
    elapsed=3.0,
    complete_size=complete_size,
    platform="x86_64",
)
print(result.dict())

{'server': 'nginx', 'client': 'httpx', 'file_size': 1000000, 'elapsed': 3.0, 'complete_size': 100000000, 'platform': 'x86_64', 'file_size_h': '976.56KB', 'bytes_per_second': 33333333.333333332, 'bytes_per_second_h': '31.79MB'}


<IPython.core.display.Javascript object>

## Tests

In [None]:
assert result.readable_bytes_per_second == "31.79MB"

<IPython.core.display.Javascript object>

# Core Benchmark Model

In [None]:
# export
import cpuinfo
import platform


class Benchmark(BaseModel):
    """Runs every client against every server for a set of file sizes."""

    duration: int = 30  # in seconds
    bandwidth: int = int(10 ** 9 / 8)  # in bytes per second
    file_sizes: list[int] = [10 ** 7, 10 ** 6, 10 ** 5]
    rows: list[BenchmarkRow] = []
    file_creator: Callable = FilesystemCreator()
    platform: Optional[str] = None
    cpuinfo: Optional[dict] = None
    servers: list[BenchmarkServer] = []
    clients: list[BenchmarkClient] = []
    results: list[BenchmarkResult] = []

    def create_row_from_file_size(self, file_size):
        """Build the row for ``file_size`` and create its files."""
        # Only settings shared with BenchmarkRow are forwarded; everything
        # listed here belongs to the benchmark itself and is not a row field.
        do_not_copy = {
            "rows",
            "file_sizes",
            "servers",
            "clients",
            "results",
            "platform",
            "cpuinfo",
        }
        kwargs = {k: v for k, v in dict(self).items() if k not in do_not_copy}
        br = BenchmarkRow(file_size=file_size, **kwargs)
        br.create_files()
        return br

    def create_rows(self):
        """Create one row per entry of ``file_sizes`` unless rows already exist."""
        if len(self.rows) > 0:
            # benchmark rows were already created
            return

        # create a row for each file_size
        for file_size in self.file_sizes:
            self.rows.append(self.create_row_from_file_size(file_size))

    def test_server_with_client(self, server, client):
        """Measure every row with ``client`` and append one result per row."""
        if self.platform is None:
            # BenchmarkResult requires a platform string; collect it here when
            # this method is used without going through run().
            self.collect_information_about_platform()
        for benchmark_row in self.rows:
            elapsed = client.measure(benchmark_row)
            result = BenchmarkResult(
                server=server.name,
                client=client.name,
                file_size=benchmark_row.file_size,
                elapsed=elapsed,
                complete_size=benchmark_row.complete_size,
                platform=self.platform,
            )
            self.results.append(result)

    def collect_information_about_platform(self):
        """Store the machine type and the CPU information on the benchmark."""
        self.platform = platform.machine()
        self.cpuinfo = cpuinfo.get_cpu_info()

    def run(self):
        """Start each server, measure all clients against it, then stop it."""
        self.collect_information_about_platform()
        for server in self.servers:
            # start with servers, because they are more expensive to create
            server.start()
            try:
                for client in self.clients:
                    self.test_server_with_client(server, client)
            finally:
                # stop the server even when a measurement raised, so that a
                # failed run does not leave it running
                server.stop()

    @property
    def results_frame(self):
        """All results as a DataFrame with one row per result."""
        return pd.DataFrame([r.dict() for r in self.results])

<IPython.core.display.Javascript object>

In [None]:
# hide


class DummyClient(BenchmarkClient):
    """Client stub that records that it was asked to measure."""

    measured: bool = False

    def measure(self, benchmark_row):
        """Mark the client as measured and report a fixed elapsed time."""
        fixed_elapsed = 2.0
        self.measured = True
        return fixed_elapsed


class DummyServer(BenchmarkServer):
    """Server stub that only records whether it was started and stopped."""

    started: bool = False
    stopped: bool = False

    def start(self):
        """Remember that the server was started."""
        self.started = True

    def stop(self):
        """Remember that the server was stopped."""
        self.stopped = True

<IPython.core.display.Javascript object>

## Usage

In [None]:
byte = 8  # bits per byte
hundred_mbit = 10 ** 8  # 100 Mbit/s in bits per second
# integer division keeps the bandwidth an int, matching the model field
bandwidth = hundred_mbit // byte  # bytes per second
duration = 2  # seconds
file_size = 10 ** 6 * 3  # 3 MB; not passed to Benchmark, which takes file_sizes
file_sizes = [10 ** 7, 10 ** 6, 10 ** 5]

benchmark = Benchmark(
    duration=duration,
    bandwidth=bandwidth,
    file_creator=DummyCreator(),
    file_sizes=file_sizes,
    servers=[DummyServer(name="Nginx")],
    clients=[DummyClient(name="Httpx")],
)
benchmark.create_rows()
print(len(benchmark.rows))

benchmark.run()
print(benchmark.results)

3
[BenchmarkResult(server='Nginx', client='Httpx', file_size=10000000, elapsed=2.0, complete_size=25000000, platform='x86_64'), BenchmarkResult(server='Nginx', client='Httpx', file_size=1000000, elapsed=2.0, complete_size=25000000, platform='x86_64'), BenchmarkResult(server='Nginx', client='Httpx', file_size=100000, elapsed=2.0, complete_size=25000000, platform='x86_64')]


<IPython.core.display.Javascript object>

In [None]:
benchmark.results_frame

Unnamed: 0,server,client,file_size,elapsed,complete_size,platform,file_size_h,bytes_per_second,bytes_per_second_h
0,Nginx,Httpx,10000000,2.0,25000000,x86_64,9.54MB,12500000.0,11.92MB
1,Nginx,Httpx,1000000,2.0,25000000,x86_64,976.56KB,12500000.0,11.92MB
2,Nginx,Httpx,100000,2.0,25000000,x86_64,97.66KB,12500000.0,11.92MB


<IPython.core.display.Javascript object>

## Tests

In [None]:
byte = 8
gigabit = 10 ** 9
file_sizes = [10 ** 7, 10 ** 6, 10 ** 5]

test_params = {
    "duration": 30,
    "bandwidth": gigabit / byte,
    "file_creator": DummyCreator(),
    "file_sizes": file_sizes,
}

test_benchmark = Benchmark(**test_params)

test_benchmark.create_rows()
assert len(test_benchmark.rows) == len(file_sizes)

<IPython.core.display.Javascript object>

In [None]:
class TestClient(BenchmarkClient):
    """Client stub that prints the row it is given and reports two seconds."""

    def measure(self, benchmark_row):
        print("measure_benchmark_row: ", benchmark_row)
        return 2.0


class TestServer(BenchmarkServer):
    """Server stub without any behaviour of its own."""


# extend the parameters of the previous cell with one client and one server
test_params = dict(
    test_params,
    clients=[DummyClient(name="foo")],
    servers=[DummyServer(name="bar")],
)
test_benchmark = Benchmark(**test_params)
test_benchmark.create_rows()
test_benchmark.run()

<IPython.core.display.Javascript object>

In [None]:
assert len(test_benchmark.results) == len(test_benchmark.rows)

<IPython.core.display.Javascript object>

In [None]:
assert test_benchmark.servers[0].started
assert test_benchmark.servers[0].stopped

<IPython.core.display.Javascript object>

In [None]:
assert test_benchmark.platform == platform.machine()
# run() also stores the collected CPU information on the benchmark
assert isinstance(test_benchmark.cpuinfo, dict)

<IPython.core.display.Javascript object>

In [None]:
import cpuinfo

cpuinfo.get_cpu_info()

{'python_version': '3.9.1.final.0 (64 bit)',
 'cpuinfo_version': [7, 0, 0],
 'cpuinfo_version_string': '7.0.0',
 'arch': 'X86_64',
 'bits': 64,
 'count': 12,
 'arch_string_raw': 'x86_64',
 'vendor_id_raw': 'GenuineIntel',
 'brand_raw': 'Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz',
 'hz_advertised_friendly': '2.6000 GHz',
 'hz_actual_friendly': '2.6000 GHz',
 'hz_advertised': [2600000000, 0],
 'hz_actual': [2600000000, 0],
 'l2_cache_size': 262144,
 'stepping': 10,
 'model': 158,
 'family': 6,
 'flags': ['1gbpage',
  '3dnowprefetch',
  'abm',
  'acpi',
  'adx',
  'aes',
  'apic',
  'avx',
  'avx1.0',
  'avx2',
  'bmi1',
  'bmi2',
  'clflush',
  'clflushopt',
  'clfsh',
  'clfsopt',
  'cmov',
  'cx16',
  'cx8',
  'de',
  'ds',
  'ds_cpl',
  'dscpl',
  'dtes64',
  'dts',
  'em64t',
  'erms',
  'est',
  'f16c',
  'fma',
  'fpu',
  'fpu_csds',
  'fxsr',
  'ht',
  'htt',
  'ibrs',
  'intel_pt',
  'invpcid',
  'ipt',
  'l1df',
  'lahf',
  'lahf_lm',
  'lzcnt',
  'mca',
  'mce',
  'mdclear',
  '

<IPython.core.display.Javascript object>

# Export

In [None]:
from nbdev.export import notebook2script

notebook2script()

Converted 00_core.ipynb.
Converted 01_serve_files_uvicorn.ipynb.
Converted 02_benchmark_clients.ipynb.
Converted 03_run_benchmark.ipynb.
Converted 04_use_nginx_to_serve_files.ipynb.
Converted 05_run_wrk_benchmark.ipynb.
Converted 06_fastapi_uvicorn_server.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>