In [2]:
# hide

%load_ext nb_black

<IPython.core.display.Javascript object>

In [3]:
# default_exp core

<IPython.core.display.Javascript object>

# module Core

> Core benchmark logic

In [4]:
# export

import os
import math
import hashlib

from pathlib import Path
from typing import Optional, Callable, Any

from pydantic import BaseModel

<IPython.core.display.Javascript object>

# Benchmark Files

In [5]:
# export


def md5sum_for_path(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is hashed chunk by chunk so that large benchmark files do not
    have to be held in memory in one piece.

    Args:
        path: Object with a ``pathlib.Path``-style ``open`` method that
            points at the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The MD5 digest of the file content as a hexadecimal string.
    """
    digest = hashlib.md5()
    with path.open("rb") as f:
        # iter() with a sentinel stops at the empty bytes object read at EOF.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


class FilesystemCreator:
    """Callable that makes sure a file with random content exists on disk."""

    def __call__(self, path, size):
        """Create ``path`` with ``size`` random bytes if needed and hash it.

        Args:
            path: ``pathlib.Path`` of the file to create or reuse. Missing
                parent directories are created.
            size: Expected size of the file in bytes.

        Returns:
            The digest returned by ``md5sum_for_path`` for the file.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        # Reuse an existing file only when it has the requested size. A file
        # left truncated by an interrupted run would otherwise be kept and
        # served with the wrong number of bytes.
        if not path.exists() or path.stat().st_size != size:
            with path.open("wb") as f:
                f.write(os.urandom(size))
        return md5sum_for_path(path)


class BenchmarkFile(BaseModel):
    """One benchmark payload file plus the URL it is reachable under."""

    number: int  # index of the file, used as its file name
    base_path: str  # directory below data_root
    size: int  # passed to the creator as the requested size
    data_root: str = "data"
    hostname: str = "localhost"
    port: int = 8000
    md5sum: Optional[str] = None  # filled in by get_or_create()
    creator: Callable = FilesystemCreator()

    @property
    def host(self):
        """Scheme, hostname and port, without a trailing slash."""
        return f"http://{self.hostname}:{self.port}"

    @property
    def path(self):
        """Slash-separated path of the file, starting at ``data_root``."""
        return f"{self.data_root}/{self.base_path}/{self.number}"

    @property
    def url(self):
        """Full URL built from ``host`` and ``path``."""
        return f"{self.host}/{self.path}"

    @property
    def filesystem_path(self):
        """Location of the file as a ``pathlib.Path``."""
        return Path(self.data_root, self.base_path, str(self.number))

    def get_or_create(self):
        """Run the creator for this file and remember the value it returns."""
        digest = self.creator(self.filesystem_path, self.size)
        self.md5sum = digest

<IPython.core.display.Javascript object>

## Usage

In [6]:
# Describe file number 0 of a 3 MB payload. Nothing is written to disk here
# because get_or_create() is not called; the cell only displays the URL.
base_path = "3000000_2_12500000"
benchmark_file = BenchmarkFile(number=0, size=10 ** 6 * 3, base_path=base_path)
benchmark_file.url

'http://localhost:8000/data/3000000_2_12500000/0'

<IPython.core.display.Javascript object>

## Tests

In [7]:
# The URL must contain the default hostname and the base path given above.
assert "localhost" in benchmark_file.url
assert base_path in benchmark_file.url

<IPython.core.display.Javascript object>

In [8]:
file_size = 10 ** 6 * 3


class TestCreator:
    """Creator stub that checks the requested size and writes no file."""

    def __call__(self, path, size):
        assert size == file_size
        return "test_md5sum"


benchmark_file = BenchmarkFile(
    number=0, size=file_size, base_path=base_path, creator=TestCreator()
)
benchmark_file.get_or_create()
# get_or_create() must store whatever the injected creator returned.
assert benchmark_file.md5sum == "test_md5sum"

<IPython.core.display.Javascript object>

# Benchmark Rows

In [21]:
# export


class BenchmarkRow(BaseModel):
    """One benchmark configuration: a file size at a duration and bandwidth."""

    file_size: int  # size of a single file
    duration: int = 30  # in seconds
    bandwidth: int = int(10 ** 9 / 8)  # in bytes per second
    files: list[BenchmarkFile] = []
    file_creator: Callable = FilesystemCreator()
    elapsed: Optional[float] = None
    data_root: str = "data"

    def __str__(self):
        return f"size: {self.file_size} duration: {self.duration} bandwidth: {self.bandwidth}"

    @property
    def base_path(self):
        """Directory name made of file size, duration and bandwidth."""
        return f"{self.file_size}_{self.duration}_{self.bandwidth}"

    @property
    def complete_size(self):
        """Total number of bytes: bandwidth times duration."""
        return self.bandwidth * self.duration

    @property
    def number_of_files(self):
        """Files needed to reach ``complete_size``, rounded up."""
        ratio = self.complete_size / self.file_size
        return math.ceil(ratio)

    @property
    def number_of_connections(self):
        """Ratio of bandwidth to file size, rounded up."""
        ratio = self.bandwidth / self.file_size
        return math.ceil(ratio)

    def get_bytes_per_second(self, elapsed):
        """Return ``complete_size`` divided by ``elapsed``."""
        total = self.complete_size
        return total / elapsed

    @property
    def bytes_per_second(self):
        """Throughput computed from the stored ``elapsed`` value."""
        return self.get_bytes_per_second(self.elapsed)

    def create_files(self):
        """Populate ``files`` once; later calls are no-ops."""
        if self.files:
            return
        for index in range(self.number_of_files):
            entry = BenchmarkFile(
                number=index,
                base_path=self.base_path,
                size=self.file_size,
                creator=self.file_creator,
                data_root=self.data_root,
            )
            # Let the creator run before the entry is recorded.
            entry.get_or_create()
            self.files.append(entry)

<IPython.core.display.Javascript object>

In [22]:
# hide


class DummyCreator:
    """Creator stand-in that writes nothing and always reports ``"dummy"``."""

    def __call__(self, path, size):
        # Both arguments are accepted only to match the creator signature.
        placeholder_md5sum = "dummy"
        return placeholder_md5sum

<IPython.core.display.Javascript object>

## Usage

In [23]:
byte = 8  # bits per byte
hundred_mbit = 10 ** 8  # 100 Mbit/s expressed in bits per second
bandwidth = hundred_mbit / byte  # bytes per second (true division gives a float)
duration = 2  # seconds
file_size = 10 ** 6 * 3  # 3 MB

# DummyCreator keeps this example from writing any files.
benchmark_row = BenchmarkRow(
    file_size=file_size,
    duration=duration,
    bandwidth=bandwidth,
    file_creator=DummyCreator(),
)
benchmark_row.create_files()
# ceil(2 s * 12_500_000 B/s / 3_000_000 B) = 9 files
print(len(benchmark_row.files))

9


<IPython.core.display.Javascript object>

## Tests

In [24]:
byte = 8  # bits per byte
gigabit = 10 ** 9  # 1 Gbit/s expressed in bits per second

# Parameters for a 30 s run at 1 Gbit/s with 10 MB files; DummyCreator
# keeps the tests from touching the filesystem.
test_params = {
    "file_size": 10 ** 6 * 10,  # 10MB
    "duration": 30,
    "bandwidth": gigabit / byte,
    "file_creator": DummyCreator(),
}

test_benchmark_row = BenchmarkRow(**test_params)

<IPython.core.display.Javascript object>

In [25]:
# 10 ** 9 bits/s divided by 8 bits per byte
assert test_benchmark_row.bandwidth == 125000000
# ceil(30 s * 125_000_000 B/s / 10_000_000 B) = 375
assert test_benchmark_row.number_of_files == 375
# complete_size divided by 30 s gives back the configured bandwidth
assert test_benchmark_row.get_bytes_per_second(30.0) == test_benchmark_row.bandwidth
# ceil(125_000_000 / 10_000_000) = ceil(12.5) = 13
assert test_benchmark_row.number_of_connections == 13

<IPython.core.display.Javascript object>

In [26]:
# The first call creates one BenchmarkFile per required file.
test_benchmark_row.create_files()
assert len(test_benchmark_row.files) == test_benchmark_row.number_of_files

# A second call must be a no-op: files are not generated twice.
test_benchmark_row.create_files()
assert len(test_benchmark_row.files) == test_benchmark_row.number_of_files

<IPython.core.display.Javascript object>

# Core Benchmark Model

In [27]:
# export


class Benchmark(BaseModel):
    """Set of benchmark rows that share duration, bandwidth and creator."""

    duration: int = 30  # in seconds
    bandwidth: int = int(10 ** 9 / 8)  # in bytes per second
    rows: list[BenchmarkRow] = []
    file_creator: Callable = FilesystemCreator()

    def create_rows(self, file_sizes):
        """Build one ``BenchmarkRow`` per entry of ``file_sizes``.

        Does nothing when rows already exist. Every field of this model
        except ``rows`` is forwarded to each new row, and the row's files
        are created before it is stored.
        """
        if self.rows:
            return
        shared = dict(self)
        shared.pop("rows")
        for size in file_sizes:
            row = BenchmarkRow(file_size=size, **shared)
            row.create_files()
            self.rows.append(row)

<IPython.core.display.Javascript object>

## Usage

In [28]:
byte = 8  # bits per byte
hundred_mbit = 10 ** 8  # 100 Mbit/s expressed in bits per second
bandwidth = hundred_mbit / byte  # bytes per second (true division gives a float)
duration = 2  # seconds
file_size = 10 ** 6 * 3  # 3 MB; not used below, create_rows() gets its own sizes

benchmark = Benchmark(
    duration=duration, bandwidth=bandwidth, file_creator=DummyCreator()
)
# One row per file size: 10 MB, 1 MB and 100 kB.
benchmark.create_rows([10 ** 7, 10 ** 6, 10 ** 5])
print(len(benchmark.rows))

3


<IPython.core.display.Javascript object>

## Tests

In [29]:
byte = 8  # bits per byte
gigabit = 10 ** 9  # 1 Gbit/s expressed in bits per second

# Parameters for a 30 s run at 1 Gbit/s; DummyCreator keeps the tests from
# touching the filesystem.
test_params = {
    "duration": 30,
    "bandwidth": gigabit / byte,
    "file_creator": DummyCreator(),
}

test_benchmark = Benchmark(**test_params)

<IPython.core.display.Javascript object>

In [30]:
# create_rows() must produce exactly one row per requested file size.
file_sizes = [10 ** 7, 10 ** 6, 10 ** 5]
test_benchmark.create_rows(file_sizes)
assert len(test_benchmark.rows) == len(file_sizes)

<IPython.core.display.Javascript object>

# Export

In [31]:
# nbdev export step: converts the project's notebooks, as listed in the
# "Converted ..." output below. The import stays in this final cell so it is
# not part of the cells marked `# export`.
from nbdev.export import notebook2script

notebook2script()

Converted 00_core.ipynb.
Converted 01_serve_files.ipynb.
Converted 02_run_benchmark.ipynb.
Converted 03_create_files_old.ipynb.
Converted 04_use_nginx_to_serve_files.ipynb.
Converted 05_run_wrk_benchmark.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>