In [None]:
from pprint import pprint

%load_ext nb_black

<IPython.core.display.Javascript object>

In [None]:
# default_exp legacy_core

<IPython.core.display.Javascript object>

# module Core

> Core benchmark logic

In [None]:
# export

import os
import math
import json
import hashlib

import pandas as pd

from pathlib import Path
from urllib.parse import urljoin
from typing import Optional, Callable, Any

from pydantic import BaseModel

from will_it_saturate.epochs import Epoch
from will_it_saturate.results import Result
from will_it_saturate.servers import BaseServer
from will_it_saturate.clients import BaseClient
from will_it_saturate.files import BenchmarkFile, FILE_CREATORS
from will_it_saturate.repositories import BaseRepository, InMemoryRepository

<IPython.core.display.Javascript object>

## Tests

In [None]:
class TestClient(BaseClient):
    """Stub client for tests: remembers that it was used and reports a fixed time."""

    # flipped to True the first time measure() is called
    measured: bool = False

    def measure(self, benchmark_row):
        """Mark the client as used, echo the argument and return a constant 2.0."""
        fixed_elapsed = 2.0
        self.measured = True
        print("measure_benchmark_row: ", benchmark_row)
        return fixed_elapsed


class TestServer(BaseServer):
    """Stub server for tests: only records whether start() and stop() were called."""

    # set by start(); never reset by stop()
    started: bool = False
    # set by stop()
    stopped: bool = False

    def start(self):
        """Record that the server was started."""
        self.started = True

    def stop(self):
        """Record that the server was stopped."""
        self.stopped = True

<IPython.core.display.Javascript object>

# Core Benchmark Model

In [None]:
# export
import cpuinfo
import platform
import subprocess

from pathlib import Path
from functools import cache


def get_macos_machine_id():
    """Return the last token of the "Serial Number" line printed by system_profiler.

    Runs ``/usr/sbin/system_profiler SPHardwareDataType`` and scans its stdout.
    If several lines contain "Serial Number" the last one wins; if none does,
    ``None`` is returned.
    """
    profiler = subprocess.run(
        ["/usr/sbin/system_profiler", "SPHardwareDataType"],
        capture_output=True,
        text=True,
    )
    serials = [
        row.split()[-1]
        for row in profiler.stdout.split("\n")
        if "Serial Number" in row
    ]
    return serials[-1] if serials else None


def get_linux_machine_id():
    """Return the contents of ``/etc/machine-id`` with trailing whitespace removed.

    Falls back to the placeholder ``"linux_dummy"`` when the file cannot be
    opened or read (e.g. it does not exist), so that callers evaluated at
    import time do not crash on hosts without a machine id.
    """
    fallback_id = "linux_dummy"
    try:
        with Path("/etc/machine-id").open() as f:
            return f.read().rstrip()
    except OSError:
        # Missing or unreadable file: use the placeholder instead of raising.
        return fallback_id


@cache
def get_machine_id():
    """Return the machine id for the current platform (result is cached).

    The platform key is the part of ``platform.platform()`` before the first
    "-", lower-cased. Only "macos" and "linux" are known; any other key raises
    ``KeyError``.
    """
    system_name = platform.platform().lower().partition("-")[0]
    resolvers = {"macos": get_macos_machine_id, "linux": get_linux_machine_id}
    resolver = resolvers[system_name]
    return resolver()


class Benchmark(BaseModel):
    """Settings, participants and collected results of one benchmark run.

    A benchmark holds one epoch per entry in ``file_sizes`` and measures every
    (server, client) pair on every epoch. Two benchmarks hash and compare equal
    when they carry the same ``machine_id``.
    """

    duration: int = 30  # in seconds
    bandwidth: int = int(10 ** 9 / 8)  # in bytes per second
    file_sizes: list[int] = [10 ** 7, 10 ** 6, 10 ** 5]
    epochs: list[Epoch] = []
    file_creator_name: str = "filesystem"
    # The three host-describing defaults below (uname, cpuinfo, machine_id) are
    # computed once, when the class body is executed.
    uname: Optional[Any] = platform.uname()
    cpuinfo: Optional[dict] = cpuinfo.get_cpu_info()
    servers: list[BaseServer] = []
    clients: list[BaseClient] = []
    results: list[Result] = []
    repository: Optional[BaseRepository] = None
    machine_id: str = get_machine_id()

    @property
    def uname_json(self):
        """Return ``uname`` serialized with ``json.dumps``."""
        return json.dumps(self.uname)

    def __hash__(self):
        """Hash on ``machine_id`` only, consistent with ``__eq__``."""
        return hash(self.machine_id)

    def __eq__(self, other):
        """Benchmarks are equal when their ``machine_id`` values are equal."""
        if not isinstance(other, Benchmark):
            return NotImplemented
        return self.machine_id == other.machine_id

    def create_epoch_from_file_size(self, file_size):
        """Build an ``Epoch`` for ``file_size`` from this benchmark's fields.

        All fields except the ones listed in ``do_not_copy`` are forwarded to
        the ``Epoch`` constructor; ``create_files()`` is then called on the new
        epoch before it is returned.
        """
        do_not_copy = {
            "rows",  # former name of "epochs", kept so old data is still skipped
            "epochs",  # do not hand the already-built epochs to a new epoch
            "file_sizes",
            "servers",
            "clients",
            "results",
            "repository",
        }
        kwargs = {k: v for k, v in dict(self).items() if k not in do_not_copy}
        epoch = Epoch(file_size=file_size, **kwargs)
        epoch.create_files()
        return epoch

    def create_epochs(self):
        """Populate ``epochs`` with one epoch per file size (no-op if already set)."""
        if len(self.epochs) > 0:
            # epochs were already created
            return

        # create an epoch for each file_size
        for file_size in self.file_sizes:
            self.epochs.append(self.create_epoch_from_file_size(file_size))

    def build_empty_result(self, epoch, server, client):
        """Return a ``Result`` for the given combination with ``elapsed`` unset."""
        return Result(
            server=server.name,
            client=client.name,
            file_size=epoch.file_size,
            elapsed=None,  # not measured yet
            complete_size=epoch.complete_size,
            platform=self.uname.machine,
        )

    def test_server_with_client(self, server, client):
        """Measure ``client`` against ``server`` for every epoch.

        If a repository is configured and already holds a result with a
        non-``None`` ``elapsed`` for the combination, that result is reused.
        Otherwise the server is started if necessary, the client measures the
        epoch, and the new result is stored in the repository (when present).
        Every result is appended to ``self.results``.
        """
        for epoch in self.epochs:
            result = self.build_empty_result(epoch, server, client)
            already_measured = None
            if self.repository is not None:
                already_measured = self.repository.get_result(self, result)
            if already_measured is not None and already_measured.elapsed is not None:
                print("already measured: ", already_measured)
                result = already_measured
            else:
                if not server.started:
                    server.start()
                result.elapsed = client.measure(epoch)
                if self.repository is not None:
                    self.repository.add_result(self, result)
                print("measured: ", result)
            self.results.append(result)

    def run(self):
        """Run every client against every server, stopping each started server."""
        for server in self.servers:
            # start with servers, because they are more expensive to create
            print(f"server: {server}")
            try:
                for client in self.clients:
                    self.test_server_with_client(server, client)
            finally:
                # stop the server even when a measurement raised
                if server.started:
                    server.stop()

    def json(self):
        """Serialize only ``duration``, ``bandwidth`` and ``cpuinfo`` to JSON."""
        fields = {
            "duration",
            "bandwidth",
            "cpuinfo",
        }
        return super().json(include=fields)

    @property
    def results_frame(self):
        """Return the collected results as a ``pandas.DataFrame``, one row each."""
        return pd.DataFrame([r.dict_with_properties() for r in self.results])

<IPython.core.display.Javascript object>

## Usage

In [None]:
# dont_test


def create_file(path, size):
    """Stub file creator: ignores both arguments and returns a fixed placeholder string."""
    placeholder_md5 = "test_md5sum"
    return placeholder_md5


# Register the stub so the benchmark can look it up by name.
FILE_CREATORS["stub_creator"] = create_file


byte = 8  # bits per byte
hundred_mbit = 10 ** 8  # bits per second
# Benchmark.bandwidth is an int (bytes per second), so use integer division.
bandwidth = hundred_mbit // byte
duration = 2  # seconds
file_size = 10 ** 6 * 3  # 3 MB (not used below)
file_sizes = [10 ** 7, 10 ** 6, 10 ** 5]

benchmark = Benchmark(
    duration=duration,
    bandwidth=bandwidth,
    file_creator_name="stub_creator",
    file_sizes=file_sizes,
    servers=[TestServer(name="Nginx")],
    clients=[TestClient(name="Httpx")],
    repository=InMemoryRepository(),
)
benchmark.create_epochs()
print(len(benchmark.epochs))

# benchmark.run()
# pprint(benchmark.results)
# benchmark.results_frame

3


<IPython.core.display.Javascript object>

## Tests

In [None]:
from collections import namedtuple


def create_file(path, size):
    """Stub file creator: ignores both arguments and returns a fixed placeholder string."""
    placeholder_md5 = "test_md5sum"
    return placeholder_md5


# Register the stub so the benchmark can look it up by name.
FILE_CREATORS["stub_creator"] = create_file


# Minimal stand-in for platform.uname(): only the ``machine`` attribute is read.
TestPlatform = namedtuple("TestPlatform", ["machine"])

byte = 8  # bits per byte
gigabit = 10 ** 9  # bits per second
file_sizes = [10 ** 7, 10 ** 6, 10 ** 5]

test_params = {
    "duration": 3,
    # Integer division keeps the value an int, matching Benchmark.bandwidth;
    # divided by ten for test duration.
    "bandwidth": gigabit // byte // 10,
    "file_creator_name": "stub_creator",
    "file_sizes": file_sizes,
    "cpuinfo": {"python_version": 4.0},
    "uname": TestPlatform("M3"),
    "repository": InMemoryRepository(),
}

test_benchmark = Benchmark(**test_params)

test_benchmark.create_epochs()
assert len(test_benchmark.epochs) == len(file_sizes)
# each epoch must correspond to the file size at the same position
assert [epoch.file_size for epoch in test_benchmark.epochs] == file_sizes

<IPython.core.display.Javascript object>

In [None]:
%%time

# TestClient and TestServer are the stub classes defined in the first "Tests"
# section of this notebook; they are reused here instead of being redefined,
# so there is a single definition of each.
test_params = {
    **test_params,
    "clients": [TestClient(name="foo")],
    "servers": [TestServer(name="bar")],
}
test_benchmark = Benchmark(**test_params)
test_benchmark.create_epochs()
# test_benchmark.run()

CPU times: user 16.5 ms, sys: 536 µs, total: 17 ms
Wall time: 16.6 ms


<IPython.core.display.Javascript object>

In [None]:
# assert len(test_benchmark.results) == len(test_benchmark.epochs)

<IPython.core.display.Javascript object>

In [None]:
# assert test_benchmark.servers[0].started
# assert test_benchmark.servers[0].stopped

<IPython.core.display.Javascript object>

In [None]:
# assert test_benchmark.uname.machine == "M3"
# assert "python_version" in test_benchmark.cpuinfo

<IPython.core.display.Javascript object>

In [None]:
# assert "duration" in test_benchmark.json()

<IPython.core.display.Javascript object>

In [None]:
# assert len(test_benchmark.repository.results[test_benchmark]) == len(
#     test_benchmark.results
# )

<IPython.core.display.Javascript object>

In [None]:
# assert already measured results are not measured again
# test_benchmark.clients = [TestClient(name="foo")]
# test_benchmark.run()
# assert test_benchmark.clients[0].measured == True

<IPython.core.display.Javascript object>

# Export

In [None]:
# dont_test

from nbdev.export import notebook2script

# Run nbdev's notebook-to-module conversion; it prints one "Converted ..." line
# per processed notebook.
notebook2script()

Converted 00_host.ipynb.
Converted 01_django_views.ipynb.
Converted 01_fastapi_views.ipynb.
Converted 01_file.ipynb.
Converted 05_epochs.ipynb.
Converted 06_results.ipynb.
Converted 25_core.ipynb.
Converted 27_control_server.ipynb.
Converted 28_control_client.ipynb.
Converted 29_control_cli.ipynb.
Converted 30_servers.ipynb.
Converted 31_local_servers.ipynb.
Converted 32_docker_servers.ipynb.
Converted 40_clients.ipynb.
Converted 41_benchmark_clients.ipynb.
Converted 70_repositories.ipynb.
Converted 71_sqlite_repository.ipynb.
Converted 90_benchmark_without_benchmark.ipynb.
Converted 98_run_benchmark.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>