In [None]:
# | default_exp _components.benchmarking

In [None]:
# | export

from collections import deque
from datetime import datetime, timedelta
from statistics import mean
from typing import *

from fastkafka._components.logger import get_logger

In [None]:
from fastkafka._components.logger import suppress_timestamps

In [None]:
# NOTE(review): presumably strips timestamps from log records so that the
# captured cell outputs stay stable between runs — confirm against
# fastkafka._components.logger.
suppress_timestamps()

In [None]:
# | export

# Module-level logger; `_benchmark` emits its throughput reports through it.
logger = get_logger("fastkafka.benchmark")

In [None]:
# | export


def _benchmark(
    interval: Union[int, timedelta] = 1,
    *,
    sliding_window_size: Optional[int] = None,
    func_name: str,
    benchmark_results: Dict[str, Dict[str, Any]],
) -> None:
    """Used to record the benchmark results(throughput, average throughput, standard deviation) of a given function

    Args:
        interval: the time interval after which the benchmark results are logged.
        sliding_window_size: the maximum number of benchmark results to use to calculate average throughput and standard deviation.
        func_name: the name of the function to be benchmarked.
        benchmark_results: a dictionary containing the benchmark results of all functions.
    """
    if isinstance(interval, int):
        interval = timedelta(seconds=interval)
    if func_name not in benchmark_results:
        benchmark_results[func_name] = {
            "count": 0,
            "last_count": 0,
            "start": None,
            "last_start": None,
            "history": [],
        }
        if sliding_window_size is not None:
            benchmark_results[func_name]["history"] = deque(maxlen=sliding_window_size)

    benchmark_results[func_name]["count"] += 1

    if benchmark_results[func_name]["count"] == 1:
        benchmark_results[func_name]["start"] = benchmark_results[func_name][
            "last_start"
        ] = datetime.utcnow()

    diff = datetime.utcnow() - benchmark_results[func_name]["last_start"]
    if diff >= interval:
        throughput = (
            benchmark_results[func_name]["count"]
            - benchmark_results[func_name]["last_count"]
        ) / (diff / timedelta(seconds=1))
        log_msg = f"Throughput = {throughput:5,.0f}"

        if sliding_window_size is not None:
            benchmark_results[func_name]["history"].append(throughput)

            log_msg += f", Avg throughput = {mean(benchmark_results[func_name]['history']):5,.0f}"
        #             if len(benchmark_results[func_name]["history"]) > 1:
        #                 log_msg += f", Standard deviation of throughput is {stdev(benchmark_results[func_name]['history']):5,.0f}"
        log_msg = (
            log_msg
            + f" - For {func_name}(interval={interval.seconds},{sliding_window_size=})"
        )
        logger.info(log_msg)

        benchmark_results[func_name]["last_start"] = datetime.utcnow()
        benchmark_results[func_name]["last_count"] = benchmark_results[func_name][
            "count"
        ]

In [None]:
# Smoke test: call `_benchmark` in a tight loop and check that every call is
# counted and that the throughput history never outgrows the sliding window.
func_name = "dummy_func"
interval = timedelta(seconds=1)
sliding_window_size = 5
benchmark_results = {}
n = 15_000_000

call_kwargs = {
    "interval": interval,
    "sliding_window_size": sliding_window_size,
    "func_name": func_name,
    "benchmark_results": benchmark_results,
}
for i in range(n):
    _benchmark(**call_kwargs)

display(benchmark_results)

recorded = benchmark_results[func_name]
assert recorded["count"] == n

history_len = len(recorded["history"])
assert history_len <= sliding_window_size, history_len

[INFO] fastkafka.benchmark: Throughput = 2,401,289, Avg throughput = 2,401,289 - For dummy_func(interval=1,sliding_window_size=5)
[INFO] fastkafka.benchmark: Throughput = 2,532,538, Avg throughput = 2,466,914 - For dummy_func(interval=1,sliding_window_size=5)
[INFO] fastkafka.benchmark: Throughput = 2,523,505, Avg throughput = 2,485,777 - For dummy_func(interval=1,sliding_window_size=5)
[INFO] fastkafka.benchmark: Throughput = 2,466,875, Avg throughput = 2,481,052 - For dummy_func(interval=1,sliding_window_size=5)
[INFO] fastkafka.benchmark: Throughput = 2,479,232, Avg throughput = 2,480,688 - For dummy_func(interval=1,sliding_window_size=5)
[INFO] fastkafka.benchmark: Throughput = 2,488,960, Avg throughput = 2,498,222 - For dummy_func(interval=1,sliding_window_size=5)


{'dummy_func': {'count': 15000000,
  'last_count': 14892399,
  'start': datetime.datetime(2023, 4, 7, 10, 48, 19, 887819),
  'last_start': datetime.datetime(2023, 4, 7, 10, 48, 25, 891228),
  'history': deque([2532538.0, 2523505.0, 2466875.0, 2479232.0, 2488960.0],
        maxlen=5)}}