In [None]:
# | default_exp _components.benchmarking

In [None]:
# | export

from collections import deque
from datetime import datetime, timedelta
from functools import wraps
from statistics import mean, stdev
from typing import *

from fastkafka._components.logger import get_logger

In [None]:
from fastkafka._components.logger import supress_timestamps

In [None]:
# NOTE(review): presumably removes timestamps from emitted log records so the
# output captured in this notebook stays stable between runs — confirm against
# fastkafka._components.logger.
supress_timestamps()

In [None]:
# | export

# Module-level logger; `_benchmark` writes its throughput reports through it.
logger = get_logger(__name__)

In [None]:
# | export


def _benchmark(
    interval: Union[int, timedelta] = 1,
    *,
    sliding_window_size: Optional[int] = None,
    func_name: str,
    benchmark_results: Dict[str, Dict[str, Any]],
) -> None:
    """Used to record the benchmark results(throughput, average throughput, standard deviation) of a given function

    Args:
        interval: the time interval after which the benchmark results are logged.
        sliding_window_size: the maximum number of benchmark results to use to calculate average throughput and standard deviation.
        func_name: the name of the function to be benchmarked.
        benchmark_results: a dictionary containing the benchmark results of all functions.
    """
    if isinstance(interval, int):
        interval = timedelta(seconds=interval)
    if func_name not in benchmark_results:
        benchmark_results[func_name] = {
            "count": 0,
            "last_count": 0,
            "start": None,
            "last_start": None,
            "history": [],
        }
        if sliding_window_size is not None:
            benchmark_results[func_name]["history"] = deque(maxlen=sliding_window_size)

    benchmark_results[func_name]["count"] += 1

    if benchmark_results[func_name]["count"] == 1:
        benchmark_results[func_name]["start"] = benchmark_results[func_name][
            "last_start"
        ] = datetime.utcnow()

    diff = datetime.utcnow() - benchmark_results[func_name]["last_start"]
    if diff >= interval:
        throughput = (
            benchmark_results[func_name]["count"]
            - benchmark_results[func_name]["last_count"]
        ) / (diff / timedelta(seconds=1))
        log_msg = f"For {func_name}({interval=},{sliding_window_size=}) - Throughput = {throughput:5,.0f}"

        if sliding_window_size is not None:
            benchmark_results[func_name]["history"].append(throughput)

            log_msg += f", Average throughput = {mean(benchmark_results[func_name]['history']):5,.0f}"
            if len(benchmark_results[func_name]["history"]) > 1:
                log_msg += f", Standard deviation of throughput is {stdev(benchmark_results[func_name]['history']):5,.0f}"
        logger.info(log_msg)

        benchmark_results[func_name]["last_start"] = datetime.utcnow()
        benchmark_results[func_name]["last_count"] = benchmark_results[func_name][
            "count"
        ]

In [None]:
# Smoke test: call `_benchmark` many times in a tight loop, then verify that
# every call was counted and that the history never outgrows the window.
interval = timedelta(seconds=1)
sliding_window_size = 5
func_name = "dummy_func"
benchmark_results = {}

n = 15_000_000
for i in range(n):
    _benchmark(
        interval,
        sliding_window_size=sliding_window_size,
        func_name=func_name,
        benchmark_results=benchmark_results,
    )

display(benchmark_results)

recorded = benchmark_results[func_name]
assert recorded["count"] == n
# On failure, report the actual history length.
assert len(recorded["history"]) <= sliding_window_size, len(recorded["history"])

[INFO] __main__: For dummy_func(interval=datetime.timedelta(seconds=1),sliding_window_size=5) - Throughput = 1,726,401, Average throughput = 1,726,401
[INFO] __main__: For dummy_func(interval=datetime.timedelta(seconds=1),sliding_window_size=5) - Throughput = 1,718,501, Average throughput = 1,722,451, Standard deviation of throughput is 5,586
[INFO] __main__: For dummy_func(interval=datetime.timedelta(seconds=1),sliding_window_size=5) - Throughput = 1,697,432, Average throughput = 1,714,111, Standard deviation of throughput is 14,975
[INFO] __main__: For dummy_func(interval=datetime.timedelta(seconds=1),sliding_window_size=5) - Throughput = 1,936,963, Average throughput = 1,769,824, Standard deviation of throughput is 112,095
[INFO] __main__: For dummy_func(interval=datetime.timedelta(seconds=1),sliding_window_size=5) - Throughput = 2,340,899, Average throughput = 1,884,039, Standard deviation of throughput is 273,220
[INFO] __main__: For dummy_func(interval=datetime.timedelta(seconds=

{'dummy_func': {'count': 15000000,
  'last_count': 14070781,
  'start': datetime.datetime(2023, 4, 6, 12, 52, 32, 75791),
  'last_start': datetime.datetime(2023, 4, 6, 12, 52, 39, 80092),
  'history': deque([1697432.0, 1936963.0, 2340899.0, 2318432.0, 2332153.0],
        maxlen=5)}}