In [None]:
# | default_exp _helpers

# Helpers

In [None]:
# | export

import asyncio
import inspect
import json
import textwrap
from datetime import datetime, timedelta
from typing import *

import aiohttp
import anyio
from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
from aiokafka.helpers import create_ssl_context
from aiokafka.structs import RecordMetadata
from IPython.display import Markdown

from fastkafka._components.helpers import in_notebook
from fastkafka._components.logger import get_logger
from fastkafka._components.meta import delegates

In [None]:
from fastkafka._components.logger import suppress_timestamps
from fastkafka.testing import ApacheKafkaBroker

In [None]:
# | export

# Pick the tqdm flavour that matches the runtime: the notebook variant when
# in_notebook() reports a notebook session, the standard one otherwise.
if in_notebook():
    from tqdm.notebook import tqdm
else:
    from tqdm import tqdm

In [None]:
import pytest
from pydantic import BaseModel

In [None]:
# | export

# Module-level logger, named after this module.
logger = get_logger(__name__)

In [None]:
# Notebook-only smoke test of the logging setup.
# suppress_timestamps() presumably drops timestamps from the log format so the
# recorded cell output stays stable -- confirm against the logger module.
suppress_timestamps()
# level=20 is the numeric value of logging.INFO.
logger = get_logger(__name__, level=20)
logger.info("ok")

[INFO] __main__: ok


## Configuration conversions between Confluent and AIOKafka formats

In [None]:
# | export


@delegates(AIOKafkaProducer)
def aiokafka2confluent(**kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Translates AIOKafka styled keyword arguments into a Confluent styled config dictionary

    Every underscore in a key is replaced with a dot, the SASL credentials
    given as ``sasl_plain_username`` / ``sasl_plain_password`` are stored under
    ``sasl.username`` / ``sasl.password``, and ``ssl_context`` is left out of
    the result.

    Returns (Dict[str, Any]):
        Confluent styled config dictionary

    Args:
        bootstrap_servers (str, list(str)): a ``host[:port]`` string or list of
            ``host[:port]`` strings that the producer should contact to
            bootstrap initial cluster metadata. This does not have to be the
            full node list.  It just needs to have at least one broker that will
            respond to a Metadata API Request. Default port is 9092. If no
            servers are specified, will default to ``localhost:9092``.
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client.
            Default: ``aiokafka-producer-#`` (appended with a unique number
            per instance)
        key_serializer (Callable): used to convert user-supplied keys to bytes
            If not :data:`None`, called as ``f(key),`` should return
            :class:`bytes`.
            Default: :data:`None`.
        value_serializer (Callable): used to convert user-supplied message
            values to :class:`bytes`. If not :data:`None`, called as
            ``f(value)``, should return :class:`bytes`.
            Default: :data:`None`.
        acks (Any): one of ``0``, ``1``, ``all``. The number of acknowledgments
            the producer requires the leader to have received before considering a
            request complete. This controls the durability of records that are
            sent. The following settings are common:

            * ``0``: Producer will not wait for any acknowledgment from the server
              at all. The message will immediately be added to the socket
              buffer and considered sent. No guarantee can be made that the
              server has received the record in this case, and the retries
              configuration will not take effect (as the client won't
              generally know of any failures). The offset given back for each
              record will always be set to -1.
            * ``1``: The broker leader will write the record to its local log but
              will respond without awaiting full acknowledgement from all
              followers. In this case should the leader fail immediately
              after acknowledging the record but before the followers have
              replicated it then the record will be lost.
            * ``all``: The broker leader will wait for the full set of in-sync
              replicas to acknowledge the record. This guarantees that the
              record will not be lost as long as at least one in-sync replica
              remains alive. This is the strongest available guarantee.

            If unset, defaults to ``acks=1``. If `enable_idempotence` is
            :data:`True` defaults to ``acks=all``
        compression_type (str): The compression type for all data generated by
            the producer. Valid values are ``gzip``, ``snappy``, ``lz4``, ``zstd``
            or :data:`None`.
            Compression is of full batches of data, so the efficacy of batching
            will also impact the compression ratio (more batching means better
            compression). Default: :data:`None`.
        max_batch_size (int): Maximum size of buffered data per partition.
            After this amount :meth:`send` coroutine will block until batch is
            drained.
            Default: 16384
        linger_ms (int): The producer groups together any records that arrive
            in between request transmissions into a single batched request.
            Normally this occurs only under load when records arrive faster
            than they can be sent out. However in some circumstances the client
            may want to reduce the number of requests even under moderate load.
            This setting accomplishes this by adding a small amount of
            artificial delay; that is, if first request is processed faster,
            than `linger_ms`, producer will wait ``linger_ms - process_time``.
            Default: 0 (i.e. no delay).
        partitioner (Callable): Callable used to determine which partition
            each message is assigned to. Called (after key serialization):
            ``partitioner(key_bytes, all_partitions, available_partitions)``.
            The default partitioner implementation hashes each non-None key
            using the same murmur2 algorithm as the Java client so that
            messages with the same key are assigned to the same partition.
            When a key is :data:`None`, the message is delivered to a random partition
            (filtered to partitions with available leaders only, if possible).
        max_request_size (int): The maximum size of a request. This is also
            effectively a cap on the maximum record size. Note that the server
            has its own cap on record size which may be different from this.
            This setting will limit the number of record batches the producer
            will send in a single request to avoid sending huge requests.
            Default: 1048576.
        metadata_max_age_ms (int): The period of time in milliseconds after
            which we force a refresh of metadata even if we haven't seen any
            partition leadership changes to proactively discover any new
            brokers or partitions. Default: 300000
        request_timeout_ms (int): Produce request timeout in milliseconds.
            As it's sent as part of
            :class:`~kafka.protocol.produce.ProduceRequest` (it's a blocking
            call), maximum waiting time can be up to ``2 *
            request_timeout_ms``.
            Default: 40000.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        api_version (str): specify which kafka API version to use.
            If set to ``auto``, will attempt to infer the broker version by
            probing various APIs. Default: ``auto``
        security_protocol (str): Protocol used to communicate with brokers.
            Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
        ssl_context (ssl.SSLContext): pre-configured :class:`~ssl.SSLContext`
            for wrapping socket connections. Directly passed into asyncio's
            :meth:`~asyncio.loop.create_connection`. For more
            information see :ref:`ssl_auth`.
            Default: :data:`None`
        connections_max_idle_ms (int): Close idle connections after the number
            of milliseconds specified by this config. Specifying :data:`None` will
            disable idle checks. Default: 540000 (9 minutes).
        enable_idempotence (bool): When set to :data:`True`, the producer will
            ensure that exactly one copy of each message is written in the
            stream. If :data:`False`, producer retries due to broker failures,
            etc., may write duplicates of the retried message in the stream.
            Note that enabling idempotence requires acks to be set to ``all``.
            If it is not explicitly set by the user it will be chosen. If
            incompatible values are set, a :exc:`ValueError` will be thrown.
            New in version 0.5.0.
        sasl_mechanism (str): Authentication mechanism when security_protocol
            is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values
            are: ``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
            ``OAUTHBEARER``.
            Default: ``PLAIN``
        sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        sasl_oauth_token_provider (:class:`~aiokafka.abc.AbstractTokenProvider`):
            OAuthBearer token provider instance. (See
            :mod:`kafka.oauth.abstract`).
            Default: :data:`None`
    """
    # Confluent keys are dot-separated where AIOKafka uses underscores.
    dotted = {name.replace("_", "."): value for name, value in kwargs.items()}

    # The SASL credential keys differ by more than the separator.
    renames = (
        ("sasl.plain.username", "sasl.username"),
        ("sasl.plain.password", "sasl.password"),
    )
    for aiokafka_name, confluent_name in renames:
        if aiokafka_name in dotted:
            dotted[confluent_name] = dotted.pop(aiokafka_name)

    # The SSL context object is not carried over into the Confluent styled config.
    dotted.pop("ssl.context", None)

    return dotted

In [None]:
# Build the SSL context once and reuse it in the config below.
ssl_context = create_ssl_context()

aiokafka_config = {
    "bootstrap_servers": "kafka.staging.airt:9092",
    "group_id": "kafka_consume_group",
    "auto_offset_reset": "earliest",
    "security_protocol": "SASL_SSL",
    "sasl_mechanism": "PLAIN",
    "sasl_plain_username": "myname",
    "sasl_plain_password": "*************",
    "ssl_context": ssl_context,
}

# Keys are dot-separated, the SASL credentials are renamed and the SSL
# context is dropped.
expected = {
    "bootstrap.servers": "kafka.staging.airt:9092",
    "group.id": "kafka_consume_group",
    "auto.offset.reset": "earliest",
    "security.protocol": "SASL_SSL",
    "sasl.mechanism": "PLAIN",
    "sasl.username": "myname",
    "sasl.password": "*************",
}

confluent_config = aiokafka2confluent(**aiokafka_config)
assert confluent_config == expected

In [None]:
# | export


def confluent2aiokafka(confluent_config: Dict[str, Any]) -> Dict[str, Any]:
    """Converts AIOKafka styled config dictionary into Confluence styled one

    Args:
        confluent_config: Confluence styled config dictionary

    Returns:
        AIOKafka styled config dictionary
    """

    aiokafka_config = {k.replace(".", "_"): v for k, v in confluent_config.items()}
    for k1, k2 in zip(
        ["sasl_username", "sasl_password"],
        ["sasl_plain_username", "sasl_plain_password"],
    ):
        if k1 in aiokafka_config:
            aiokafka_config[k2] = aiokafka_config.pop(k1)

    if "sasl_plain_username" in aiokafka_config:
        aiokafka_config["ssl.context"] = (create_ssl_context(),)

    return aiokafka_config

In [None]:
confluent_config = {
    "bootstrap.servers": "kafka.staging.airt:9092",
    "group.id": "kafka_consume_group",
    "auto.offset.reset": "earliest",
    "security.protocol": "SASL_SSL",
    "sasl.mechanism": "PLAIN",
    "sasl.username": "myname",
    "sasl.password": "*************",
}

expected = {
    "bootstrap_servers": "kafka.staging.airt:9092",
    "group_id": "kafka_consume_group",
    "auto_offset_reset": "earliest",
    "security_protocol": "SASL_SSL",
    "sasl_mechanism": "PLAIN",
    "sasl_plain_username": "myname",
    "sasl_plain_password": "*************",
}

aiokafka_config = confluent2aiokafka(confluent_config)

aiokafka_config.pop("ssl.context")

assert aiokafka_config == expected

## Producing and consuming messages

In [None]:
# | export


@delegates(AIOKafkaProducer)
async def produce_messages(  # type: ignore
    *,
    topic: str,
    msgs: List[Any],
    **kwargs: Dict[str, Any],
) -> List[RecordMetadata]:
    """Sends a list of messages to a Kafka topic and waits for their delivery

    Each message is turned into bytes before sending: ``bytes`` are sent as
    they are, ``str`` is UTF-8 encoded, objects with a ``json()`` method are
    sent as the UTF-8 encoded result of that call, and anything else is
    serialized with ``json.dumps``. The producer is created from the remaining
    keyword arguments and is always stopped before returning.

    Args:
        topic: Topic name
        msgs: a list of messages to produce
        bootstrap_servers (str, list(str)): a ``host[:port]`` string or list of
            ``host[:port]`` strings that the producer should contact to
            bootstrap initial cluster metadata. This does not have to be the
            full node list.  It just needs to have at least one broker that will
            respond to a Metadata API Request. Default port is 9092. If no
            servers are specified, will default to ``localhost:9092``.
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client.
            Default: ``aiokafka-producer-#`` (appended with a unique number
            per instance)
        key_serializer (Callable): used to convert user-supplied keys to bytes
            If not :data:`None`, called as ``f(key),`` should return
            :class:`bytes`.
            Default: :data:`None`.
        value_serializer (Callable): used to convert user-supplied message
            values to :class:`bytes`. If not :data:`None`, called as
            ``f(value)``, should return :class:`bytes`.
            Default: :data:`None`.
        acks (Any): one of ``0``, ``1``, ``all``. The number of acknowledgments
            the producer requires the leader to have received before considering a
            request complete. This controls the durability of records that are
            sent. The following settings are common:

            * ``0``: Producer will not wait for any acknowledgment from the server
              at all. The message will immediately be added to the socket
              buffer and considered sent. No guarantee can be made that the
              server has received the record in this case, and the retries
              configuration will not take effect (as the client won't
              generally know of any failures). The offset given back for each
              record will always be set to -1.
            * ``1``: The broker leader will write the record to its local log but
              will respond without awaiting full acknowledgement from all
              followers. In this case should the leader fail immediately
              after acknowledging the record but before the followers have
              replicated it then the record will be lost.
            * ``all``: The broker leader will wait for the full set of in-sync
              replicas to acknowledge the record. This guarantees that the
              record will not be lost as long as at least one in-sync replica
              remains alive. This is the strongest available guarantee.

            If unset, defaults to ``acks=1``. If `enable_idempotence` is
            :data:`True` defaults to ``acks=all``
        compression_type (str): The compression type for all data generated by
            the producer. Valid values are ``gzip``, ``snappy``, ``lz4``, ``zstd``
            or :data:`None`.
            Compression is of full batches of data, so the efficacy of batching
            will also impact the compression ratio (more batching means better
            compression). Default: :data:`None`.
        max_batch_size (int): Maximum size of buffered data per partition.
            After this amount :meth:`send` coroutine will block until batch is
            drained.
            Default: 16384
        linger_ms (int): The producer groups together any records that arrive
            in between request transmissions into a single batched request.
            Normally this occurs only under load when records arrive faster
            than they can be sent out. However in some circumstances the client
            may want to reduce the number of requests even under moderate load.
            This setting accomplishes this by adding a small amount of
            artificial delay; that is, if first request is processed faster,
            than `linger_ms`, producer will wait ``linger_ms - process_time``.
            Default: 0 (i.e. no delay).
        partitioner (Callable): Callable used to determine which partition
            each message is assigned to. Called (after key serialization):
            ``partitioner(key_bytes, all_partitions, available_partitions)``.
            The default partitioner implementation hashes each non-None key
            using the same murmur2 algorithm as the Java client so that
            messages with the same key are assigned to the same partition.
            When a key is :data:`None`, the message is delivered to a random partition
            (filtered to partitions with available leaders only, if possible).
        max_request_size (int): The maximum size of a request. This is also
            effectively a cap on the maximum record size. Note that the server
            has its own cap on record size which may be different from this.
            This setting will limit the number of record batches the producer
            will send in a single request to avoid sending huge requests.
            Default: 1048576.
        metadata_max_age_ms (int): The period of time in milliseconds after
            which we force a refresh of metadata even if we haven't seen any
            partition leadership changes to proactively discover any new
            brokers or partitions. Default: 300000
        request_timeout_ms (int): Produce request timeout in milliseconds.
            As it's sent as part of
            :class:`~kafka.protocol.produce.ProduceRequest` (it's a blocking
            call), maximum waiting time can be up to ``2 *
            request_timeout_ms``.
            Default: 40000.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        api_version (str): specify which kafka API version to use.
            If set to ``auto``, will attempt to infer the broker version by
            probing various APIs. Default: ``auto``
        security_protocol (str): Protocol used to communicate with brokers.
            Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
        ssl_context (ssl.SSLContext): pre-configured :class:`~ssl.SSLContext`
            for wrapping socket connections. Directly passed into asyncio's
            :meth:`~asyncio.loop.create_connection`. For more
            information see :ref:`ssl_auth`.
            Default: :data:`None`
        connections_max_idle_ms (int): Close idle connections after the number
            of milliseconds specified by this config. Specifying :data:`None` will
            disable idle checks. Default: 540000 (9 minutes).
        enable_idempotence (bool): When set to :data:`True`, the producer will
            ensure that exactly one copy of each message is written in the
            stream. If :data:`False`, producer retries due to broker failures,
            etc., may write duplicates of the retried message in the stream.
            Note that enabling idempotence requires acks to be set to ``all``.
            If it is not explicitly set by the user it will be chosen. If
            incompatible values are set, a :exc:`ValueError` will be thrown.
            New in version 0.5.0.
        sasl_mechanism (str): Authentication mechanism when security_protocol
            is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values
            are: ``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
            ``OAUTHBEARER``.
            Default: ``PLAIN``
        sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        sasl_oauth_token_provider (:class:`~aiokafka.abc.AbstractTokenProvider`):
            OAuthBearer token provider instance. (See
            :mod:`kafka.oauth.abstract`).
            Default: :data:`None`

    Returns:
        The results of awaiting each send, in the same order as ``msgs``
    """

    def _to_bytes(msg: Any) -> bytes:
        # bytes pass through untouched, everything else ends up UTF-8 encoded.
        if isinstance(msg, bytes):
            return msg
        if isinstance(msg, str):
            return msg.encode("utf-8")
        if hasattr(msg, "json"):
            return msg.json().encode("utf-8")  # type: ignore
        return json.dumps(msg).encode("utf-8")

    producer = AIOKafkaProducer(**kwargs)
    await producer.start()

    try:
        # First hand every message over to the producer ...
        pending: List[Any] = []
        for msg in tqdm(msgs, desc=f"producing to '{topic}'"):
            pending.append(await producer.send(topic, _to_bytes(msg)))

        # ... then wait for each of the returned awaitables in turn.
        return [await fut for fut in pending]
    finally:
        await producer.stop()

In [None]:
# print(combine_params(produce_messages, AIOKafkaProducer).__doc__)

In [None]:
class Hello(BaseModel):
    """Minimal Pydantic model with a single text field, used as a sample message."""
    msg: str


msgs_count = 120_000
msgs = (
    [b"Hello world bytes" for _ in range(msgs_count // 3)]
    + [f"Hello world as string for the {i+1}. time!" for i in range(msgs_count // 3)]
    + [
        Hello(msg="Hello workd as Pydantic object for the {i+1}. time!")
        for i in range(msgs_count // 3)
    ]
)
async with ApacheKafkaBroker(topics=["test_produce_messages"]) as bootstrap_server:
    delivery_report = await produce_messages(
        msgs=msgs, topic="test_produce_messages", bootstrap_servers=bootstrap_server
    )

[INFO] fastkafka._components.test_dependencies: Java is already installed.
[INFO] fastkafka._components.test_dependencies: But not exported to PATH, exporting...
[INFO] fastkafka._components.test_dependencies: Kafka is installed.
[INFO] fastkafka._components.test_dependencies: But not exported to PATH, exporting...
[INFO] fastkafka._testing.apache_kafka_broker: Starting zookeeper...
[INFO] fastkafka._testing.apache_kafka_broker: Starting kafka...
[INFO] fastkafka._testing.apache_kafka_broker: Local Kafka broker up and running on 127.0.0.1:9092


producing to 'test_produce_messages':   0%|          | 0/120000 [00:00<?, ?it/s]

[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Terminating the process 2864...
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Process 2864 terminated.
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Terminating the process 2490...
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Process 2490 terminated.


In [None]:
# | export


@delegates(AIOKafkaConsumer)
async def consumes_messages(
    *,
    topic: str,
    msgs_count: int,
    **kwargs: Dict[str, Any],
) -> None:
    """Consumes messages from a Kafka topic until the requested number has been read

    The consumer is created for ``topic`` from the remaining keyword arguments
    and is always stopped before returning. The consumed messages themselves
    are discarded; only their number is tracked. When ``msgs_count`` is zero
    or negative, no message is read.

    Args:
        topic: Topic name
        msgs_count: number of messages to consume before returning
        bootstrap_servers (str, list(str)): a ``host[:port]`` string (or list of
            ``host[:port]`` strings) that the consumer should contact to bootstrap
            initial cluster metadata.

            This does not have to be the full node list.
            It just needs to have at least one broker that will respond to a
            Metadata API Request. Default port is 9092. If no servers are
            specified, will default to ``localhost:9092``.
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client. Also
            submitted to :class:`~.consumer.group_coordinator.GroupCoordinator`
            for logging with respect to consumer group administration. Default:
            ``aiokafka-{version}``
        group_id (str or None): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching and
            committing offsets. If None, auto-partition assignment (via
            group coordinator) and offset commits are disabled.
            Default: None
        key_deserializer (Callable): Any callable that takes a
            raw message key and returns a deserialized key.
        value_deserializer (Callable, Optional): Any callable that takes a
            raw message value and returns a deserialized value.
        fetch_min_bytes (int): Minimum amount of data the server should
            return for a fetch request, otherwise wait up to
            `fetch_max_wait_ms` for more data to accumulate. Default: 1.
        fetch_max_bytes (int): The maximum amount of data the server should
            return for a fetch request. This is not an absolute maximum, if
            the first message in the first non-empty partition of the fetch
            is larger than this value, the message will still be returned
            to ensure that the consumer can make progress. NOTE: consumer
            performs fetches to multiple brokers in parallel so memory
            usage will depend on the number of brokers containing
            partitions for the topic.
            Supported Kafka version >= 0.10.1.0. Default: 52428800 (50 Mb).
        fetch_max_wait_ms (int): The maximum amount of time in milliseconds
            the server will block before answering the fetch request if
            there isn't sufficient data to immediately satisfy the
            requirement given by fetch_min_bytes. Default: 500.
        max_partition_fetch_bytes (int): The maximum amount of data
            per-partition the server will return. The maximum total memory
            used for a request ``= #partitions * max_partition_fetch_bytes``.
            This size must be at least as large as the maximum message size
            the server allows or else it is possible for the producer to
            send messages larger than the consumer can fetch. If that
            happens, the consumer can get stuck trying to fetch a large
            message on a certain partition. Default: 1048576.
        max_poll_records (int): The maximum number of records returned in a
            single call to :meth:`.getmany`. Defaults ``None``, no limit.
        request_timeout_ms (int): Client request timeout in milliseconds.
            Default: 40000.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        auto_offset_reset (str): A policy for resetting offsets on
            :exc:`.OffsetOutOfRangeError` errors: ``earliest`` will move to the oldest
            available message, ``latest`` will move to the most recent, and
            ``none`` will raise an exception so you can handle this case.
            Default: ``latest``.
        enable_auto_commit (bool): If true the consumer's offset will be
            periodically committed in the background. Default: True.
        auto_commit_interval_ms (int): milliseconds between automatic
            offset commits, if enable_auto_commit is True. Default: 5000.
        check_crcs (bool): Automatically check the CRC32 of the records
            consumed. This ensures no on-the-wire or on-disk corruption to
            the messages occurred. This check adds some overhead, so it may
            be disabled in cases seeking extreme performance. Default: True
        metadata_max_age_ms (int): The period of time in milliseconds after
            which we force a refresh of metadata even if we haven't seen any
            partition leadership changes to proactively discover any new
            brokers or partitions. Default: 300000
        partition_assignment_strategy (list): List of objects to use to
            distribute partition ownership amongst consumer instances when
            group management is used. This preference is implicit in the order
            of the strategies in the list. When assignment strategy changes:
            to support a change to the assignment strategy, new versions must
            enable support both for the old assignment strategy and the new
            one. The coordinator will choose the old assignment strategy until
            all members have been updated. Then it will choose the new
            strategy. Default: [:class:`.RoundRobinPartitionAssignor`]
        max_poll_interval_ms (int): Maximum allowed time between calls to
            consume messages (e.g., :meth:`.getmany`). If this interval
            is exceeded the consumer is considered failed and the group will
            rebalance in order to reassign the partitions to another consumer
            group member. If API methods block waiting for messages, that time
            does not count against this timeout. See `KIP-62`_ for more
            information. Default 300000
        rebalance_timeout_ms (int): The maximum time server will wait for this
            consumer to rejoin the group in a case of rebalance. In Java client
            this behaviour is bound to `max.poll.interval.ms` configuration,
            but as ``aiokafka`` will rejoin the group in the background, we
            decouple this setting to allow finer tuning by users that use
            :class:`.ConsumerRebalanceListener` to delay rebalancing. Defaults
            to ``session_timeout_ms``
        session_timeout_ms (int): Client group session and failure detection
            timeout. The consumer sends periodic heartbeats
            (`heartbeat.interval.ms`) to indicate its liveness to the broker.
            If no heartbeats are received by the broker for a group member within
            the session timeout, the broker will remove the consumer from the
            group and trigger a rebalance. The allowed range is configured with
            the **broker** configuration properties
            `group.min.session.timeout.ms` and `group.max.session.timeout.ms`.
            Default: 10000
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to ensure
            that the consumer's session stays active and to facilitate
            rebalancing when new consumers join or leave the group. The
            value must be set lower than `session_timeout_ms`, but typically
            should be set no higher than 1/3 of that value. It can be
            adjusted even lower to control the expected time for normal
            rebalances. Default: 3000
        consumer_timeout_ms (int): maximum wait timeout for background fetching
            routine. Mostly defines how fast the system will see rebalance and
            request new data for new partitions. Default: 200
        api_version (str): specify which kafka API version to use.
            :class:`AIOKafkaConsumer` supports Kafka API versions >=0.9 only.
            If set to ``auto``, will attempt to infer the broker version by
            probing various APIs. Default: ``auto``
        security_protocol (str): Protocol used to communicate with brokers.
            Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
        ssl_context (ssl.SSLContext): pre-configured :class:`~ssl.SSLContext`
            for wrapping socket connections. Directly passed into asyncio's
            :meth:`~asyncio.loop.create_connection`. For more information see
            :ref:`ssl_auth`. Default: None.
        exclude_internal_topics (bool): Whether records from internal topics
            (such as offsets) should be exposed to the consumer. If set to True
            the only way to receive records from an internal topic is
            subscribing to it. Requires 0.10+ Default: True
        connections_max_idle_ms (int): Close idle connections after the number
            of milliseconds specified by this config. Specifying `None` will
            disable idle checks. Default: 540000 (9 minutes).
        isolation_level (str): Controls how to read messages written
            transactionally.

            If set to ``read_committed``, :meth:`.getmany` will only return
            transactional messages which have been committed.
            If set to ``read_uncommitted`` (the default), :meth:`.getmany` will
            return all messages, even transactional messages which have been
            aborted.

            Non-transactional messages will be returned unconditionally in
            either mode.

            Messages will always be returned in offset order. Hence, in
            `read_committed` mode, :meth:`.getmany` will only return
            messages up to the last stable offset (LSO), which is the one less
            than the offset of the first open transaction. In particular any
            messages appearing after messages belonging to ongoing transactions
            will be withheld until the relevant transaction has been completed.
            As a result, `read_committed` consumers will not be able to read up
            to the high watermark when there are in flight transactions.
            Further, when in `read_committed` the seek_to_end method will
            return the LSO. See method docs below. Default: ``read_uncommitted``
        sasl_mechanism (str): Authentication mechanism when security_protocol
            is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values are:
            ``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
            ``OAUTHBEARER``.
            Default: ``PLAIN``
        sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
            Default: None
        sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
            Default: None
        sasl_oauth_token_provider (~aiokafka.abc.AbstractTokenProvider):
            OAuthBearer token provider instance. (See
            :mod:`kafka.oauth.abstract`).
            Default: None
    """
    consumer = AIOKafkaConsumer(topic, **kwargs)
    await consumer.start()
    try:
        with tqdm(total=msgs_count, desc=f"consuming from '{topic}'") as pbar:
            # Nothing was requested: iterating the consumer would wait for a
            # message that may never arrive, so return without reading.
            if msgs_count <= 0:
                return

            # Count locally instead of reading the count back from the progress
            # bar: a disabled tqdm bar ignores update() calls, which would keep
            # this loop from ever terminating.
            consumed = 0
            async for _ in consumer:
                consumed += 1
                pbar.update(1)
                if consumed >= msgs_count:
                    break
    finally:
        await consumer.stop()

In [None]:
# print(combine_params(consumes_messages, AIOKafkaConsumer).__doc__)

In [None]:
async with ApacheKafkaBroker(topics=["test_consume_messages"]) as bootstrap_server:
    async with anyio.create_task_group() as tg:
        tg.start_soon(
            lambda d: produce_messages(**d),
            dict(
                msgs=msgs,
                topic="test_consume_messages",
                bootstrap_servers=bootstrap_server,
            ),
        )
        tg.start_soon(
            lambda d: consumes_messages(**d),
            dict(
                msgs_count=int(len(msgs) * 0.9),
                topic="test_consume_messages",
                bootstrap_servers=bootstrap_server,
            ),
        )

[INFO] fastkafka._components.test_dependencies: Java is already installed.
[INFO] fastkafka._components.test_dependencies: Kafka is installed.
[INFO] fastkafka._testing.apache_kafka_broker: Starting zookeeper...
[INFO] fastkafka._testing.apache_kafka_broker: Starting kafka...
[INFO] fastkafka._testing.apache_kafka_broker: Local Kafka broker up and running on 127.0.0.1:9092
[INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'test_consume_messages'})


producing to 'test_consume_messages':   0%|          | 0/120000 [00:00<?, ?it/s]

[INFO] aiokafka.consumer.group_coordinator: Metadata for topic has changed from {} to {'test_consume_messages': 1}. 


consuming from 'test_consume_messages':   0%|          | 0/108000 [00:00<?, ?it/s]

[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Terminating the process 4073...
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Process 4073 terminated.
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Terminating the process 3701...
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Process 3701 terminated.


In [None]:
# | export


@delegates(AIOKafkaConsumer)
@delegates(AIOKafkaProducer, keep=True)
async def produce_and_consume_messages(
    *,
    produce_topic: str,
    consume_topic: str,
    msgs: List[Any],
    msgs_count: int,
    **kwargs: Dict[str, Any],
) -> None:
    """Concurrently produce messages to one topic and consume from another.

    A producer task (``produce_messages``) and a consumer task
    (``consumes_messages``) are started in the same task group; the call
    returns once both of them have finished.

    Keyword arguments are shared between the two clients. An option accepted
    only by :class:`AIOKafkaConsumer` is withheld from the producer, and an
    option accepted only by :class:`AIOKafkaProducer` is withheld from the
    consumer. Every other keyword argument is forwarded to both tasks.

    Args:
        produce_topic: Topic name for producing messages
        consume_topic: Topic name for consuming messages
        msgs: a list of messages to produce
        msgs_count: number of messages to consume before returning
        bootstrap_servers (str, list(str)): a ``host[:port]`` string (or list of
            ``host[:port]`` strings) that the consumer should contact to bootstrap
            initial cluster metadata.

            This does not have to be the full node list.
            It just needs to have at least one broker that will respond to a
            Metadata API Request. Default port is 9092. If no servers are
            specified, will default to ``localhost:9092``.
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client. Also
            submitted to :class:`~.consumer.group_coordinator.GroupCoordinator`
            for logging with respect to consumer group administration. Default:
            ``aiokafka-{version}``
        group_id (str or None): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching and
            committing offsets. If None, auto-partition assignment (via
            group coordinator) and offset commits are disabled.
            Default: None
        key_deserializer (Callable): Any callable that takes a
            raw message key and returns a deserialized key.
        value_deserializer (Callable, Optional): Any callable that takes a
            raw message value and returns a deserialized value.
        fetch_min_bytes (int): Minimum amount of data the server should
            return for a fetch request, otherwise wait up to
            `fetch_max_wait_ms` for more data to accumulate. Default: 1.
        fetch_max_bytes (int): The maximum amount of data the server should
            return for a fetch request. This is not an absolute maximum, if
            the first message in the first non-empty partition of the fetch
            is larger than this value, the message will still be returned
            to ensure that the consumer can make progress. NOTE: consumer
            performs fetches to multiple brokers in parallel so memory
            usage will depend on the number of brokers containing
            partitions for the topic.
            Supported Kafka version >= 0.10.1.0. Default: 52428800 (50 Mb).
        fetch_max_wait_ms (int): The maximum amount of time in milliseconds
            the server will block before answering the fetch request if
            there isn't sufficient data to immediately satisfy the
            requirement given by fetch_min_bytes. Default: 500.
        max_partition_fetch_bytes (int): The maximum amount of data
            per-partition the server will return. The maximum total memory
            used for a request ``= #partitions * max_partition_fetch_bytes``.
            This size must be at least as large as the maximum message size
            the server allows or else it is possible for the producer to
            send messages larger than the consumer can fetch. If that
            happens, the consumer can get stuck trying to fetch a large
            message on a certain partition. Default: 1048576.
        max_poll_records (int): The maximum number of records returned in a
            single call to :meth:`.getmany`. Defaults ``None``, no limit.
        request_timeout_ms (int): Client request timeout in milliseconds.
            Default: 40000.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        auto_offset_reset (str): A policy for resetting offsets on
            :exc:`.OffsetOutOfRangeError` errors: ``earliest`` will move to the oldest
            available message, ``latest`` will move to the most recent, and
            ``none`` will raise an exception so you can handle this case.
            Default: ``latest``.
        enable_auto_commit (bool): If true the consumer's offset will be
            periodically committed in the background. Default: True.
        auto_commit_interval_ms (int): milliseconds between automatic
            offset commits, if enable_auto_commit is True. Default: 5000.
        check_crcs (bool): Automatically check the CRC32 of the records
            consumed. This ensures no on-the-wire or on-disk corruption to
            the messages occurred. This check adds some overhead, so it may
            be disabled in cases seeking extreme performance. Default: True
        metadata_max_age_ms (int): The period of time in milliseconds after
            which we force a refresh of metadata even if we haven't seen any
            partition leadership changes to proactively discover any new
            brokers or partitions. Default: 300000
        partition_assignment_strategy (list): List of objects to use to
            distribute partition ownership amongst consumer instances when
            group management is used. This preference is implicit in the order
            of the strategies in the list. When assignment strategy changes:
            to support a change to the assignment strategy, new versions must
            enable support both for the old assignment strategy and the new
            one. The coordinator will choose the old assignment strategy until
            all members have been updated. Then it will choose the new
            strategy. Default: [:class:`.RoundRobinPartitionAssignor`]
        max_poll_interval_ms (int): Maximum allowed time between calls to
            consume messages (e.g., :meth:`.getmany`). If this interval
            is exceeded the consumer is considered failed and the group will
            rebalance in order to reassign the partitions to another consumer
            group member. If API methods block waiting for messages, that time
            does not count against this timeout. See `KIP-62`_ for more
            information. Default 300000
        rebalance_timeout_ms (int): The maximum time server will wait for this
            consumer to rejoin the group in a case of rebalance. In Java client
            this behaviour is bound to `max.poll.interval.ms` configuration,
            but as ``aiokafka`` will rejoin the group in the background, we
            decouple this setting to allow finer tuning by users that use
            :class:`.ConsumerRebalanceListener` to delay rebalancing. Defaults
            to ``session_timeout_ms``
        session_timeout_ms (int): Client group session and failure detection
            timeout. The consumer sends periodic heartbeats
            (`heartbeat.interval.ms`) to indicate its liveness to the broker.
            If no heartbeats are received by the broker for a group member within
            the session timeout, the broker will remove the consumer from the
            group and trigger a rebalance. The allowed range is configured with
            the **broker** configuration properties
            `group.min.session.timeout.ms` and `group.max.session.timeout.ms`.
            Default: 10000
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to ensure
            that the consumer's session stays active and to facilitate
            rebalancing when new consumers join or leave the group. The
            value must be set lower than `session_timeout_ms`, but typically
            should be set no higher than 1/3 of that value. It can be
            adjusted even lower to control the expected time for normal
            rebalances. Default: 3000
        consumer_timeout_ms (int): maximum wait timeout for background fetching
            routine. Mostly defines how fast the system will see rebalance and
            request new data for new partitions. Default: 200
        api_version (str): specify which kafka API version to use.
            :class:`AIOKafkaConsumer` supports Kafka API versions >=0.9 only.
            If set to ``auto``, will attempt to infer the broker version by
            probing various APIs. Default: ``auto``
        security_protocol (str): Protocol used to communicate with brokers.
            Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
        ssl_context (ssl.SSLContext): pre-configured :class:`~ssl.SSLContext`
            for wrapping socket connections. Directly passed into asyncio's
            :meth:`~asyncio.loop.create_connection`. For more information see
            :ref:`ssl_auth`. Default: None.
        exclude_internal_topics (bool): Whether records from internal topics
            (such as offsets) should be exposed to the consumer. If set to True
            the only way to receive records from an internal topic is
            subscribing to it. Requires 0.10+ Default: True
        connections_max_idle_ms (int): Close idle connections after the number
            of milliseconds specified by this config. Specifying `None` will
            disable idle checks. Default: 540000 (9 minutes).
        isolation_level (str): Controls how to read messages written
            transactionally.

            If set to ``read_committed``, :meth:`.getmany` will only return
            transactional messages which have been committed.
            If set to ``read_uncommitted`` (the default), :meth:`.getmany` will
            return all messages, even transactional messages which have been
            aborted.

            Non-transactional messages will be returned unconditionally in
            either mode.

            Messages will always be returned in offset order. Hence, in
            `read_committed` mode, :meth:`.getmany` will only return
            messages up to the last stable offset (LSO), which is one less
            than the offset of the first open transaction. In particular any
            messages appearing after messages belonging to ongoing transactions
            will be withheld until the relevant transaction has been completed.
            As a result, `read_committed` consumers will not be able to read up
            to the high watermark when there are in flight transactions.
            Further, when in `read_committed` the seek_to_end method will
            return the LSO. See method docs below. Default: ``read_uncommitted``
        sasl_mechanism (str): Authentication mechanism when security_protocol
            is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values are:
            ``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
            ``OAUTHBEARER``.
            Default: ``PLAIN``
        sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
            Default: None
        sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
            Default: None
        sasl_oauth_token_provider (~aiokafka.abc.AbstractTokenProvider): OAuthBearer token provider instance. (See :mod:`kafka.oauth.abstract`).
            Default: None
        key_serializer (Callable): used to convert user-supplied keys to bytes
            If not :data:`None`, called as ``f(key)``, should return
            :class:`bytes`.
            Default: :data:`None`.
        value_serializer (Callable): used to convert user-supplied message
            values to :class:`bytes`. If not :data:`None`, called as
            ``f(value)``, should return :class:`bytes`.
            Default: :data:`None`.
        acks (Any): one of ``0``, ``1``, ``all``. The number of acknowledgments
            the producer requires the leader to have received before considering a
            request complete. This controls the durability of records that are
            sent. The following settings are common:

            * ``0``: Producer will not wait for any acknowledgment from the server
              at all. The message will immediately be added to the socket
              buffer and considered sent. No guarantee can be made that the
              server has received the record in this case, and the retries
              configuration will not take effect (as the client won't
              generally know of any failures). The offset given back for each
              record will always be set to -1.
            * ``1``: The broker leader will write the record to its local log but
              will respond without awaiting full acknowledgement from all
              followers. In this case should the leader fail immediately
              after acknowledging the record but before the followers have
              replicated it then the record will be lost.
            * ``all``: The broker leader will wait for the full set of in-sync
              replicas to acknowledge the record. This guarantees that the
              record will not be lost as long as at least one in-sync replica
              remains alive. This is the strongest available guarantee.

            If unset, defaults to ``acks=1``. If `enable_idempotence` is
            :data:`True` defaults to ``acks=all``
        compression_type (str): The compression type for all data generated by
            the producer. Valid values are ``gzip``, ``snappy``, ``lz4``, ``zstd``
            or :data:`None`.
            Compression is of full batches of data, so the efficacy of batching
            will also impact the compression ratio (more batching means better
            compression). Default: :data:`None`.
        max_batch_size (int): Maximum size of buffered data per partition.
            After this amount :meth:`send` coroutine will block until batch is
            drained.
            Default: 16384
        linger_ms (int): The producer groups together any records that arrive
            in between request transmissions into a single batched request.
            Normally this occurs only under load when records arrive faster
            than they can be sent out. However in some circumstances the client
            may want to reduce the number of requests even under moderate load.
            This setting accomplishes this by adding a small amount of
            artificial delay; that is, if first request is processed faster,
            than `linger_ms`, producer will wait ``linger_ms - process_time``.
            Default: 0 (i.e. no delay).
        partitioner (Callable): Callable used to determine which partition
            each message is assigned to. Called (after key serialization):
            ``partitioner(key_bytes, all_partitions, available_partitions)``.
            The default partitioner implementation hashes each non-None key
            using the same murmur2 algorithm as the Java client so that
            messages with the same key are assigned to the same partition.
            When a key is :data:`None`, the message is delivered to a random partition
            (filtered to partitions with available leaders only, if possible).
        max_request_size (int): The maximum size of a request. This is also
            effectively a cap on the maximum record size. Note that the server
            has its own cap on record size which may be different from this.
            This setting will limit the number of record batches the producer
            will send in a single request to avoid sending huge requests.
            Default: 1048576.
        enable_idempotence (bool): When set to :data:`True`, the producer will
            ensure that exactly one copy of each message is written in the
            stream. If :data:`False`, producer retries due to broker failures,
            etc., may write duplicates of the retried message in the stream.
            Note that enabling idempotence requires acks to be set to ``all``.
            If it is not explicitly set by the user it will be chosen. If
            incompatible values are set, a :exc:`ValueError` will be thrown.
            New in version 0.5.0.
    """

    def _keyword_names(client_cls: Any) -> Set[str]:
        # Names the client's constructor accepts as keyword arguments.
        return {
            param.name
            for param in inspect.signature(client_cls).parameters.values()
            if param.kind in (param.POSITIONAL_OR_KEYWORD, param.KEYWORD_ONLY)
        }

    producer_names = _keyword_names(AIOKafkaProducer)
    consumer_names = _keyword_names(AIOKafkaConsumer)

    # Only options that are *exclusive* to the other client are dropped, so
    # shared options (and anything unknown to both) are still forwarded to
    # both tasks exactly as before.
    consumer_only = consumer_names - producer_names
    producer_only = producer_names - consumer_names
    producer_kwargs = {k: v for k, v in kwargs.items() if k not in consumer_only}
    consumer_kwargs = {k: v for k, v in kwargs.items() if k not in producer_only}

    async with anyio.create_task_group() as tg:
        # `start_soon` forwards positional arguments only, so the keyword
        # arguments are shipped as a dict and unpacked by the lambda.
        tg.start_soon(
            lambda d: produce_messages(**d),
            dict(msgs=msgs, topic=produce_topic, **producer_kwargs),
        )
        tg.start_soon(
            lambda d: consumes_messages(**d),
            dict(
                msgs_count=msgs_count,
                topic=consume_topic,
                **consumer_kwargs,
            ),
        )

In [None]:
# print(combine_params(combine_params(produce_and_consume_messages, AIOKafkaProducer), AIOKafkaConsumer).__doc__)

In [None]:
# Smoke test: produce to and consume from the same topic on a throw-away
# local broker. The consumer stops after 95% of the produced messages.
# NOTE(review): presumably this leaves slack for messages sent before the
# consumer is assigned its partitions - confirm.
async with ApacheKafkaBroker(
    topics=["produce_and_consume_messages"]
) as bootstrap_server:
    await produce_and_consume_messages(
        produce_topic="produce_and_consume_messages",
        consume_topic="produce_and_consume_messages",
        msgs=msgs,
        msgs_count=int(len(msgs) * 0.95),
        bootstrap_servers=bootstrap_server,
    )

[INFO] fastkafka._components.test_dependencies: Java is already installed.
[INFO] fastkafka._components.test_dependencies: Kafka is installed.
[INFO] fastkafka._testing.apache_kafka_broker: Starting zookeeper...
[INFO] fastkafka._testing.apache_kafka_broker: Starting kafka...
[INFO] fastkafka._testing.apache_kafka_broker: Local Kafka broker up and running on 127.0.0.1:9092
[INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'produce_and_consume_messages'})
[INFO] aiokafka.consumer.group_coordinator: Metadata for topic has changed from {} to {'produce_and_consume_messages': 1}. 


producing to 'produce_and_consume_messages':   0%|          | 0/120000 [00:00<?, ?it/s]

consuming from 'produce_and_consume_messages':   0%|          | 0/114000 [00:00<?, ?it/s]

[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Terminating the process 5277...
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Process 5277 terminated.
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Terminating the process 4904...
[INFO] fastkafka._components._subprocess: terminate_asyncio_process(): Process 4904 terminated.


In [None]:
# | export


def get_collapsible_admonition(
    code_block: str, *, name: Optional[str] = None
) -> Markdown:
    """
    Wrap a code block in a collapsible ``??? Example`` admonition.

    The admonition body holds a one-line introduction naming the file,
    followed by the code in a fenced ``python`` block; both are indented
    by four spaces so they belong to the admonition.

    Args:
        code_block: The code to show inside the admonition.
        name: File name quoted in the introduction line. When None, an
            empty name is quoted instead. Default is None.

    Returns:
        A Markdown object holding the admonition text.
    """
    file_label = name if name is not None else ""
    header = f'This example contains the content of the file "{file_label}":'

    # Fence the code first, then indent the whole fence under the admonition.
    fenced = f"```python\n{code_block}\n```"
    indented = textwrap.indent(fenced, prefix="    ")

    return Markdown("??? Example \n\n    " + header + "\n\n" + indented)

In [None]:
# Demo: render a one-line snippet as a collapsible example titled "server.py".
get_collapsible_admonition("print('hello')", name="server.py")

??? Example 

    This example contains the content of the file "server.py":

    ```python
    print('hello')
    ```

In [None]:
# | export


def source2markdown(o: Union[str, Callable[..., Any]]) -> Markdown:
    """Render source code as a fenced ``python`` Markdown block.

    Args:
        o: either the source text itself or a callable; for a callable the
            source is looked up with ``inspect.getsource``.

    Returns:
        A Markdown object that Jupyter displays as highlighted code.
    """
    if callable(o):
        code = inspect.getsource(o)
    else:
        code = o

    return Markdown(f"\n```python\n{code}\n```\n")

In [None]:
# Demo: a trivial function whose source is rendered below. The comment sits
# above the definition rather than inside it, because the function's own
# source is what gets displayed.
def f():
    pass


source2markdown(f)


```python
def f():
    pass

```


In [None]:
# | export


async def wait_for_get_url(
    url: str, timeout: Optional[int] = None, **kwargs: Dict[str, Any]
) -> aiohttp.ClientResponse:
    """
    Keep sending GET requests to a URL until one of them gets a response.

    A failed connection attempt (``aiohttp.ClientConnectorError``) is retried
    after a one second pause. Any other error raised by the request propagates
    to the caller unchanged.

    Args:
        url: The URL to send the GET request to.
        timeout: Optional maximum number of seconds to keep retrying. If not
            provided, the function retries forever. Default is None.
        **kwargs: Additional keyword arguments passed to the tqdm progress bar.
            They are used only when a timeout is provided, because the bar is
            created only in that case.

    Returns:
        The aiohttp.ClientResponse response object for the GET request.
        NOTE(review): it is returned from inside the ``async with`` blocks, so
        the response and session context managers have already exited when the
        caller receives it; presumably only already-loaded data such as the
        status and headers is usable - verify against aiohttp.

    Raises:
        TimeoutError: If the timeout is reached and the URL couldn't be fetched within
            the specified time.
    """
    # The loop's monotonic clock is used for the elapsed-time check, so
    # wall-clock adjustments (NTP corrections, DST shifts) cannot make the
    # timeout fire early or late.
    loop = asyncio.get_running_loop()
    started_at = loop.time()

    # The progress bar exists only when there is a finite timeout to show.
    pbar = tqdm(total=timeout, **kwargs) if timeout is not None else None
    try:
        async with aiohttp.ClientSession() as session:
            while True:
                try:
                    async with session.get(url) as response:
                        if pbar is not None:
                            # Success: jump the bar straight to 100%.
                            pbar.update(pbar.total - pbar.n)
                        return response
                except aiohttp.ClientConnectorError:
                    # Advance one tick per failed attempt, but keep the last
                    # tick in reserve so the bar only completes on success.
                    if pbar is not None and pbar.total - pbar.n > 1:
                        pbar.update(1)
                    await asyncio.sleep(1)

                if timeout is not None and loop.time() - started_at >= timeout:
                    raise TimeoutError(
                        f"Could not fetch url '{url}' for more than {timeout} seconds"
                    )
    finally:
        if pbar is not None:
            pbar.close()

In [None]:
# A reachable site should answer well within the timeout.
await wait_for_get_url("https://python.org", timeout=5, desc="should pass")

# Nothing is expected to listen on this address, so every attempt fails to
# connect and the call must give up with TimeoutError after about 5 seconds.
with pytest.raises(TimeoutError) as e:
    await wait_for_get_url("https://0.0.0.0:4000", timeout=5, desc="expected to fail")
e

should pass:   0%|          | 0/5 [00:00<?, ?it/s]

expected to fail:   0%|          | 0/5 [00:00<?, ?it/s]

<ExceptionInfo TimeoutError("Could not fetch url 'https://0.0.0.0:4000' for more than 5 seconds") tblen=2>