In [None]:
# | default_exp _components.loop

In [None]:
# | export

from typing import Optional, List, Dict, Callable, Type, Any
from typing import get_type_hints

from os import environ
from datetime import datetime, timedelta
import logging

import asyncio
from asyncio import iscoroutinefunction  # do not use the version from inspect

import anyio
from pydantic import BaseModel
from pydantic import Field, HttpUrl, EmailStr, PositiveInt

from confluent_kafka import Consumer
from confluent_kafka import Message, KafkaError

import asyncer

import fast_kafka_api.logger

fast_kafka_api.logger.should_supress_timestamps = True

# import fast_kafka_api
from fast_kafka_api.confluent_kafka import AIOProducer
from fast_kafka_api.asyncapi import KafkaMessage
from fast_kafka_api.logger import get_logger
from fast_kafka_api.testing import true_after

[INFO] fast_kafka_api.asyncapi: ok


In [None]:
# | export
# Module-level logger used by the functions exported from this notebook.
logger = get_logger(__name__)

In [None]:
# Notebook-only cell (no export directive): rebind `logger` at DEBUG level for the cells below.
logger = get_logger(__name__, level=logging.DEBUG)

In [None]:
# Smoke check: emit one DEBUG message through the logger configured above.
logger.debug("ok")

[DEBUG] __main__: ok


In [None]:
import unittest.mock

import nest_asyncio

In [None]:
# | eval: false
# allows async calls in notebooks

nest_asyncio.apply()

In [None]:
# | export


# Message describing a failure that happened while consuming from a topic.
# Instances are built by the consumer loop functions in this notebook and
# passed to their `on_error_callback`.
class KafkaErrorMsg(KafkaMessage):
    # Name of the topic being consumed when the failure happened (required).
    topic: str = Field(..., description="topic where exception occurred")
    # Raw payload of the offending message; None when no payload is available.
    raw_msg: Optional[bytes] = Field(None, description="raw message string")
    # Textual description of the error or exception (required).
    error: str = Field(..., description="exception triggered by the message")

In [None]:
# Broker host and port are read from the environment; a missing variable
# raises KeyError right here.
kafka_server_url = environ["KAFKA_HOSTNAME"]
kafka_server_port = environ["KAFKA_PORT"]

# Client configuration: broker address plus "auto.offset.reset" set to "earliest".
kafka_config = {
    "bootstrap.servers": f"{kafka_server_url}:{kafka_server_port}",
    "auto.offset.reset": "earliest",
}

In [None]:
# Sample message type for the tests below: two required string fields.
class MyInfo(KafkaMessage):
    # Phone number kept as a string (required).
    mobile: str = Field(..., example="+385987654321")
    # Person's name (required).
    name: str = Field(..., example="James Bond")


# Sample message type for the tests below: a nested `MyInfo` plus a URL.
class MyMsgUrl(KafkaMessage):
    # Nested `MyInfo` record (required).
    info: MyInfo = Field(..., example=dict(mobile="+385987654321", name="James Bond"))
    # URL validated by pydantic's `HttpUrl` type (required).
    url: HttpUrl = Field(..., example="https://sis.gov.uk/agents/007")


# Sample message type for the tests below: a nested `MyMsgUrl` plus an e-mail address.
class MyMsgEmail(KafkaMessage):
    # Nested `MyMsgUrl` record (required).
    msg_url: MyMsgUrl = Field(
        ...,
        example=dict(
            info=dict(mobile="+385987654321", name="James Bond"),
            url="https://sis.gov.uk/agents/007",
        ),
    )
    # E-mail address validated by pydantic's `EmailStr` type (required).
    email: EmailStr = Field(..., example="agent-007@sis.gov.uk")

In [None]:
# | export


async def _consumer_pooling_step(
    *,
    async_poll_f: Callable[[float], Optional[Message]],
    on_event_callback: Callable[
        [KafkaMessage, Callable[[str, KafkaMessage], None]], None
    ],
    on_error_callback: Optional[Callable[[KafkaErrorMsg], None]] = None,
    produce: Callable[[str, KafkaMessage], None],
    msg_type: Type[KafkaMessage],
    timeout: float,
    topic: str,
) -> None:
    """Consumer pooling step

    Awaits the polling function `async_poll_f` once and dispatches the result:

    - no message (`None`): only a debug line is logged;
    - message carrying an error: a warning is logged and `on_error_callback`
      (if given) is awaited with a `KafkaErrorMsg`;
    - regular message: the payload is decoded as UTF-8, parsed with
      `msg_type.parse_raw` and `on_event_callback` is awaited with the parsed
      object and `produce`.

    Any exception raised while polling, parsing or running a callback inside
    the step is logged together with its traceback and reported through
    `on_error_callback` (if given). An exception raised by that final
    `on_error_callback` call is not handled here and propagates to the caller.

    Params:
        async_poll_f: async polling function, awaited as `async_poll_f(timeout=timeout)`
        on_event_callback: async function awaited with the parsed message and `produce`
        on_error_callback: optional async function awaited with a `KafkaErrorMsg`
            in case of any kind of error
        produce: produce function handed to `on_event_callback`; called there as
            `produce(topic, msg)`
        msg_type: pydantic class used for parsing message JSON
        timeout: timeout parameter passed to the polling function
        topic: name of topic used for logging and for building `KafkaErrorMsg`
    """
    import traceback

    logger.debug("_consumer_pooling_step()")
    # Bound before the try block so the except handler can safely inspect it
    # even when the polling call itself is what raised.
    msg = None
    try:
        msg = await async_poll_f(timeout=timeout)  # type: ignore
        if msg is None:
            logger.debug(
                f"consumers_async_loop(topic={topic}): no messages for the topic {topic} due to no message available."
            )
        elif msg.error() is not None:
            logger.warning(
                f"consumers_async_loop(topic={topic}): no messages for the topic {topic} due to error: {msg.error()}"
            )
            if on_error_callback is not None:
                kafka_err_msg = KafkaErrorMsg(
                    topic=topic,
                    raw_msg=msg.value(),
                    # `error` is declared as `str`; convert explicitly so an
                    # error object does not fail model validation.
                    error=str(msg.error()),
                )
                await on_error_callback(kafka_err_msg)

        else:
            logger.debug(
                f"consumers_async_loop(topic={topic}): message received for the topic {topic}: {msg.value()}, {on_event_callback}, msg_type={msg_type},"
            )
            msg_object = msg_type.parse_raw(msg.value().decode("utf-8"))
            logger.debug(
                f"consumers_async_loop(topic={topic}): calling {on_event_callback}({msg_object})"
            )
            await on_event_callback(msg_object, produce)

    except Exception as e:
        logger.warning(
            f"consumers_async_loop(topic={topic}): Exception in inner try raised: {e}"
            + "\n"
            + traceback.format_exc()
        )

        if on_error_callback is not None:
            kafka_err_msg = KafkaErrorMsg(
                topic=topic,
                # No payload to report when polling itself failed.
                raw_msg=msg.value() if msg is not None else None,
                error=str(e),
            )
            await on_error_callback(kafka_err_msg)

In [None]:
def async_mock(*args, **kwargs):
    """Create a `unittest.mock.Mock` together with an awaitable wrapper around it.

    All positional and keyword arguments are forwarded to `Mock`.

    Returns:
        A pair `(async_f, mock)`: `async_f` is the mock wrapped with
        `asyncer.asyncify`, `mock` is the underlying `Mock` on which call
        assertions can be made.
    """
    sync_mock = unittest.mock.Mock(*args, **kwargs)
    return asyncer.asyncify(sync_mock), sync_mock

In [None]:
# Scenario: the polling function returns None (nothing to consume).
on_event_callback, on_event_callback_mock = async_mock(return_value=None)
on_error_callback, on_error_callback_mock = async_mock(return_value=None)
produce = unittest.mock.Mock(return_value=None)

timeout = 0.1

# The poll mock yields None, i.e. no message.
async_poll_f, async_poll_f_mock = async_mock(return_value=None)
asyncio.run(
    _consumer_pooling_step(
        async_poll_f=async_poll_f,
        timeout=timeout,
        topic="my_topic",
        on_event_callback=on_event_callback,
        on_error_callback=on_error_callback,
        produce=produce,
        msg_type=MyMsgUrl,
    )
)
# Expected: polled exactly once with the timeout; no callback and no produce call.
async_poll_f_mock.assert_called_once_with(timeout=timeout)
on_event_callback_mock.assert_not_called()
on_error_callback_mock.assert_not_called()
produce.assert_not_called()

[DEBUG] __main__: _consumer_pooling_step()
[DEBUG] __main__: consumers_async_loop(topic=my_topic): no messages for the topic my_topic due to no message available.


In [None]:
# Scenario: the polled message reports an error through `error()`.
on_event_callback, on_event_callback_mock = async_mock(return_value=None)
on_error_callback, on_error_callback_mock = async_mock(return_value=None)
produce = unittest.mock.Mock(return_value=None)

timeout = 0.1

# Fake message: `error()` returns a non-None value and `value()` returns None.
m = unittest.mock.MagicMock()
m.error = unittest.mock.Mock(return_value="some error occurred")
m.value = unittest.mock.Mock(return_value=None)
async_poll_f, async_poll_f_mock = async_mock(return_value=m)

asyncio.run(
    _consumer_pooling_step(
        async_poll_f=async_poll_f,
        timeout=timeout,
        topic="my_topic",
        on_event_callback=on_event_callback,
        on_error_callback=on_error_callback,
        produce=produce,
        msg_type=MyMsgUrl,
    )
)
# Expected: only the error callback fires, with the topic, no payload and the error text.
async_poll_f_mock.assert_called_once_with(timeout=timeout)
on_event_callback_mock.assert_not_called()
on_error_callback_mock.assert_called_once_with(
    KafkaErrorMsg(topic="my_topic", raw_msg=None, error="some error occurred")
)
produce.assert_not_called()

[DEBUG] __main__: _consumer_pooling_step()


In [None]:
# Scenario: a well-formed message whose payload matches `msg_type`.
on_event_callback, on_event_callback_mock = async_mock(return_value=None)
on_error_callback, on_error_callback_mock = async_mock(return_value=None)
produce = unittest.mock.Mock(return_value=None)

timeout = 0.1

msg = MyMsgUrl(
    info=MyInfo(mobile=385999999999, name="Marko"),
    url="https://www.acme.com",
)
# Fake message: no error, payload is the UTF-8 encoded JSON of `msg`.
m = unittest.mock.MagicMock()
m.error = unittest.mock.Mock(return_value=None)
m.value = unittest.mock.Mock(return_value=msg.json().encode("utf-8"))
async_poll_f, async_poll_f_mock = async_mock(return_value=m)

asyncio.run(
    _consumer_pooling_step(
        async_poll_f=async_poll_f,
        timeout=timeout,
        topic="my_topic",
        on_event_callback=on_event_callback,
        on_error_callback=on_error_callback,
        produce=produce,
        msg_type=MyMsgUrl,
    )
)
# Expected: the event callback receives the parsed message and `produce`;
# the error callback is untouched and `produce` itself is never called by the step.
async_poll_f_mock.assert_called_once_with(timeout=timeout)
on_event_callback_mock.assert_called_once_with(msg, produce)
on_error_callback_mock.assert_not_called()
produce.assert_not_called()

[DEBUG] __main__: _consumer_pooling_step()
[DEBUG] __main__: consumers_async_loop(topic=my_topic): message received for the topic my_topic: b'{"info": {"mobile": "385999999999", "name": "Marko"}, "url": "https://www.acme.com"}', <function asyncify.<locals>.wrapper>, msg_type=<class '__main__.MyMsgUrl'>,
[DEBUG] __main__: consumers_async_loop(topic=my_topic): calling <function asyncify.<locals>.wrapper>(info=MyInfo(mobile='385999999999', name='Marko') url=HttpUrl('https://www.acme.com', ))


In [None]:
# Scenario: the payload is valid JSON for `MyMsgUrl`, but the step is asked to
# parse it as `MyInfo`, so parsing fails.
on_event_callback, on_event_callback_mock = async_mock(return_value=None)
on_error_callback, on_error_callback_mock = async_mock(return_value=None)
produce = unittest.mock.Mock(return_value=None)

timeout = 0.1

msg = MyMsgUrl(
    info=MyInfo(mobile=385999999999, name="Marko"),
    url="https://www.acme.com",
)
# Fake message: no error, payload is the UTF-8 encoded JSON of `msg`.
m = unittest.mock.MagicMock()
m.error = unittest.mock.Mock(return_value=None)
m.value = unittest.mock.Mock(return_value=msg.json().encode("utf-8"))
async_poll_f, async_poll_f_mock = async_mock(return_value=m)

asyncio.run(
    _consumer_pooling_step(
        async_poll_f=async_poll_f,
        timeout=timeout,
        topic="my_topic",
        on_event_callback=on_event_callback,
        on_error_callback=on_error_callback,
        produce=produce,
        msg_type=MyInfo,
    )
)
# Expected: the event callback is skipped and the error callback receives the
# raw payload together with the validation error text.
# NOTE(review): `raw_msg` is declared as bytes but a str literal is passed
# below; presumably pydantic coerces it - confirm.
async_poll_f_mock.assert_called_once_with(timeout=timeout)
on_event_callback_mock.assert_not_called()
on_error_callback_mock.assert_called_once_with(
    KafkaErrorMsg(
        topic="my_topic",
        raw_msg='{"info": {"mobile": "385999999999", "name": "Marko"}, "url": "https://www.acme.com"}',
        error="2 validation errors for MyInfo\nmobile\n  field required (type=value_error.missing)\nname\n  field required (type=value_error.missing)",
    )
)
produce.assert_not_called()

[DEBUG] __main__: _consumer_pooling_step()
[DEBUG] __main__: consumers_async_loop(topic=my_topic): message received for the topic my_topic: b'{"info": {"mobile": "385999999999", "name": "Marko"}, "url": "https://www.acme.com"}', <function asyncify.<locals>.wrapper>, msg_type=<class '__main__.MyInfo'>,
mobile
  field required (type=value_error.missing)
name
  field required (type=value_error.missing)
Traceback (most recent call last):
  File "<ipython-input-11-82fe5561df1e>", line 54, in _consumer_pooling_step
    msg_object = msg_type.parse_raw(msg.value().decode("utf-8"))
  File "pydantic/main.py", line 549, in pydantic.main.BaseModel.parse_raw
  File "pydantic/main.py", line 526, in pydantic.main.BaseModel.parse_obj
  File "pydantic/main.py", line 342, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 2 validation errors for MyInfo
mobile
  field required (type=value_error.missing)
name
  field required (type=value_error.missing)



In [None]:
async def on_event_callback(msg_url: MyMsgUrl, produce):
    """Wrap the incoming `MyMsgUrl` into a `MyMsgEmail` and hand it to `produce`.

    `produce` is called synchronously as `produce("some_topic", msg)`.
    """
    produce("some_topic", MyMsgEmail(msg_url=msg_url, email="marko@acme.com"))


# Number of `produce` calls so far; reset to 0 whenever this cell is run.
_event_counter = 0


def produce(topic: str, msg: KafkaMessage):
    """Stand-in producer: count the call and serialize `msg`; nothing is sent anywhere."""
    global _event_counter
    _event_counter = _event_counter + 1
    # The encoded bytes are discarded; only the serialization work is performed.
    raw_msg = msg.json().encode("utf-8")


# Error callback is an async mock; the underlying Mock is not needed here.
on_error_callback, _= async_mock(return_value=None)

timeout = 0.1

msg = MyMsgUrl(
    info=MyInfo(mobile=385999999999, name="Marko"),
    url="https://www.acme.com",
)
# Fake message returned by every poll: no error, payload is the JSON of `msg`.
m = unittest.mock.MagicMock()
m.error = unittest.mock.Mock(return_value=None)
m.value = unittest.mock.Mock(return_value=msg.json().encode("utf-8"))
async_poll_f, _ = async_mock(return_value=m)


async def benchmark(n: int):
    """Await `_consumer_pooling_step` `n` times in a row and print throughput.

    Uses the notebook-level fixtures (`async_poll_f`, `on_event_callback`,
    `on_error_callback`, `produce`, `timeout`) and asserts that the global
    `_event_counter` equals `n` once all steps are done.
    """
    started_at = datetime.now()
    step_kwargs = dict(
        async_poll_f=async_poll_f,
        timeout=timeout,
        topic="my_topic",
        on_event_callback=on_event_callback,
        on_error_callback=on_error_callback,
        produce=produce,
        msg_type=MyMsgUrl,
    )
    for _step in range(n):
        await _consumer_pooling_step(**step_kwargs)
    assert _event_counter == n
    # Elapsed wall-clock time expressed in (fractional) seconds.
    elapsed_s = (datetime.now() - started_at) / timedelta(seconds=1)
    throughput = _event_counter / elapsed_s
    print(f"Messages processed: {_event_counter:,d}")
    print(f"Time              : {elapsed_s:.2f} s")
    print(f"Throughput.       : {throughput:,.0f} msg/s")


# Rebind the logger at INFO level for the benchmark run, then back to DEBUG afterwards.
logger = get_logger(__name__, level=logging.INFO)
asyncio.run(benchmark(10_000))
logger = get_logger(__name__, level=logging.DEBUG)

Messages processed: 10,000
Time              : 5.71 s
Throughput.       : 1,750 msg/s


In [None]:
# | export


async def _consumers_async_loop(
    *,
    async_poll_f: Callable[[float], Optional[Message]],
    on_event_callback: Callable[
        [KafkaMessage, Callable[[str, KafkaMessage], None]], None
    ],
    on_error_callback: Optional[Callable[[KafkaErrorMsg], None]] = None,
    is_shutting_down_f: Callable[[], bool],
    produce: Callable[[str, KafkaMessage], None],
    msg_type: Type[KafkaMessage],
    timeout: float,
    topic: str,
) -> None:
    """Repeat `_consumer_pooling_step` until `is_shutting_down_f()` returns true.

    The callbacks are validated first, then the loop checks the shutdown flag
    before every step. An exception escaping a step ends the loop: it is
    logged and, when `on_error_callback` is given, reported through it as a
    `KafkaErrorMsg` without a raw payload.

    Params:
        async_poll_f: coroutine function used for polling
        on_event_callback: coroutine function awaited with each parsed message
            and `produce`
        on_error_callback: optional coroutine function awaited with a
            `KafkaErrorMsg` on errors
        is_shutting_down_f: called before every step; a true value stops the loop
        produce: produce function handed to `on_event_callback`
        msg_type: pydantic class used for parsing message JSON
        timeout: timeout parameter passed to the polling function
        topic: name of topic used for logging and for building `KafkaErrorMsg`

    Raises:
        ValueError: if `async_poll_f`, `on_event_callback` or a given
            `on_error_callback` is not a coroutine function.
    """
    logger.info(f"_consumers_async_loop(topic={topic}, timeout={timeout}) starting.")
    if not iscoroutinefunction(async_poll_f):
        raise ValueError(
            f"async_poll_f ({async_poll_f}) must be coroutine, but it isn't."
        )
    if not iscoroutinefunction(on_event_callback):
        raise ValueError(
            f"on_event_callback ({on_event_callback}) must be coroutine, but it isn't."
        )
    if on_error_callback and not iscoroutinefunction(on_error_callback):
        # Name the parameter that actually failed the check.
        raise ValueError(
            f"on_error_callback ({on_error_callback}) must be coroutine, but it isn't."
        )

    try:
        while True:
            if is_shutting_down_f():
                logger.info(f"consumers_async_loop(topic={topic}) shutting down...")
                break

            await _consumer_pooling_step(
                async_poll_f=async_poll_f,
                timeout=timeout,
                topic=topic,
                on_event_callback=on_event_callback,
                on_error_callback=on_error_callback,
                produce=produce,
                msg_type=msg_type,
            )
    except Exception as e:
        logger.error(
            f"consumers_async_loop(topic={topic}): Exception in outer try raised: {e}"
        )
        if on_error_callback is not None:
            kafka_err_msg = KafkaErrorMsg(
                topic=topic,
                raw_msg=None,
                error=str(e),
            )
            await on_error_callback(kafka_err_msg)

    logger.info(f"_consumers_async_loop(topic={topic}) exiting.")

In [None]:
# import asyncer
async def on_event_callback(msg_url: MyMsgUrl, produce):
    """Wrap the incoming `MyMsgUrl` into a `MyMsgEmail` and pass it to `produce`."""
    msg = MyMsgEmail(msg_url=msg_url, email="marko@acme.com")
    #     await asyncer.asyncify(produce)("some_topic", msg)
    # `produce` is called synchronously; the asyncified variant above is left disabled.
    produce("some_topic", msg)


# Number of `produce` calls so far; reset to 0 whenever this cell is run.
_event_counter = 0


def produce(topic: str, msg: KafkaMessage):
    """Stand-in producer: count the call and serialize `msg`; nothing is sent anywhere."""
    global _event_counter
    _event_counter = _event_counter + 1
    # The encoded bytes are discarded; only the serialization work is performed.
    raw_msg = msg.json().encode("utf-8")


# Error callback is an async mock; the underlying Mock is not needed here.
on_error_callback, _ = async_mock(return_value=None)

timeout = 0.1

msg = MyMsgUrl(
    info=MyInfo(mobile=385999999999, name="Marko"),
    url="https://www.acme.com",
)
# Fake message returned by every poll: no error, payload is the JSON of `msg`.
m = unittest.mock.MagicMock()
m.error = unittest.mock.Mock(return_value=None)
m.value = unittest.mock.Mock(return_value=msg.json().encode("utf-8"))
async_poll_f, _ = async_mock(return_value=m)


async def test__consumers_async_loop():
    """Run `_consumers_async_loop` against the mocked poll and check throughput.

    The loop is stopped through `true_after(5)` (presumably a callable that
    turns true after about five seconds - confirm against `fast_kafka_api.testing`).
    Afterwards the number of produced messages and the achieved rate are
    printed and asserted to be above zero and above 100 msg/s respectively.
    """
    started_at = datetime.now()
    shutdown_flag = true_after(5)
    await _consumers_async_loop(
        topic="my_topic",
        msg_type=MyMsgUrl,
        timeout=timeout,
        async_poll_f=async_poll_f,
        is_shutting_down_f=shutdown_flag,
        on_event_callback=on_event_callback,
        on_error_callback=on_error_callback,
        produce=produce,
    )
    # Elapsed wall-clock time expressed in (fractional) seconds.
    elapsed_s = (datetime.now() - started_at) / timedelta(seconds=1)
    throughput = _event_counter / elapsed_s
    print(f"Messages processed: {_event_counter:,d}")
    print(f"Time              : {elapsed_s:.2f} s")
    print(f"Throughput.       : {throughput:,.0f} msg/s")

    assert _event_counter > 0, _event_counter
    assert throughput > 100, throughput


# Rebind the logger at INFO level for the test run, then back to DEBUG afterwards.
logger = get_logger(__name__, level=logging.INFO)
asyncio.run(test__consumers_async_loop())
logger = get_logger(__name__, level=logging.DEBUG)

[INFO] __main__: _consumers_async_loop(topic=my_topic, timeout=0.1) starting.
[INFO] __main__: consumers_async_loop(topic=my_topic) shutting down...
[INFO] __main__: _consumers_async_loop(topic=my_topic) exiting.
Messages processed: 8,178
Time              : 5.05 s
Throughput.       : 1,620 msg/s


In [None]:
# | export


async def consumers_async_loop(
    *,
    consumer: Consumer,
    producer: AIOProducer,
    topic: str,
    on_event_callback: Callable[
        [KafkaMessage, Callable[[str, KafkaMessage], None]], None
    ],
    is_shutting_down_f: Callable[[], bool],
    config: Dict[str, str],
    timeout: float = 1.0,
    on_error_topic: Optional[str] = None,
) -> None:
    """Consume `topic` with `consumer` until `is_shutting_down_f()` returns true.

    The blocking `consumer.poll` is wrapped with `asyncer.asyncify`, the event
    callback is wrapped the same way when it is not already a coroutine
    function, and both are handed to `_consumers_async_loop`.

    Params:
        consumer: consumer whose `poll` method is used for polling
        producer: producer used by the `produce` function given to
            `on_event_callback` and by error reporting
        topic: name of the topic, used for logging and error messages
        on_event_callback: function (sync or async) called with each parsed
            message and a `produce(topic, msg)` function
        is_shutting_down_f: called before every polling step; a true value
            stops the loop
        config: configuration dict; only logged by this function
        timeout: timeout passed to every poll call
        on_error_topic: when given, every `KafkaErrorMsg` is produced to this
            topic; when `None`, errors are only logged
    """
    logger.info(f"consumers_async_loop(topic={topic}, config={config}, timeout={timeout}) starting.")
    try:
        # we convert the blocking poll() function into asynchronous one (it executes poll() in a worker thread)
        async_poll_f = asyncer.asyncify(consumer.poll)

        # convert on_event_callback to coroutine if needed
        async_on_event_callback = on_event_callback
        if not iscoroutinefunction(async_on_event_callback):
            async_on_event_callback = asyncer.asyncify(async_on_event_callback)
        # NOTE(review): `_get_first_func_arg_type` is not defined in the visible
        # part of this notebook - confirm it exists (or add it) before relying
        # on this function.
        msg_type = _get_first_func_arg_type(on_event_callback)

        def produce(to_topic: str, msg: KafkaMessage) -> None:
            # NOTE(review): assumes `AIOProducer.produce(topic, value)` accepts the
            # JSON-encoded message bytes and that its result may be left unawaited
            # - confirm against `fast_kafka_api.confluent_kafka`.
            producer.produce(to_topic, msg.json().encode("utf-8"))

        async def _publish_error(error_msg: KafkaErrorMsg) -> None:
            produce(on_error_topic, error_msg)  # type: ignore

        # Without an error topic there is nowhere to publish errors; the inner
        # loop then only logs them.
        on_error_callback = _publish_error if on_error_topic is not None else None

        await _consumers_async_loop(
            async_poll_f=async_poll_f,
            timeout=timeout,
            topic=topic,
            on_event_callback=async_on_event_callback,
            on_error_callback=on_error_callback,
            is_shutting_down_f=is_shutting_down_f,
            produce=produce,
            msg_type=msg_type,
        )
    finally:
        logger.info(f"consumers_async_loop(topic={topic}) exiting.")