In [None]:
# | default_exp _components.aiokafka_loop

In [None]:
# | export
from typing import *

from os import environ
import asyncio
import unittest.mock
from datetime import datetime, timedelta

from aiokafka import AIOKafkaConsumer
from aiokafka.structs import TopicPartition, ConsumerRecord
from pydantic import BaseModel, HttpUrl, NonNegativeInt, Field
import asyncer
import anyio

from fast_kafka_api.logger import get_logger, supress_timestamps
from fast_kafka_api.testing import true_after, create_and_fill_testing_topic, nb_safe_seed
from fast_kafka_api.asyncapi import KafkaMessage

[INFO] fast_kafka_api.asyncapi: ok


In [None]:
# Seed helper scoped to this notebook's name; it is called further down as
# `seed(1)` to derive the seed handed to `create_and_fill_testing_topic`.
seed = nb_safe_seed("_components.aiokafka_loop")

In [None]:
# | eval: false
# allows async calls in notebooks

import nest_asyncio
nest_asyncio.apply()

In [None]:
# | export

# Module-level logger used by the exported consumer-loop functions in this notebook.
logger = get_logger(__name__)

In [None]:
# Notebook-only logging setup (this cell is not exported).
# NOTE(review): presumably this strips timestamps from log lines so that the
# captured cell output stays stable between runs - confirm in fast_kafka_api.logger.
supress_timestamps()
# Rebind `logger` for interactive use. 20 is the numeric value of the standard
# library's logging.INFO; assumes get_logger applies it as the logger level - TODO confirm.
logger = get_logger(__name__, level=20)
# Smoke call; if the level above is INFO, this DEBUG message should not be printed.
logger.debug("ok")

In [None]:
# Broker location is taken from the environment; indexing `environ` raises
# KeyError when KAFKA_HOSTNAME or KAFKA_PORT is not set.
kafka_server_url = environ["KAFKA_HOSTNAME"]
kafka_server_port = environ["KAFKA_PORT"]

# Shared test configuration. The "bootstrap.servers" entry ("host:port") is
# read by the consumer-loop test cells below, and the whole dict is passed to
# `create_and_fill_testing_topic`.
kafka_config = {
    "bootstrap.servers": f"{kafka_server_url}:{kafka_server_port}"
}

In [None]:
# Sample pydantic message model used by the test cells in this notebook.
# (Documented with comments rather than a docstring so the model itself is untouched.)
class MyMessage(BaseModel):
    # Required field (`...` means no default) validated as an HTTP URL.
    url: HttpUrl = Field(..., example="http://www.acme.com", description="Url example")
    # Non-negative integer; defaults to 1000 when omitted.
    port: NonNegativeInt = Field(1000)

In [None]:
# | export

async def process_msgs(
    *,
    msgs: Dict[TopicPartition, List[ConsumerRecord]],
    callbacks: Dict[
        str, Callable[[KafkaMessage, Callable[[str, BaseModel], None]], None]
    ],
    produce: Callable[[str, BaseModel], None],
    msg_types: Dict[str, Type[BaseModel]],
    process_f: Callable[
        [
            Tuple[
                Callable[[KafkaMessage, Callable[[str, BaseModel], None]], None],
                BaseModel,
                Callable[[str, BaseModel], None],
            ]
        ],
        Awaitable[None],
    ],
) -> None:
    """Decode a batch of consumed records and hand each one to ``process_f``.

    For every topic-partition in ``msgs`` the raw record values are decoded
    as UTF-8 and parsed with the model registered for the record's topic.
    Each parsed message is then passed to ``process_f`` as a single
    ``(callback, message, produce)`` tuple, where ``callback`` is the entry
    of ``callbacks`` for that topic.

    Args:
        msgs: Records grouped by topic-partition.
        callbacks: Topic name -> callback associated with that topic.
        produce: Callable forwarded unchanged as the third tuple element.
        msg_types: Topic name -> model class used to parse record values.
        process_f: Awaitable-returning callable that receives one
            ``(callback, message, produce)`` tuple per message.

    Raises:
        KeyError: If a topic is missing from ``msg_types``, or from
            ``callbacks`` when a message for that topic is dispatched.
    """
    for topic_partition, topic_msgs in msgs.items():
        topic = topic_partition.topic
        msg_type = msg_types[topic]
        # The whole partition batch is parsed before anything is dispatched,
        # so a record that fails to parse stops the batch before any of its
        # messages reach ``process_f``.
        decoded_msgs = [
            msg_type.parse_raw(msg.value.decode("utf-8")) for msg in topic_msgs
        ]
        for msg in decoded_msgs:
            await process_f((callbacks[topic], msg, produce))

In [None]:
# Fixtures for the process_msgs test scenarios outlined in the cells below:
# two partitions (0 and 1) of "topic_0" and one partition of "topic_1".
topic_part_0_0 = TopicPartition("topic_0", 0)
topic_part_0_1 = TopicPartition("topic_0", 1)
topic_part_1_0 = TopicPartition("topic_1", 0)

# A single sample message instance.
msg = MyMessage(url="http://www.acme.com", port=22)

In [None]:
## One msg, one topic, callback called once, produce and process_f called once

In [None]:
## Two msg, two topics, each callback called once, produce and process_f called twice

In [None]:
## Two msg, one topic, one callback called twice, other never called, produce and process_f called twice

In [None]:
## Two msg, one topic, two partitions, one callback called twice, produce and process_f called twice

In [None]:
# | export


async def process_message_callback(receive_stream):
    """Drain ``receive_stream`` and run the callback carried by each item.

    The stream is entered as an async context manager for the duration of
    the loop. Every item is expected to be a ``(callback, msg, produce)``
    triple; the callback is awaited with the message and the produce
    callable before the next item is fetched.
    """
    async with receive_stream:
        async for item in receive_stream:
            handler, message, producer = item
            await handler(message, producer)
            

async def _aiokafka_consumer_loop(
    *,
    consumer,
    callbacks: Dict[
        str, Callable[[KafkaMessage, Callable[[str, BaseModel], None]], None]
    ],
    produce: Callable[[str, BaseModel], None],
    msg_types: Dict[str, Type[BaseModel]],
    is_shutting_down_f: Callable[[], bool],
):
    """Poll ``consumer`` in a loop and push decoded messages to a callback task.

    A memory object stream connects the polling loop to
    ``process_message_callback``, which is started in a task group and
    awaits the callbacks. On each iteration one batch is fetched with
    ``consumer.getmany(timeout_ms=100)`` and passed to ``process_msgs``,
    using the stream's ``send`` as ``process_f``. ``is_shutting_down_f`` is
    checked after every batch, so at least one poll always happens.
    """
    sender, receiver = anyio.create_memory_object_stream()
    async with anyio.create_task_group() as task_group:
        task_group.start_soon(process_message_callback, receiver)
        # Leaving this block closes the sending side of the stream.
        async with sender:
            shutdown_requested = False
            while not shutdown_requested:
                batch = await consumer.getmany(timeout_ms=100)
                await process_msgs(
                    msgs=batch,
                    callbacks=callbacks,
                    produce=produce,
                    msg_types=msg_types,
                    process_f=sender.send,
                )
                shutdown_requested = is_shutting_down_f()

In [None]:
# Mock consumer with messages in getmany
# Check full combination of callbacks in the previous tests

In [None]:
# | export

async def aiokafka_consumer_loop(
    topics: List[str],
    *,
    bootstrap_servers: str,
    auto_offset_reset: str,
    max_poll_records: int,
    callbacks: Dict[
        str, Callable[[KafkaMessage, Callable[[str, BaseModel], None]], None]
    ],
    produce: Callable[[str, BaseModel], None],
    msg_types: Dict[str, Type[BaseModel]],
    is_shutting_down_f: Callable[[], bool],
) -> None:
    """Create a consumer, subscribe it to ``topics`` and run the consume loop.

    Builds an ``AIOKafkaConsumer``, starts it, subscribes to ``topics`` and
    delegates to ``_aiokafka_consumer_loop`` until that returns or raises.
    The consumer is stopped in every case once it has been started.

    Args:
        topics: Topic names to subscribe to.
        bootstrap_servers: Passed through to ``AIOKafkaConsumer``.
        auto_offset_reset: Passed through to ``AIOKafkaConsumer``.
        max_poll_records: Passed through to ``AIOKafkaConsumer``.
        callbacks: Topic name -> callback, forwarded to the inner loop.
        produce: Produce callable, forwarded to the inner loop.
        msg_types: Topic name -> model class, forwarded to the inner loop.
        is_shutting_down_f: Zero-argument callable forwarded to the inner
            loop, which uses it as its stop condition.
    """
    consumer = AIOKafkaConsumer(
        bootstrap_servers=bootstrap_servers,
        auto_offset_reset=auto_offset_reset,
        max_poll_records=max_poll_records,
    )
    logger.info("Consumer created.")

    await consumer.start()
    logger.info("Consumer started.")

    # Everything that follows a successful start() lives inside try/finally,
    # so the consumer is stopped even when subscribe() itself raises.
    try:
        consumer.subscribe(topics)
        logger.info("Consumer subscribed.")

        await _aiokafka_consumer_loop(
            consumer=consumer,
            callbacks=callbacks,
            produce=produce,
            msg_types=msg_types,
            is_shutting_down_f=is_shutting_down_f,
        )
    finally:
        await consumer.stop()
        logger.info("Consumer stopped.")

In [None]:
msgs_sent = 9178
msgs = [
    MyMessage(url="http://www.ai.com", port=port).json().encode("utf-8")
    for port in range(msgs_sent)
]
msgs_received = 0

async def count_msg(msg: MyMessage, produce):
    global msgs_received
    msgs_received = msgs_received + 1
    await produce("my_topic", msg)
    
async def produce_print_msg(topic: str, msg: MyMessage):
    if msg.port % 1000 == 0:
        print(f"Producing {msg} for {topic}")

async with create_and_fill_testing_topic(kafka_config=kafka_config, msgs=msgs, seed=seed(1)) as topic:
    await aiokafka_consumer_loop(
        topics = [topic],
        bootstrap_servers = kafka_config["bootstrap.servers"],
        auto_offset_reset="earliest",
        max_poll_records=100,
        callbacks = {topic: count_msg},
        produce = produce_print_msg,
        msg_types= {topic: MyMessage},
        is_shutting_down_f= true_after(5),
    )

assert msgs_sent == msgs_received

[INFO] fast_kafka_api.testing: create_missing_topics(['my_topic_928922829']): new_topics = [NewTopic(topic=my_topic_928922829,num_partitions=3)]
[INFO] fast_kafka_api.testing: Producer <aiokafka.producer.producer.AIOKafkaProducer object> created.
[INFO] fast_kafka_api.testing: Producer <aiokafka.producer.producer.AIOKafkaProducer object> stared.
[INFO] fast_kafka_api.testing: Sent messages: len(sent_msgs)=9178
[INFO] __main__: Consumer created.
[INFO] __main__: Consumer started.
[INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'my_topic_928922829'})
[INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'my_topic_928922829'}
[INFO] __main__: Consumer subscribed.
[INFO] aiokafka.consumer.group_coordinator: Metadata for topic has changed from {} to {'my_topic_928922829': 3}. 
Producing url=HttpUrl('http://www.ai.com', ) port=2000 for my_topic
Producing url=HttpUrl('http://www.ai.com', ) port=3000 for my_topic
Producing url=HttpUrl('http://www.ai

In [None]:
msgs_sent = 100000
msgs = [
    MyMessage(url="http://www.ai.com", port=port).json().encode("utf-8")
    for port in range(msgs_sent)
]
msgs_received = 0

async def count_msg(msg: MyMessage, produce):
    global msgs_received
    msgs_received = msgs_received + 1
    await produce("my_topic", msg)
    
async def produce_print_msg(topic: str, msg: MyMessage):
    if msg.port % 10000 == 0:
        print(f"Producing {msg} for {topic}")

async with create_and_fill_testing_topic(kafka_config=kafka_config, msgs=msgs, seed=seed(1)) as topic:
    start = datetime.now()
    await aiokafka_consumer_loop(
        topics = [topic],
        bootstrap_servers = kafka_config["bootstrap.servers"],
        auto_offset_reset="earliest",
        max_poll_records=100,
        callbacks = {topic: count_msg},
        produce = produce_print_msg,
        msg_types= {topic: MyMessage},
        is_shutting_down_f= true_after(5),
    )
    t = (datetime.now() - start) / timedelta(seconds=1)
    thrp = msgs_received / t
    
    print(f"Messages processed: {msgs_received:,d}")
    print(f"Time              : {t:.2f} s")
    print(f"Throughput.       : {thrp:,.0f} msg/s")

[INFO] fast_kafka_api.testing: create_missing_topics(['my_topic_928922829']): new_topics = [NewTopic(topic=my_topic_928922829,num_partitions=3)]
[INFO] fast_kafka_api.testing: Producer <aiokafka.producer.producer.AIOKafkaProducer object> created.
[INFO] fast_kafka_api.testing: Producer <aiokafka.producer.producer.AIOKafkaProducer object> stared.
[INFO] fast_kafka_api.testing: Sent messages: len(sent_msgs)=100000
[INFO] __main__: Consumer created.
[INFO] __main__: Consumer started.
[INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'my_topic_928922829'})
[INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'my_topic_928922829'}
[INFO] __main__: Consumer subscribed.
[INFO] aiokafka.consumer.group_coordinator: Metadata for topic has changed from {} to {'my_topic_928922829': 3}. 
Producing url=HttpUrl('http://www.ai.com', ) port=20000 for my_topic
Producing url=HttpUrl('http://www.ai.com', ) port=10000 for my_topic
Producing url=HttpUrl('http://ww