In [None]:
# | default_exp _testing.local_kafka_broker

In [None]:
# | export
import uuid
from collections import namedtuple
from dataclasses import dataclass

from typing import *
from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
from aiokafka.structs import ConsumerRecord, TopicPartition

from fastkafka._components.meta import patch, delegates
from fastkafka._components.logger import get_logger

In [None]:
from contextlib import asynccontextmanager
import unittest

In [None]:
# | export

# Module-level logger shared by the broker and by every patched consumer/producer method below.
logger = get_logger(__name__)

# Local Kafka broker
> In-memory mockup of Kafka broker protocol

In [None]:
# | export


def create_consumer_record(topic: str, msg: bytes) -> ConsumerRecord:  # type: ignore
    """Wrap a raw payload in a `ConsumerRecord` for the given topic.

    Only `topic` and `value` carry real information; every other field is
    filled with a fixed placeholder (partition 0, offset 0, no key, no headers,
    zeroed timestamp/checksum/size fields).

    Args:
        topic: Name of the topic the record belongs to.
        msg: Payload stored as the record's `value`.

    Returns:
        The newly constructed record.
    """
    return ConsumerRecord(
        topic=topic,
        value=msg,
        key=None,
        headers=[],
        # Placeholder metadata: this mock uses a single partition and does not
        # track timestamps, checksums or serialized sizes.
        partition=0,
        offset=0,
        timestamp=0,
        timestamp_type=0,
        checksum=0,
        serialized_key_size=0,
        serialized_value_size=0,
    )

In [None]:
# Build a record, then overwrite one field: the displayed repr confirms the record is mutable.
record = create_consumer_record("my_topic", b"my_msg")
record.partition = 1
record

ConsumerRecord(topic='my_topic', partition=1, offset=0, timestamp=0, timestamp_type=0, key=None, value=b'my_msg', checksum=0, serialized_key_size=0, serialized_value_size=0, headers=[])

In [None]:
# | export


@dataclass
class ConsumerMetadata:
    """Per-subscription bookkeeping: which topic is followed and where reading resumes."""

    # Name of the subscribed topic.
    topic: str
    # Index into the broker's record list for `topic` at which the next consume starts.
    offset: int

In [None]:
# Positional construction maps onto (topic, offset) in declaration order.
consumer_meta = ConsumerMetadata("my_topic", 0)
assert consumer_meta.topic == "my_topic"
assert consumer_meta.offset == 0

In [None]:
# | export


class LocalKafkaBroker:
    """In-memory stand-in for a Kafka broker.

    Records are kept in one append-only list per topic. Every actor (consumer
    or producer) identifies itself with the id returned by `connect()`;
    consumers additionally register subscriptions, each of which remembers the
    list index from which its next `consume()` call continues.

    Used as a context manager, entering the broker calls
    `_patch_consumers_and_producers()`. Exiting only logs; nothing is undone.
    """

    def __init__(self, topics: List[str]):
        """Create the broker with one empty record list per entry of `topics`."""
        self.data: Dict[str, List[ConsumerRecord]] = {topic: list() for topic in topics}  # type: ignore

        # Actor id -> list of that actor's subscriptions.
        self.consumers_metadata: Dict[str, List[ConsumerMetadata]] = {}

    def connect(self) -> uuid.UUID:
        """Return a fresh random id for a newly connected actor.

        NOTE(review): the other methods annotate `actor_id` as `str`, while the
        value handed out here is a `uuid.UUID`; it is only used as a dict key.
        """
        return uuid.uuid4()

    def subscribe(self, actor_id: str, *, auto_offest_reset: str, topic: str) -> None:
        """Register a subscription of `actor_id` to `topic`.

        Args:
            actor_id: Id of the subscribing actor.
            auto_offest_reset: `"latest"` starts after the records already
                stored for the topic; any other value starts from index 0.
                (The misspelled keyword is kept because callers pass it by name.)
            topic: Topic to follow.

        Raises:
            KeyError: If `auto_offest_reset` is `"latest"` and `topic` was not
                passed to the broker on init.
        """
        start_offset = len(self.data[topic]) if auto_offest_reset == "latest" else 0
        self.consumers_metadata.setdefault(actor_id, []).append(
            ConsumerMetadata(topic, start_offset)
        )

    def unsubscribe(self, actor_id: str) -> None:
        """Drop all subscriptions of `actor_id`; log a warning if it has none."""
        try:
            del self.consumers_metadata[actor_id]
        except KeyError:
            logger.warning(f"No subscription with {actor_id=} found!")

    def produce(
        self, actor_id: str, *, topic: str, msg: bytes, key: Optional[bytes]
    ) -> None:
        """Append a record carrying `msg` (and `key`) to `topic`.

        Args:
            actor_id: Id of the producing actor (currently not used).
            topic: Destination topic.
            msg: Payload stored as the record's `value`.
            key: Optional record key.

        Raises:
            KeyError: If `topic` was not passed to the broker on init.
        """
        topic_records = self.data[topic]
        record = create_consumer_record(topic, msg)
        # The helper only knows topic and payload: fill in the key supplied by
        # the caller and the record's position in the topic so neither is lost.
        record.key = key
        record.offset = len(topic_records)
        topic_records.append(record)

    def consume(  # type: ignore
        self, actor_id: str
    ) -> Dict[TopicPartition, List[ConsumerRecord]]:
        """Return the records `actor_id` has not consumed yet, grouped by topic.

        Each subscription's offset is advanced to the end of its topic, so a
        second call only returns records produced in between.

        Args:
            actor_id: Id of the consuming actor.

        Returns:
            Mapping of `TopicPartition(topic, 0)` to the new records; empty
            (with a logged warning) if the actor has no subscriptions.

        Raises:
            RuntimeError: If a subscribed topic does not exist in the broker.
        """
        msgs: Dict[TopicPartition, List[ConsumerRecord]] = {}  # type: ignore

        try:
            consumer_metadata = self.consumers_metadata[actor_id]
        except KeyError:
            logger.warning(f"No subscription with {actor_id=} found!")
            return msgs

        for metadata in consumer_metadata:
            try:
                topic_records = self.data[metadata.topic]
            except KeyError as e:
                raise RuntimeError(
                    f"{metadata.topic=} not found, did you pass it to LocalKafkaBroker on init to be created?"
                ) from e
            msgs[TopicPartition(metadata.topic, 0)] = topic_records[metadata.offset :]
            metadata.offset = len(topic_records)
        return msgs

    def _patch_consumers_and_producers(self) -> None:
        """Placeholder; the notebook replaces this method further down via `@patch`."""
        pass

    def __enter__(self) -> "LocalKafkaBroker":
        """Log startup, apply the consumer/producer patches and return the broker."""
        logger.info("Local kafka broker starting")
        self._patch_consumers_and_producers()
        return self

    def __exit__(self, *args: Any) -> None:
        """Log shutdown. Exceptions are not suppressed and nothing is unpatched."""
        logger.info("Local kafka broker stopping")

In [None]:
# TODO: Tests

## Consumer patching

We need to patch AIOKafkaConsumer methods so that we can redirect the consumer to our local kafka broker.

Patched methods:

- [x] \_\_init\_\_
- [x] start
- [x] subscribe
- [x] stop
- [x] getmany

Patching \_\_init\_\_ so that we pass the LocalKafkaBroker instance to all instances of AIOKafkaConsumer in the patched namespace

In [None]:
# | export


def _patch_AIOKafkaConsumer_init(broker: LocalKafkaBroker) -> None:
    """Install a replacement `AIOKafkaConsumer.__init__` that targets `broker`.

    The replacement only logs and stores three attributes on the instance
    (`broker`, `auto_offset_reset`, `id`); all other keyword arguments are
    accepted and ignored.

    Args:
        broker: Broker used as the default value of the patched constructor's
            `broker` parameter.
    """

    # NOTE(review): `patch` is assumed to attach the function to the class named
    # in the `self` annotation — confirm in fastkafka._components.meta.
    @patch
    @delegates(AIOKafkaConsumer)
    def __init__( # type: ignore
        self: AIOKafkaConsumer,
        broker: LocalKafkaBroker = broker,
        auto_offset_reset: str ="latest",
        **kwargs: Any,
    ) -> None:
        logger.info("AIOKafkaConsumer patched __init__() called()")
        self.broker = broker
        self.auto_offset_reset = auto_offset_reset
        # No broker connection yet; start() assigns the id.
        self.id = None

In [None]:
# Smoke test: with __init__ patched, AIOKafkaConsumer() can be constructed without arguments.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaConsumer_init(broker)
    consumer = AIOKafkaConsumer()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: Local kafka broker stopping


Patching start so that we don't try to start the real AIOKafkaConsumer instance

In [None]:
# | export


def _patch_AIOKafkaConsumer_start() -> None:
    """Install a replacement `AIOKafkaConsumer.start` that connects to the local broker.

    The replacement obtains an id from `self.broker.connect()` and stores it in
    `self.id`. It raises `RuntimeError` if the consumer already holds an id.
    """

    @patch
    @delegates(AIOKafkaConsumer.start)
    async def start(self: AIOKafkaConsumer, **kwargs: Any) -> None: # type: ignore
        logger.info("AIOKafkaConsumer patched start() called()")
        # A non-None id means start() already ran on this instance.
        if self.id is not None:
            raise RuntimeError(
                "Consumer start() already called! Run consumer stop() before running start() again"
            )
        self.id = self.broker.connect()

In [None]:
# Smoke test: the patched start() runs without touching a real cluster (see the log output).
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaConsumer_init(broker)
    consumer = AIOKafkaConsumer()

    _patch_AIOKafkaConsumer_start()
    await consumer.start()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: Local kafka broker stopping


Patching subscribe so that we can connect to our Local, in-memory, Kafka broker

In [None]:
# | export


def _patch_AIOKafkaConsumer_subscribe() -> None:
    """Install a replacement `AIOKafkaConsumer.subscribe` backed by the local broker.

    The replacement registers one broker subscription per entry of `topics`,
    using the consumer's own `auto_offset_reset`. It raises `RuntimeError` when
    the consumer has no id yet, i.e. `start()` was not called.
    """

    @patch
    @delegates(AIOKafkaConsumer.subscribe)
    def subscribe(self: AIOKafkaConsumer, topics: List[str], **kwargs: Any) -> None:  # type: ignore
        logger.info("AIOKafkaConsumer patched subscribe() called")
        if self.id is None:
            raise RuntimeError(
                "Consumer start() not called! Run consumer start() first"
            )
        logger.info(f"AIOKafkaConsumer.subscribe(), subscribing to: {topics}")
        for topic in topics:
            # The broker spells this keyword "auto_offest_reset".
            self.broker.subscribe(
                self.id, topic=topic, auto_offest_reset=self.auto_offset_reset
            )

In [None]:
# Smoke test: subscribe() is only valid after start(), so the calls are made in that order.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaConsumer_init(broker)
    consumer = AIOKafkaConsumer()

    _patch_AIOKafkaConsumer_start()
    await consumer.start()

    _patch_AIOKafkaConsumer_subscribe()
    consumer.subscribe(["my_topic"])

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['my_topic']
[INFO] __main__: Local kafka broker stopping


Patching stop so that we don't break anything by calling the real AIOKafkaConsumer stop()

In [None]:
# | export


def _patch_AIOKafkaConsumer_stop() -> None:
    """Install a replacement `AIOKafkaConsumer.stop` backed by the local broker.

    The replacement removes the consumer's subscriptions from the broker and
    clears `self.id`, returning the consumer to its not-started state so that
    `start()` can be called again. It raises `RuntimeError` when the consumer
    has no id, i.e. it was never started or has already been stopped.
    """

    @patch
    @delegates(AIOKafkaConsumer.stop)
    async def stop(self: AIOKafkaConsumer, **kwargs: Any) -> None:  # type: ignore
        logger.info("AIOKafkaConsumer patched stop() called")
        if self.id is None:
            raise RuntimeError(
                "Consumer start() not called! Run consumer start() first"
            )
        self.broker.unsubscribe(self.id)
        # Forget the connection: without this, a later start() would fail with
        # "already called! Run consumer stop() ..." even though stop() did run.
        self.id = None

In [None]:
# Smoke test of the full consumer lifecycle: start -> subscribe -> stop.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaConsumer_init(broker)
    consumer = AIOKafkaConsumer()

    _patch_AIOKafkaConsumer_start()
    _patch_AIOKafkaConsumer_subscribe()
    _patch_AIOKafkaConsumer_stop()

    await consumer.start()
    consumer.subscribe(["my_topic"])
    await consumer.stop()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['my_topic']
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: Local kafka broker stopping


Patching getmany so that the messages are pulled from our Local, in-memory, Kafka broker

In [None]:
# | export


def _patch_AIOKafkaConsumer_getmany() -> None:
    """Install a replacement `AIOKafkaConsumer.getmany` that reads from the local broker.

    The replacement returns whatever `self.broker.consume(self.id)` returns.
    Keyword arguments are accepted and ignored; no positional arguments are
    accepted. There is no check that `start()` was called.
    """

    @patch
    @delegates(AIOKafkaConsumer.getmany)
    async def getmany( # type: ignore
        self: AIOKafkaConsumer, **kwargs: Any
    ) -> Dict[TopicPartition, List[ConsumerRecord]]:
        logger.info("AIOKafkaConsumer patched getmany() called!")
        # An id the broker has no subscriptions for yields an empty dict plus a logged warning.
        return self.broker.consume(self.id) # type: ignore

In [None]:
# getmany() is called here without start()/subscribe(), so the broker finds no
# subscription for the consumer: it logs a warning and returns an empty result.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaConsumer_init(broker)
    consumer = AIOKafkaConsumer()

    _patch_AIOKafkaConsumer_getmany()
    await consumer.getmany()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched getmany() called!
[INFO] __main__: Local kafka broker stopping


  logger.warn(f"No subscription with {actor_id=} found!")


General patch for consumers:

In [None]:
# | export


def _patch_AIOKafkaConsumer(broker: LocalKafkaBroker) -> None:
    """Apply every `AIOKafkaConsumer` patch, in order: init, start, subscribe, stop, getmany.

    Args:
        broker: Broker handed to the patched constructor as its default target.
    """
    # Only the constructor patch needs the broker; the others reach it through `self.broker`.
    _patch_AIOKafkaConsumer_init(broker)
    for apply_patch in (
        _patch_AIOKafkaConsumer_start,
        _patch_AIOKafkaConsumer_subscribe,
        _patch_AIOKafkaConsumer_stop,
        _patch_AIOKafkaConsumer_getmany,
    ):
        apply_patch()

In [None]:
# Smoke test: applying all consumer patches at once raises nothing.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaConsumer(broker)

[INFO] __main__: Local kafka broker starting
[INFO] __main__: Local kafka broker stopping


## Producer patching

We need to patch AIOKafkaProducer methods so that we can redirect the producer to our local kafka broker

- [x] \_\_init\_\_
- [x] start
- [x] stop
- [x] send

Patching \_\_init\_\_ so that we pass the LocalKafkaBroker instance to all instances of AIOKafkaProducer in the patched namespace

In [None]:
# | export


def _patch_AIOKafkaProducer_init(broker: LocalKafkaBroker) -> None:
    """Install a replacement `AIOKafkaProducer.__init__` that targets `broker`.

    The replacement only logs and stores `broker` and `id` on the instance;
    all other keyword arguments are accepted and ignored.

    Args:
        broker: Broker used as the default value of the patched constructor's
            `broker` parameter.
    """

    @patch
    @delegates(AIOKafkaProducer)
    def __init__( # type: ignore
        self: AIOKafkaProducer, broker: LocalKafkaBroker = broker, **kwargs: Any
    ) -> None:
        logger.info("AIOKafkaProducer patched __init__() called()")
        self.broker = broker
        # No broker connection yet; start() assigns the id.
        self.id = None

In [None]:
# Smoke test: with __init__ patched, AIOKafkaProducer() can be constructed without arguments.
with LocalKafkaBroker(topics=["my_topic"]) as mock:
    _patch_AIOKafkaProducer_init(broker=mock)
    producer = AIOKafkaProducer()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: Local kafka broker stopping


Patching AIOKafkaProducer start so that we don't unintentionally try to start a real instance of AIOKafkaProducer

In [None]:
# | export


def _patch_AIOKafkaProducer_start() -> None:
    """Install a replacement `AIOKafkaProducer.start` that connects to the local broker.

    The replacement obtains an id from `self.broker.connect()` and stores it in
    `self.id`. It raises `RuntimeError` if the producer already holds an id.
    """

    @patch
    @delegates(AIOKafkaProducer.start)
    async def start(self: AIOKafkaProducer, **kwargs: Any) -> None: # type: ignore
        logger.info("AIOKafkaProducer patched start() called()")
        # A non-None id means start() already ran on this instance.
        if self.id is not None:
            raise RuntimeError(
                "Producer start() already called! Run producer stop() before running start() again"
            )
        self.id = self.broker.connect()

In [None]:
# Smoke test: the patched producer start() runs without touching a real cluster.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaProducer_init(broker=broker)
    producer = AIOKafkaProducer()

    _patch_AIOKafkaProducer_start()
    await producer.start()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: Local kafka broker stopping


Patching AIOKafkaProducer stop so that we don't unintentionally try to stop a real instance of AIOKafkaProducer

In [None]:
# | export


def _patch_AIOKafkaProducer_stop() -> None:
    """Install a replacement `AIOKafkaProducer.stop` for use with the local broker.

    The replacement clears `self.id`, returning the producer to its not-started
    state so that `start()` can be called again. It raises `RuntimeError` when
    the producer has no id, i.e. it was never started or has already been stopped.
    """

    @patch
    @delegates(AIOKafkaProducer.stop)
    async def stop(self: AIOKafkaProducer, **kwargs: Any) -> None:  # type: ignore
        logger.info("AIOKafkaProducer patched stop() called")
        if self.id is None:
            raise RuntimeError(
                "Producer start() not called! Run producer start() first"
            )
        # Forget the connection: without this, a later start() would fail with
        # "already called! Run producer stop() ..." even though stop() did run.
        self.id = None

In [None]:
# Smoke test of the producer lifecycle: start -> stop.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaProducer_init(broker=broker)
    producer = AIOKafkaProducer()

    _patch_AIOKafkaProducer_start()
    await producer.start()

    _patch_AIOKafkaProducer_stop()
    await producer.stop()

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaProducer patched stop() called
[INFO] __main__: Local kafka broker stopping


Patching AIOKafkaProducer send so that we redirect sent messages to Local, in-memory, Kafka broker

In [None]:
# | export


def _patch_AIOKafkaProducer_send() -> None:
    """Install a replacement `AIOKafkaProducer.send` that writes to the local broker.

    The replacement forwards `topic`, `msg` and `key` to `self.broker.produce`
    and returns `None`. Other keyword arguments are accepted and ignored. It
    raises `RuntimeError` when the producer has no id, i.e. `start()` was not
    called.
    """

    @patch
    @delegates(AIOKafkaProducer.send)
    async def send( # type: ignore
        self: AIOKafkaProducer,
        topic: str,
        msg: bytes,
        key: Optional[bytes] = None,
        **kwargs: Any,
    ) -> None:
        # Per-call logging is disabled below — presumably to avoid flooding the
        # output when many messages are sent; confirm before re-enabling.
        # logger.info("AIOKafkaProducer patched send() called()")
        if self.id is None:
            raise RuntimeError(
                "Producer start() not called! Run producer start() first"
            )
        self.broker.produce(self.id, topic=topic, msg=msg, key=key)

In [None]:
# Smoke test: a started producer can send one message to a topic the broker knows about.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaProducer_init(broker=broker)
    producer = AIOKafkaProducer()

    _patch_AIOKafkaProducer_start()
    await producer.start()

    _patch_AIOKafkaProducer_send()
    await producer.send("my_topic", b"some_msg")

[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: Local kafka broker stopping


General patch for producers:

In [None]:
# | export


def _patch_AIOKafkaProducer(broker: LocalKafkaBroker) -> None:
    """Apply every `AIOKafkaProducer` patch, in order: init, start, stop, send.

    Args:
        broker: Broker handed to the patched constructor as its default target.
    """
    # Only the constructor patch needs the broker; the others reach it through `self.broker`.
    _patch_AIOKafkaProducer_init(broker)
    for apply_patch in (
        _patch_AIOKafkaProducer_start,
        _patch_AIOKafkaProducer_stop,
        _patch_AIOKafkaProducer_send,
    ):
        apply_patch()

In [None]:
# Smoke test: applying all producer patches at once raises nothing.
with LocalKafkaBroker(topics=["my_topic"]) as broker:
    _patch_AIOKafkaProducer(broker)

[INFO] __main__: Local kafka broker starting
[INFO] __main__: Local kafka broker stopping


## Add patching to LocalKafkaBroker

In [None]:
# | export


@patch
def _patch_consumers_and_producers(self: LocalKafkaBroker) -> None:
    """Redirect `AIOKafkaConsumer` and `AIOKafkaProducer` to this broker.

    Replaces the placeholder method defined in the `LocalKafkaBroker` class
    body; `LocalKafkaBroker.__enter__` calls it. The cell is exported so that
    the generated module gets the real implementation instead of the
    placeholder.
    """
    logger.info(
        "LocalKafkaBroker._patch_consumers_and_producers(): Patching consumers and producers!"
    )
    _patch_AIOKafkaConsumer(self)
    _patch_AIOKafkaProducer(self)

In [None]:
# Entering the broker applies the patches, so newly constructed clients default to this broker.
with LocalKafkaBroker(["topic"]) as broker:
    assert AIOKafkaConsumer().broker == broker
    assert AIOKafkaProducer().broker == broker

[INFO] __main__: Local kafka broker starting
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: Local kafka broker stopping


## Broker, consumer and producer integration tests

In [None]:
@asynccontextmanager
async def create_consumer_and_producer(
    auto_offset_reset: str = "latest",
) -> AsyncIterator[Tuple[AIOKafkaConsumer, AIOKafkaProducer]]:
    """Yield a started consumer/producer pair and stop both on exit.

    Args:
        auto_offset_reset: Forwarded to the `AIOKafkaConsumer` constructor.

    Yields:
        A `(consumer, producer)` tuple; both have been started.
    """
    consumer = AIOKafkaConsumer(auto_offset_reset=auto_offset_reset)
    producer = AIOKafkaProducer()

    await consumer.start()
    await producer.start()

    try:
        yield (consumer, producer)
    finally:
        # Stop both clients even when the `async with` body raised (e.g. a
        # failing assert). The nested finally keeps the original stop order
        # and still stops the producer if stopping the consumer fails.
        try:
            await consumer.stop()
        finally:
            await producer.stop()

In [None]:
# No broker context is entered here: this cell relies on the patches applied by earlier cells.
async with create_consumer_and_producer() as (consumer, producer):
    assert isinstance(consumer, AIOKafkaConsumer)
    assert isinstance(producer, AIOKafkaProducer)

[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called


In [None]:
def checkEqual(L1, L2):
    """Return True when both sequences hold the same items, ignoring order.

    The items must be mutually orderable, since both inputs are sorted before
    being compared.
    """
    if len(L1) != len(L2):
        return False
    return sorted(L1) == sorted(L2)

In [None]:
# Different lengths never match; the same items in a different order do.
assert not checkEqual([1, 2], [3])
assert checkEqual([1, 2, 3], [3, 2, 1])

Sanity check, let's see if the messages are sent to broker and received by the consumer

In [None]:
topic = "test_topic"
sent_msgs = [f"msg{i}".encode("UTF-8") for i in range(320)]

with LocalKafkaBroker([topic]) as broker:
    async with create_consumer_and_producer(auto_offset_reset="earliest") as (
        consumer,
        producer,
    ):
        [await producer.send(topic, msg) for msg in sent_msgs]
        consumer.subscribe([topic])
        received = await consumer.getmany()
        received_msgs = [msg.value for _, msgs in received.items() for msg in msgs]
        data = [msg.value for msg in broker.data[topic]]
    assert checkEqual(
        received_msgs, sent_msgs
    ), f"{sent_msgs=}\n{received_msgs=}\n{data=}"

[INFO] __main__: Local kafka broker starting
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['test_topic']
[INFO] __main__: AIOKafkaConsumer patched getmany() called!
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called
[INFO] __main__: Local kafka broker stopping


Check if only subscribed topic messages are received by the consumer

In [None]:
topic1 = "test_topic1"
topic2 = "test_topic2"
sent_msgs_1 = [(f"msg{i}" + topic1).encode("UTF-8") for i in range(32)]
sent_msgs_2 = [(f"msg{i}" + topic2).encode("UTF-8") for i in range(32)]

with LocalKafkaBroker([topic1, topic2]) as broker:
    async with create_consumer_and_producer(auto_offset_reset="earliest") as (
        consumer,
        producer,
    ):
        [await producer.send(topic1, msg) for msg in sent_msgs_1]
        [await producer.send(topic2, msg) for msg in sent_msgs_2]

        consumer.subscribe([topic1])
        received = await consumer.getmany()
        received_msgs = [msg.value for _, msgs in received.items() for msg in msgs]

    assert checkEqual(sent_msgs_1, received_msgs)

[INFO] __main__: Local kafka broker starting
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['test_topic1']
[INFO] __main__: AIOKafkaConsumer patched getmany() called!
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called
[INFO] __main__: Local kafka broker stopping


Check if msgs are received only after subscribing when auto_offset_reset is set to "latest"

In [None]:
topic = "test_topic"
sent_msgs_before = [f"msg{i}".encode("UTF-8") for i in range(32)]
sent_msgs_after = [f"msg{i}".encode("UTF-8") for i in range(32, 64)]

with LocalKafkaBroker([topic]) as broker:
    async with create_consumer_and_producer() as (consumer, producer):
        [await producer.send(topic, msg) for msg in sent_msgs_before]

        consumer.subscribe([topic])
        [await producer.send(topic, msg) for msg in sent_msgs_after]
        received = await consumer.getmany()
        received_msgs = [msg.value for _, msgs in received.items() for msg in msgs]

    assert checkEqual(sent_msgs_after, received_msgs)

[INFO] __main__: Local kafka broker starting
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['test_topic']
[INFO] __main__: AIOKafkaConsumer patched getmany() called!
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called
[INFO] __main__: Local kafka broker stopping
