In [1]:
# | default_exp _testing.local_kafka_broker

In [2]:
# | export
import uuid
from collections import namedtuple
from dataclasses import dataclass
from contextlib import contextmanager
import asyncio

from typing import *
import fastkafka._application.app
from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
from aiokafka.structs import ConsumerRecord, TopicPartition

import fastkafka._application.app
from fastkafka._components.meta import copy_func, patch, delegates, classcontextmanager
from fastkafka._components.logger import get_logger

In [3]:
from contextlib import asynccontextmanager
import unittest

In [4]:
# | export

logger = get_logger(__name__)

# Local Kafka broker
> In-memory mockup of Kafka broker protocol

In [5]:
# | export


def create_consumer_record(topic: str, msg: bytes) -> ConsumerRecord: # type: ignore
    record = ConsumerRecord(
        topic=topic,
        partition=0,
        offset=0,
        timestamp=0,
        timestamp_type=0,
        key=None,
        value=msg,
        checksum=0,
        serialized_key_size=0,
        serialized_value_size=0,
        headers=[],
    )
    return record

In [6]:
record = create_consumer_record("my_topic", b"my_msg")
record.partition = 1
record

ConsumerRecord(topic='my_topic', partition=1, offset=0, timestamp=0, timestamp_type=0, key=None, value=b'my_msg', checksum=0, serialized_key_size=0, serialized_value_size=0, headers=[])

In [7]:
# | export


@dataclass
class ConsumerMetadata:
    topic: str
    offset: int

In [8]:
consumer_meta = ConsumerMetadata("my_topic", 0)
assert consumer_meta.topic == "my_topic"
assert consumer_meta.offset == 0

In [9]:
# | export

@classcontextmanager()
class LocalKafkaBroker:
    def __init__(self, topics: List[str]):
        self.data: Dict[str, List[ConsumerRecord]] = {topic: list() for topic in topics} # type: ignore
        self.consumers_metadata: Dict[str, List[ConsumerMetadata]] = {}
        self.is_started: bool = False

    def connect(self) -> uuid.UUID:
        return uuid.uuid4()

    def subscribe(self, actor_id: str, *, auto_offest_reset: str, topic: str) -> None:
        consumer_metadata = self.consumers_metadata.get(actor_id, list())
        consumer_metadata.append(
            ConsumerMetadata(
                topic, len(self.data[topic]) if auto_offest_reset == "latest" else 0
            )
        )
        self.consumers_metadata[actor_id] = consumer_metadata

    def unsubscribe(self, actor_id: str) -> None:
        try:
            del self.consumers_metadata[actor_id]
        except KeyError:
            logger.warning(f"No subscription with {actor_id=} found!")

    def produce(
        self, actor_id: str, *, topic: str, msg: bytes, key: Optional[bytes]
    ) -> ConsumerRecord: # type: ignore
        record = create_consumer_record(topic, msg)
        self.data[topic].append(record)
        return record

    def consume( # type: ignore
        self, actor_id: str
    ) -> Dict[TopicPartition, List[ConsumerRecord]]:
        msgs: Dict[TopicPartition, List[ConsumerRecord]] = {} # type: ignore

        try:
            consumer_metadata = self.consumers_metadata[actor_id]
        except KeyError:
            logger.warning(f"No subscription with {actor_id=} found!")
            return msgs

        for metadata in consumer_metadata:
            try:
                msgs[TopicPartition(metadata.topic, 0)] = self.data[metadata.topic][
                    metadata.offset :
                ]
                metadata.offset = len(self.data[metadata.topic])
            except KeyError:
                raise RuntimeError(
                    f"{metadata.topic=} not found, did you pass it to LocalKafkaBroker on init to be created?"
                )
        return msgs
    
    def lifecycle(self) -> "LocalKafkaBroker":
        raise NotImplementedError()

    async def _start(self) -> str:
        logger.info("LocalKafkaBroker._start() called")
        self.__enter__()
        return "localbroker:0"
    
    async def _stop(self) -> None:
        logger.info("LocalKafkaBroker._stop() called")
        self.__exit__()

cls=<class '__main__.LocalKafkaBroker'>


In [10]:
# TODO: Tests

## Consumer patching

We need to patch AIOKafkaConsumer methods so that we can redirect the consumer to our local kafka broker.

Patched methods:

- [x] \_\_init\_\_
- [x] start
- [x] subscribe
- [x] stop
- [x] getmany

In [11]:
class ConsumerMock:
    def __init__(
        self,
        broker: LocalKafkaBroker,
    ) -> None:
        logger.info("AIOKafkaConsumer patched __init__() called()")
        self.broker = broker
        self.id = None

    @delegates(AIOKafkaConsumer)
    def __call__(
        self, auto_offset_reset: str = "latest", **kwargs: Any
    ) -> "ConsumerMock":
        self.auto_offset_reset = auto_offset_reset
        return self

    @delegates(AIOKafkaConsumer.start)
    async def start(self, **kwargs: Any) -> None:
        raise NotImplementedError()

    @delegates(AIOKafkaConsumer.subscribe)
    def subscribe(self, topics: List[str], **kwargs: Any) -> None:
        raise NotImplementedError()

Patching start so that we don't try to start the real AIOKafkaConsumer instance

In [12]:
# | export


@patch
@delegates(AIOKafkaConsumer.start)
async def start(self: ConsumerMock, **kwargs: Any) -> None:
    logger.info("AIOKafkaConsumer patched start() called()")
    if self.id is not None:
        raise RuntimeError(
            "Consumer start() already called! Run consumer stop() before running start() again"
        )
    self.id = self.broker.connect()

In [13]:
broker = LocalKafkaBroker(topics=["my_topic"])

consumer = ConsumerMock(broker)
consumer = consumer()
await consumer.start()

[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()


Patching subscribe so that we can connect to our Local, in-memory, Kafka broker

In [14]:
# | export


@patch
@delegates(AIOKafkaConsumer.subscribe)
def subscribe(self: ConsumerMock, topics: List[str], **kwargs: Any) -> None:
    logger.info("AIOKafkaConsumer patched subscribe() called")
    if self.id is None:
        raise RuntimeError("Consumer start() not called! Run consumer start() first")
    logger.info(f"AIOKafkaConsumer.subscribe(), subscribing to: {topics}")
    [
        self.broker.subscribe(
            self.id, topic=topic, auto_offest_reset=self.auto_offset_reset
        )
        for topic in topics
    ]

In [15]:
broker = LocalKafkaBroker(topics=["my_topic"])

consumer = ConsumerMock(broker)
consumer = consumer()

await consumer.start()
consumer.subscribe(["my_topic"])

[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['my_topic']


Patching stop so that be dont break anything by calling the real AIOKafkaConsumer stop()

In [16]:
# | export

@patch
@delegates(AIOKafkaConsumer.stop)
async def stop(self: ConsumerMock, **kwargs: Any) -> None: # type: ignore
    logger.info("AIOKafkaConsumer patched stop() called")
    if self.id is None:
        raise RuntimeError(
            "Consumer start() not called! Run consumer start() first"
        )
    self.broker.unsubscribe(self.id)

In [17]:
broker = LocalKafkaBroker(topics=["my_topic"])
    
consumer = ConsumerMock(broker)
consumer = consumer()

await consumer.start()
consumer.subscribe(["my_topic"])
await consumer.stop()

[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['my_topic']
[INFO] __main__: AIOKafkaConsumer patched stop() called


Patching getmany so that the messages are pulled from our Local, in-memory, Kafka broker

In [18]:
# | export

@patch
@delegates(AIOKafkaConsumer.getmany)
async def getmany(
    self: ConsumerMock, **kwargs: Any
) -> Dict[TopicPartition, List[ConsumerRecord]]:
    return self.broker.consume(self.id) # type: ignore

In [19]:
broker = LocalKafkaBroker(topics=["my_topic"])

consumer = ConsumerMock(broker)
consumer = consumer()
await consumer.getmany()

[INFO] __main__: AIOKafkaConsumer patched __init__() called()


{}

## Producer patching

We need to patch AIOKafkaProducer methods so that we can redirect the producer to our local kafka broker

- [x] \_\_init\_\_
- [x] start
- [x] stop
- [x] send

In [30]:
class ProducerMock:
    def __init__(self, broker: LocalKafkaBroker, **kwargs: Any) -> None:
        logger.info("AIOKafkaProducer patched __init__() called()")
        self.broker = broker
        self.id = None
        
    @delegates(AIOKafkaProducer)
    def __call__(self, **kwargs: Any) -> "ProducerMock":
        return self
    
    @delegates(AIOKafkaProducer.start)
    async def start(self, **kwargs: Any) -> None:
        raise NotImplementedError()

    @delegates(AIOKafkaProducer.stop)
    async def stop(self, **kwargs: Any) -> None:
        raise NotImplementedError()

    @delegates(AIOKafkaProducer.send)
    async def send(  # type: ignore
        self: AIOKafkaProducer,
        topic: str,
        msg: bytes,
        key: Optional[bytes] = None,
        **kwargs: Any,
    ) -> None:
        raise NotImplementedError()

Patching AIOKafkaProducer start so that we mock the startup procedure of AIOKafkaProducer

In [31]:
# | export


@patch
@delegates(AIOKafkaProducer.start)
async def start(self: ProducerMock, **kwargs: Any) -> None:
    logger.info("AIOKafkaProducer patched start() called()")
    if self.id is not None:
        raise RuntimeError(
            "Producer start() already called! Run producer stop() before running start() again"
        )
    self.id = self.broker.connect()

In [33]:
broker = LocalKafkaBroker(topics=["my_topic"])

producer = ProducerMock(broker)
producer = producer()

await producer.start()

[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched start() called()


Patching AIOKafkaProducerStop so that we don't uniintentionally try to stop a real instance of AIOKafkaProducer

In [34]:
# | export


@patch
@delegates(AIOKafkaProducer.stop)
async def stop(self: ProducerMock, **kwargs: Any) -> None:
    logger.info("AIOKafkaProducer patched stop() called")
    if self.id is None:
        raise RuntimeError(
            "Producer start() not called! Run producer start() first"
        )

In [36]:
broker = LocalKafkaBroker(topics=["my_topic"])

producer = ProducerMock(broker)
producer = producer()

await producer.start()
await producer.stop()

[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaProducer patched stop() called


Patching AIOKafkaProducer send so that we redirect sent messages to Local, in-memory, Kafka broker

In [37]:
# | export


@patch
@delegates(AIOKafkaProducer.send)
async def send(
    self: ProducerMock,
    topic: str,
    msg: bytes,
    key: Optional[bytes] = None,
    **kwargs: Any,
) -> None:
    if self.id is None:
        raise RuntimeError(
            "Producer start() not called! Run producer start() first"
        )
    record = self.broker.produce(self.id, topic=topic, msg=msg, key=key)

    async def _f(record: ConsumerRecord = record) -> ConsumerRecord:
        return record

    return asyncio.create_task(_f())

In [38]:
broker = LocalKafkaBroker(topics=["my_topic"])

producer = ProducerMock(broker)
producer = producer()

await producer.start()
msg_fut = await producer.send("my_topic", b"some_msg")
await msg_fut

[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched start() called()


ConsumerRecord(topic='my_topic', partition=0, offset=0, timestamp=0, timestamp_type=0, key=None, value=b'some_msg', checksum=0, serialized_key_size=0, serialized_value_size=0, headers=[])

## Add patching to LocalKafkaBroker

In [39]:
# | export


@patch
@contextmanager
def lifecycle(self: LocalKafkaBroker) -> None:
    logger.info(
        "LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!"
    )
    try:
        logger.info("Local kafka broker starting")
        old_consumer = fastkafka._application.app.AIOKafkaConsumer
        old_producer = fastkafka._application.app.AIOKafkaProducer
        fastkafka._application.app.AIOKafkaConsumer = ConsumerMock(self)
        fastkafka._application.app.AIOKafkaProducer = ProducerMock(self)
        self.is_started = True
        yield self
    finally:
        logger.info("Local kafka broker stopping")
        fastkafka._application.app.AIOKafkaConsumer = old_consumer
        fastkafka._application.app.AIOKafkaProducer = old_producer
        self.is_started = False

In [45]:
assert isinstance(fastkafka._application.app.AIOKafkaConsumer(), AIOKafkaConsumer)
assert isinstance(fastkafka._application.app.AIOKafkaProducer(), AIOKafkaProducer)
with LocalKafkaBroker(["topic"]) as broker:
    assert isinstance(fastkafka._application.app.AIOKafkaConsumer(), ConsumerMock)
    assert isinstance(fastkafka._application.app.AIOKafkaProducer(), ProducerMock)
    assert fastkafka._application.app.AIOKafkaConsumer().broker == broker
    assert fastkafka._application.app.AIOKafkaProducer().broker == broker
assert isinstance(fastkafka._application.app.AIOKafkaConsumer(), AIOKafkaConsumer)
assert isinstance(fastkafka._application.app.AIOKafkaProducer(), AIOKafkaProducer)

[ERROR] asyncio: Unclosed AIOKafkaConsumer
consumer: <aiokafka.consumer.consumer.AIOKafkaConsumer object at 0x7f4fecd51090>
[ERROR] asyncio: Unclosed AIOKafkaProducer
producer: <aiokafka.producer.producer.AIOKafkaProducer object at 0x7f4fecd52090>
<class '__main__.LocalKafkaBroker'>.__enter__
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
<class '__main__.LocalKafkaBroker'>.__exit__
[INFO] __main__: Local kafka broker stopping
[ERROR] asyncio: Unclosed AIOKafkaConsumer
consumer: <aiokafka.consumer.consumer.AIOKafkaConsumer object at 0x7f4fecd52510>
[ERROR] asyncio: Unclosed AIOKafkaProducer
producer: <aiokafka.producer.producer.AIOKafkaProducer object at 0x7f4fed251d10>


## Broker, consumer and producer integration tests

In [52]:
@asynccontextmanager
async def create_consumer_and_producer(
    auto_offset_reset: str = "latest",
) -> AsyncIterator[Tuple[AIOKafkaConsumer, AIOKafkaProducer]]:
    consumer = fastkafka._application.app.AIOKafkaConsumer(auto_offset_reset=auto_offset_reset)
    producer = fastkafka._application.app.AIOKafkaProducer()

    await consumer.start()
    await producer.start()

    yield (consumer, producer)

    await consumer.stop()
    await producer.stop()

In [53]:
def checkEqual(L1, L2):
    return len(L1) == len(L2) and sorted(L1) == sorted(L2)

In [54]:
assert checkEqual([1, 2], [3]) == False
assert checkEqual([1, 2, 3], [3, 2, 1]) == True

Sanity check, let's see if the messages are sent to broker and received by the consumer

In [55]:
topic = "test_topic"
sent_msgs = [f"msg{i}".encode("UTF-8") for i in range(320)]

with LocalKafkaBroker([topic]) as broker:
    async with create_consumer_and_producer(auto_offset_reset="earliest") as (
        consumer,
        producer,
    ):
        [await producer.send(topic, msg) for msg in sent_msgs]
        consumer.subscribe([topic])
        received = await consumer.getmany()
        received_msgs = [msg.value for _, msgs in received.items() for msg in msgs]
        data = [msg.value for msg in broker.data[topic]]
    assert checkEqual(
        received_msgs, sent_msgs
    ), f"{sent_msgs=}\n{received_msgs=}\n{data=}"

<class '__main__.LocalKafkaBroker'>.__enter__
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['test_topic']
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called
<class '__main__.LocalKafkaBroker'>.__exit__
[INFO] __main__: Local kafka broker stopping


Check if only subscribed topic messages are received by the consumer

In [56]:
topic1 = "test_topic1"
topic2 = "test_topic2"
sent_msgs_1 = [(f"msg{i}" + topic1).encode("UTF-8") for i in range(32)]
sent_msgs_2 = [(f"msg{i}" + topic2).encode("UTF-8") for i in range(32)]

with LocalKafkaBroker([topic1, topic2]) as broker:
    async with create_consumer_and_producer(auto_offset_reset="earliest") as (
        consumer,
        producer,
    ):
        [await producer.send(topic1, msg) for msg in sent_msgs_1]
        [await producer.send(topic2, msg) for msg in sent_msgs_2]

        consumer.subscribe([topic1])
        received = await consumer.getmany()
        received_msgs = [msg.value for _, msgs in received.items() for msg in msgs]

    assert checkEqual(sent_msgs_1, received_msgs)

<class '__main__.LocalKafkaBroker'>.__enter__
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['test_topic1']
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called
<class '__main__.LocalKafkaBroker'>.__exit__
[INFO] __main__: Local kafka broker stopping


Check if msgs are received only after subscribing when auto_offset_reset is set to "latest"

In [57]:
topic = "test_topic"
sent_msgs_before = [f"msg{i}".encode("UTF-8") for i in range(32)]
sent_msgs_after = [f"msg{i}".encode("UTF-8") for i in range(32, 64)]

with LocalKafkaBroker([topic]) as broker:
    async with create_consumer_and_producer() as (consumer, producer):
        [await producer.send(topic, msg) for msg in sent_msgs_before]

        consumer.subscribe([topic])
        [await producer.send(topic, msg) for msg in sent_msgs_after]
        received = await consumer.getmany()
        received_msgs = [msg.value for _, msgs in received.items() for msg in msgs]

    assert checkEqual(sent_msgs_after, received_msgs)

<class '__main__.LocalKafkaBroker'>.__enter__
[INFO] __main__: LocalKafkaProducer._patch_consumers_and_producers(): Patching consumers and producers!
[INFO] __main__: Local kafka broker starting
[INFO] __main__: AIOKafkaConsumer patched __init__() called()
[INFO] __main__: AIOKafkaProducer patched __init__() called()
[INFO] __main__: AIOKafkaConsumer patched start() called()
[INFO] __main__: AIOKafkaProducer patched start() called()
[INFO] __main__: AIOKafkaConsumer patched subscribe() called
[INFO] __main__: AIOKafkaConsumer.subscribe(), subscribing to: ['test_topic']
[INFO] __main__: AIOKafkaConsumer patched stop() called
[INFO] __main__: AIOKafkaProducer patched stop() called
<class '__main__.LocalKafkaBroker'>.__exit__
[INFO] __main__: Local kafka broker stopping
