In [None]:
# | default_exp kafka_broker

In [None]:
# | export 

import asyncio
from contextlib import asynccontextmanager, contextmanager
from time import sleep
from tempfile import TemporaryDirectory, NamedTemporaryFile
from pathlib import Path
from fastcore.meta import delegates
from typing import *

import asyncer

from fastkafka.server import terminate_asyncio_process

In [None]:
# | export

def get_zookeeper_config_string(
    data_dir: str = "/tmp/zookeeper", # directory in which the snapshot is stored
    client_port: int = 2181, # port on which clients connect
    maxClientCnxns: int = 0, # 0 disables the per-ip connection limit (non-production config)
    admin_enableServer: bool = False, # admin server is off by default to avoid port conflicts
    admin_serverPort: int = 8080, # pick a non-conflicting port when enabling the admin server
) -> str:
    """Render the contents of a ZooKeeper properties file.

    Returns one `key=value` entry per line, each terminated by a newline, in
    the order: `dataDir`, `clientPort`, `maxClientCnxns`,
    `admin.enableServer`, `admin.serverPort`.
    """
    # The properties format wants lowercase literals rather than Python's True/False.
    admin_flag = "true" if admin_enableServer else "false"

    entries = (
        f"dataDir={data_dir}",
        f"clientPort={client_port}",
        f"maxClientCnxns={maxClientCnxns}",
        f"admin.enableServer={admin_flag}",
        f"admin.serverPort={admin_serverPort}",
    )
    return "".join(f"{entry}\n" for entry in entries)

In [None]:
# With no arguments the stock single-node configuration is rendered.
expected_default_zookeeper_config = """dataDir=/tmp/zookeeper
clientPort=2181
maxClientCnxns=0
admin.enableServer=false
admin.serverPort=8080
"""
assert get_zookeeper_config_string() == expected_default_zookeeper_config

# Overriding one setting changes only the corresponding line.
expected_custom_port_zookeeper_config = """dataDir=/tmp/zookeeper
clientPort=100
maxClientCnxns=0
admin.enableServer=false
admin.serverPort=8080
"""
assert get_zookeeper_config_string(client_port = 100) == expected_custom_port_zookeeper_config

In [None]:
# | export

@contextmanager
def write_config(config: str) -> Iterator[Path]:
    """Write `config` to a file in a fresh temporary directory and yield its path.

    The file is named `configuration.config`. The temporary directory, and the
    file with it, is removed when the `with` block is left.

    Exceptions are deliberately not caught here. An error raised inside the
    caller's `with` body, or while writing the file, propagates to the caller
    after the temporary directory has been cleaned up. Catching it in this
    generator would silently suppress failures in the `with` body, and would
    turn a write failure into `RuntimeError: generator didn't yield`.
    """
    with TemporaryDirectory() as config_dir:
        temp_config = Path(config_dir) / "configuration.config"
        temp_config.write_text(config)
        yield temp_config

In [None]:
# Round-trip check: the rendered configuration must be readable back from the temporary file.
written_config = None  # reset so a stale value from an earlier run can never satisfy the assert
with write_config(get_zookeeper_config_string()) as zookeeper_config:
    print(zookeeper_config)
    written_config = Path(zookeeper_config).read_text()

# Compare after leaving the `with` block so that a failing assertion is always
# reported, regardless of how the context manager treats exceptions raised in its body.
assert written_config == """dataDir=/tmp/zookeeper
clientPort=2181
maxClientCnxns=0
admin.enableServer=false
admin.serverPort=8080
"""

/tmp/tmpdcxb7fp6/configuration.config


In [None]:
# | export

@asynccontextmanager
@delegates(get_zookeeper_config_string)
async def zookeeper(
    zookeeper_script_path: str = "/work/kafka_2.13-3.3.1/bin/zookeeper-server-start.sh",
    **kwargs
) -> AsyncGenerator[asyncio.subprocess.Process, None]:
    """Run a ZooKeeper server subprocess for the duration of an `async with` block.

    The configuration is rendered by `get_zookeeper_config_string(**kwargs)`,
    written to a temporary file through `write_config`, and passed to
    `zookeeper_script_path` as its only argument. The started process is
    yielded; its stdout and stdin are pipes, so its output can be collected
    with `proc.communicate()`.

    On exit, whether normal or through an exception,
    `terminate_asyncio_process` is awaited on the process and the temporary
    configuration is released. Exceptions raised inside the `async with` body
    propagate to the caller.
    """
    config_generator = write_config(get_zookeeper_config_string(**kwargs))
    config_path = config_generator.__enter__()
    try:
        # Started inside the outer `try` so the temporary config is released
        # even when the script cannot be launched.
        proc = await asyncio.create_subprocess_exec(
            zookeeper_script_path,
            config_path,
            stdout=asyncio.subprocess.PIPE,
            stdin=asyncio.subprocess.PIPE,
        )
        try:
            yield proc
        finally:
            await terminate_asyncio_process(proc)
    finally:
        # Cleanup only: no exception details are handed over, so the config
        # context manager cannot suppress an in-flight error.
        config_generator.__exit__(None, None, None)

In [None]:
with TemporaryDirectory() as tmp:
    tmp_dir_path = Path(tmp)
    async with zookeeper(data_dir=tmp_dir_path/"zookeeper") as zookeeper_proc:
        sleep(5)
    
    
zookeeper_output, _ = await zookeeper_proc.communicate()

print(zookeeper_output.decode("UTF-8"))

[INFO] fastkafka.server: terminate_asyncio_process(): Terminating the process 34694...
[INFO] fastkafka.server: terminate_asyncio_process(): Process 34694 terminated.
[2023-02-07 13:48:54,610] INFO Reading configuration from: /tmp/tmp_4m4paj5/configuration.config (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:48:54,614] INFO clientPortAddress is 0.0.0.0:2181 (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:48:54,615] INFO secureClientPort is not set (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:48:54,615] INFO observerMasterPort is not set (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:48:54,615] INFO metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:48:54,617] INFO autopurge.snapRetainCount set to 3 (org.apache.zookeeper.server.DatadirCleanupManager)
[2023-02-07 13:48:54,617] INFO autopurge.purge

In [None]:
# | export

def get_kafka_config_string(
    log_dirs: str = "/tmp/kafka-logs", # value written to `log.dirs`
    zookeeper_connect: str = "localhost:2181", # value written to `zookeeper.connect`
    listener_port: int = 9092, # port of the PLAINTEXT listener in `listeners`
) -> str:
    """Render the contents of a Kafka broker properties file.

    Only `log.dirs`, `zookeeper.connect` and the port in `listeners` are
    filled in from the arguments; every other setting is fixed in the template
    below (single broker with id 0, replication factors of 1).
    """
    return f"""broker.id=0

############################# Socket Server Settings #############################

# The address the socket server listens on. If not configured, the host name will be equal to the value of
# java.net.InetAddress.getCanonicalHostName(), with PLAINTEXT listener name, and port 9092.
#   FORMAT:
#     listeners = listener_name://host_name:port
#   EXAMPLE:
#     listeners = PLAINTEXT://your.host.name:9092
listeners=PLAINTEXT://:{listener_port}

# Listener name, hostname and port the broker will advertise to clients.
# If not set, it uses the value for "listeners".
#advertised.listeners=PLAINTEXT://your.host.name:9092

# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL

# The number of threads that the server uses for receiving requests from the network and sending responses to the network
num.network.threads=3

# The number of threads that the server uses for processing requests, which may include disk I/O
num.io.threads=8

# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400

# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400

# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600


############################# Log Basics #############################

# A comma separated list of directories under which to store log files
log.dirs={log_dirs}

# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions=1

# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
# This value is recommended to be increased for installations with data dirs located in RAID array.
num.recovery.threads.per.data.dir=1

offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1

# The number of messages to accept before forcing a flush of data to disk
log.flush.interval.messages=10000

# The maximum amount of time a message can sit in a log before we force a flush
log.flush.interval.ms=1000

# The minimum age of a log file to be eligible for deletion due to age
log.retention.hours=168

# A size-based retention policy for logs. Segments are pruned from the log unless the remaining
# segments drop below log.retention.bytes. Functions independently of log.retention.hours.
log.retention.bytes=1073741824

# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=1073741824

# The interval at which log segments are checked to see if they can be deleted according to the retention policies
log.retention.check.interval.ms=300000

# Zookeeper connection string (see zookeeper docs for details).
zookeeper.connect={zookeeper_connect}

# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=18000

# The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance.
group.initial.rebalance.delay.ms=0
"""

In [None]:
# | export

@asynccontextmanager
@delegates(get_kafka_config_string)
async def kafka_broker(
    kafka_script_path: str = "/work/kafka_2.13-3.3.1/bin/kafka-server-start.sh",
    **kwargs
) -> AsyncGenerator[asyncio.subprocess.Process, None]:
    """Run a Kafka broker subprocess for the duration of an `async with` block.

    The configuration is rendered by `get_kafka_config_string(**kwargs)`,
    written to a temporary file through `write_config`, and passed to
    `kafka_script_path` as its only argument. The started process is yielded;
    its stdout and stdin are pipes, so its output can be collected with
    `proc.communicate()`.

    On exit, whether normal or through an exception,
    `terminate_asyncio_process` is awaited on the process and the temporary
    configuration is released. Exceptions raised inside the `async with` body
    propagate to the caller.
    """
    config_generator = write_config(get_kafka_config_string(**kwargs))
    config_path = config_generator.__enter__()
    try:
        # Started inside the outer `try` so the temporary config is released
        # even when the script cannot be launched.
        proc = await asyncio.create_subprocess_exec(
            kafka_script_path,
            config_path,
            stdout=asyncio.subprocess.PIPE,
            stdin=asyncio.subprocess.PIPE,
        )
        try:
            yield proc
        finally:
            await terminate_asyncio_process(proc)
    finally:
        # Cleanup only: no exception details are handed over, so the config
        # context manager cannot suppress an in-flight error.
        config_generator.__exit__(None, None, None)

In [None]:
with TemporaryDirectory() as tmp:
    tmp_dir_path = Path(tmp)
    async with zookeeper(data_dir=tmp_dir_path/"zookeeper") as zookeeper_proc:
        sleep(5)
        async with kafka_broker(log_dirs=tmp_dir_path/"kafka") as kafka_broker_proc:
            sleep(30)
    
z_out, _ = await zookeeper_proc.communicate()
k_out, _ = await kafka_broker_proc.communicate()

print(z_out.decode("UTF-8"))
print(k_out.decode("UTF-8"))

[INFO] fastkafka.server: terminate_asyncio_process(): Terminating the process 37978...
[INFO] fastkafka.server: terminate_asyncio_process(): Process 37978 terminated.
[INFO] fastkafka.server: terminate_asyncio_process(): Terminating the process 37612...
[INFO] fastkafka.server: terminate_asyncio_process(): Process 37612 terminated.
[2023-02-07 13:59:30,880] INFO Reading configuration from: /tmp/tmpyn2pe0o8/configuration.config (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:59:30,886] INFO clientPortAddress is 0.0.0.0:2181 (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:59:30,887] INFO secureClientPort is not set (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:59:30,887] INFO observerMasterPort is not set (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[2023-02-07 13:59:30,887] INFO metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider (org.apache.zookeeper.server.quorum.QuorumPeerConfi

In [None]:
# | export

@asynccontextmanager
async def create_kafka_test_env(zookeeper_port: int, kafka_port: int) -> AsyncGenerator[str, None]:
    """Start ZooKeeper and a Kafka broker on the given ports and yield the bootstrap address.

    Both services keep their data under one temporary directory
    (`zookeeper/` and `kafka/`), which is removed on exit. The broker is
    pointed at `localhost:{zookeeper_port}`, and the yielded value is
    `localhost:{kafka_port}`.

    Exceptions raised inside the `async with` body are not handled here; the
    nested context managers still run their cleanup.
    """
    with TemporaryDirectory() as tmp:
        tmp_dir_path = Path(tmp)
        async with zookeeper(
            data_dir=tmp_dir_path / "zookeeper",
            client_port=zookeeper_port,
        ):
            # Fixed grace period before the broker is started. Awaiting keeps
            # the event loop responsive where a blocking sleep would stall it.
            await asyncio.sleep(5)
            async with kafka_broker(
                log_dirs=tmp_dir_path / "kafka",
                listener_port=kafka_port,
                zookeeper_connect=f"localhost:{zookeeper_port}",
            ):
                # Same grace period before handing the address to the caller.
                await asyncio.sleep(5)
                yield f"localhost:{kafka_port}"
           

In [None]:
import anyio
import asyncer
from tqdm.notebook import tqdm, trange

from fastkafka.helpers import (
    consumes_messages,
    produce_messages,
)

In [None]:
 async with create_kafka_test_env(zookeeper_port=9998, kafka_port=9999) as bootstrap_servers:
    # End-to-end check: send 100_000 generated messages through the temporary
    # broker, with a consumer and a producer running in the same task group.
    msgs = [
        dict(user_id=i, feature_1=[(i / 1_000) ** 2], feature_2=[i % 177])
        for i in trange(100_000, desc="generating messages")
    ]

    async with asyncer.create_task_group() as tg:
        # The consumer is scheduled first and asked for exactly as many
        # messages as will be produced.
        tg.soonify(consumes_messages)(
            msgs_count=len(msgs), topic="test_data", bootstrap_servers=bootstrap_servers
        )

        # Two-second pause before producing, presumably so the consumer is
        # subscribed before the first message is sent (TODO confirm).
        await anyio.sleep(2)

        tg.soonify(produce_messages)(
            msgs=msgs, topic="test_data", bootstrap_servers=bootstrap_servers
        )

generating messages:   0%|          | 0/100000 [00:00<?, ?it/s]

[INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'test_data'})
[INFO] aiokafka.consumer.group_coordinator: Metadata for topic has changed from {} to {'test_data': 1}. 


consuming from 'test_data':   0%|          | 0/100000 [00:00<?, ?it/s]

producing to 'test_data':   0%|          | 0/100000 [00:00<?, ?it/s]

[INFO] fastkafka.server: terminate_asyncio_process(): Terminating the process 46138...
[INFO] fastkafka.server: terminate_asyncio_process(): Process 46138 terminated.
[INFO] fastkafka.server: terminate_asyncio_process(): Terminating the process 45773...
[INFO] fastkafka.server: terminate_asyncio_process(): Process 45773 terminated.
