In [1]:
# | default_exp testing

In [2]:
# | export

import asyncio
import contextlib
import hashlib
import os
import random
import shlex

# [B404:blacklist] Consider possible security implications associated with the subprocess module.
import requests
import shutil
import signal
import subprocess  # nosec
import textwrap
import time
import typer
import unittest
import unittest.mock
from contextlib import asynccontextmanager, contextmanager
from datetime import datetime, timedelta
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import *

import asyncer
import uvicorn
from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
from confluent_kafka.admin import AdminClient, NewTopic
from fastcore.meta import delegates
from pydantic import BaseModel
import tarfile
from tqdm import tqdm

# from fastkafka.server import _import_from_string
from fastkafka._components.helpers import combine_params, use_parameters_of
from fastkafka._components.logger import get_logger, supress_timestamps
from fastkafka.helpers import (
    consumes_messages,
    create_admin_client,
    create_missing_topics,
    in_notebook,
    tqdm,
    trange,
    produce_messages,
)
from fastkafka.application import FastKafka
from fastkafka._components.helpers import _import_from_string
from fastkafka.helpers import in_notebook

In [3]:
# | export

if in_notebook():
    from tqdm.notebook import tqdm, trange
else:
    from tqdm import tqdm, trange

In [4]:
from inspect import signature

import nest_asyncio
from nbdev_mkdocs.docstring import run_examples_from_docstring

In [5]:
# | notest

# allows async calls in notebooks

nest_asyncio.apply()

In [6]:
# | export

logger = get_logger(__name__)

In [7]:
supress_timestamps()
logger = get_logger(__name__, level=20)
logger.info("ok")

[INFO] __main__: ok


In [8]:
# | export

# Broker location is taken from the environment, falling back to a local broker
# on the default Kafka port when the variables are not set.
kafka_server_url = os.environ.get("KAFKA_HOSTNAME", "localhost")
kafka_server_port = os.environ.get("KAFKA_PORT", "9092")

# Keyword arguments shared by the Kafka clients created in this notebook.
aiokafka_config = dict(
    bootstrap_servers=f"{kafka_server_url}:{kafka_server_port}",
)

In [9]:
# | export


def nb_safe_seed(s: str) -> Callable[[int], int]:
    """Build a seed function whose base value is derived from a notebook name

    The base seed is the SHA-256 digest of ``s`` (UTF-8 encoded), read as an
    integer and reduced modulo ``10**8``, so it is stable across runs.

    Params:
        s: name of the notebook used to initialize the seed function

    Returns:
        A function mapping an offset ``x`` (default 0) to ``base_seed + x``
    """
    digest = hashlib.sha256(s.encode("utf-8")).digest()
    base_seed = int.from_bytes(digest, "big") % 100_000_000

    def _get_seed(x: int = 0, *, init_seed: int = base_seed) -> int:
        return x + init_seed

    return _get_seed

In [10]:
# Seed function specific to this notebook; the name is hashed into the base seed.
seed = nb_safe_seed("999_test_utils")

# The offset defaults to 0 and is simply added to the base seed.
assert seed() == seed(0)
assert seed() + 1 == seed(1)

In [11]:
# | export


def true_after(seconds: float) -> Callable[[], bool]:
    """Create a predicate that turns True once `seconds` have elapsed

    The clock starts when `true_after` is called, not when the returned
    function is first invoked.

    Args:
        seconds: delay, measured from this call, after which the predicate
            starts returning True

    Returns:
        A function returning False until more than `seconds` have passed
        and True afterwards
    """
    started_at = datetime.now()

    def _true_after(seconds: float = seconds, t: datetime = started_at) -> bool:
        elapsed = datetime.now() - t
        return elapsed > timedelta(seconds=seconds)

    return _true_after

In [12]:
# The predicate must stay False until strictly more than 1.1 s have elapsed
# since it was created.
f = true_after(1.1)
assert not f()
time.sleep(1)
# ~1.0 s elapsed: still below the threshold
assert not f()
time.sleep(0.1)
# at least 1.1 s of sleeping plus call overhead: threshold crossed
assert f()

In [13]:
# | export


@contextmanager
@delegates(create_missing_topics)  # type: ignore
def create_testing_topic(
    *,
    topic_prefix: str = "test_topic_",
    seed: Optional[int] = None,
    **kwargs: Dict[str, Any],
) -> Generator[str, None, None]:
    """Create testing topic

    The topic name is `topic_prefix` followed by a pseudo-random numeric suffix.
    If a topic with that name already exists it is deleted first, then the topic
    is (re)created, yielded to the caller and deleted again when the context exits.

    Example:
        ```python
        from os import environ
        from fastkafka.testing import create_testing_topic, create_admin_client

        kafka_server_url = environ["KAFKA_HOSTNAME"]
        aiokafka_config = {"bootstrap_servers": f"{kafka_server_url}:9092"}

        with create_testing_topic(
            topic_prefix="my_topic_for_create_testing_topic_",
            seed=746855,
            num_partitions=1,
            **aiokafka_config
        ) as topic:
            # Check if topic is created and exists in topic list
            kafka_admin = create_admin_client(**aiokafka_config)
            existing_topics = kafka_admin.list_topics().topics.keys()
            assert topic in existing_topics

        # Check if topic is deleted after exiting context
        existing_topics = kafka_admin.list_topics().topics.keys()
        assert topic not in existing_topics
        ```

    Args:
        topic_prefix: topic name prefix which will be augmented with a randomly generated suffix
        seed: seed used to generate the random suffix
        topic_names: a list of topic names
        num_partitions: Number of partitions to create
        replication_factor: Replication factor of partitions, or -1 if replica_assignment is used.
        replica_assignment: List of lists with the replication assignment for each new partition.
        new_topic_config: topic level config parameters as defined here: https://kafka.apache.org/documentation.html#topicconfigs
        bootstrap_servers (str, list(str)): a ``host[:port]`` string or list of
            ``host[:port]`` strings that the producer should contact to
            bootstrap initial cluster metadata. This does not have to be the
            full node list.  It just needs to have at least one broker that will
            respond to a Metadata API Request. Default port is 9092. If no
            servers are specified, will default to ``localhost:9092``.
        security_protocol (str): Protocol used to communicate with brokers.
            Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
        sasl_mechanism (str): Authentication mechanism when security_protocol
            is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values
            are: ``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
            ``OAUTHBEARER``.
            Default: ``PLAIN``
        sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
            Default: :data:`None`

    Yields:
        The generated name of the created topic


    """
    # Draw the suffix from a private generator so the process-wide `random`
    # state is left untouched; a given seed still produces the same suffix.
    # [B311:blacklist] Standard pseudo-random generators are not suitable for security/cryptographic purposes.
    rng = random.Random(seed)  # nosec
    suffix = str(rng.randint(0, 10**10))  # nosec

    topic = topic_prefix + suffix.zfill(3)

    admin = create_admin_client(**kwargs)  # type: ignore

    def _topic_exists() -> bool:
        # Ask the broker again on every call; the listing is not cached here.
        return topic in admin.list_topics().topics.keys()

    # delete topic if it already exists
    if _topic_exists():
        logger.warning(f"topic {topic} exists, deleting it...")
        for future in admin.delete_topics(topics=[topic]).values():
            # surface deletion errors before polling for the topic to vanish
            future.result()
        while _topic_exists():
            time.sleep(1)
    try:
        # create the topic and wait until the broker reports it
        create_missing_topics([topic], **kwargs)
        while not _topic_exists():
            time.sleep(1)
        yield topic

    finally:
        # always remove the topic again, then wait until it is really gone
        admin.delete_topics(topics=[topic])
        while _topic_exists():
            time.sleep(1)

In [14]:
# print(combine_params(create_testing_topic, create_missing_topics).__doc__)

In [15]:
run_examples_from_docstring(create_testing_topic(width=120), supress_stdout=True)

[ERROR] griffe.agents.nodes: Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
[ERROR] griffe.agents.nodes: Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
[ERROR] griffe.agents.nodes: Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
[ERROR] griffe.agents.nodes: Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'


In [16]:
# | export


@asynccontextmanager
@delegates(produce_messages)  # type: ignore
@delegates(create_testing_topic, keep=True)  # type: ignore
async def create_and_fill_testing_topic(**kwargs: Dict[str, str]) -> AsyncIterator[str]:
    """Create testing topic with a random suffix in the name and fill it with messages

    The keyword arguments are split between `create_testing_topic` and
    `produce_messages` with `use_parameters_of`. The topic exists for the
    duration of the context only.

    Args:
        topic_names: a list of topic names
        num_partitions: Number of partitions to create
        replication_factor: Replication factor of partitions, or -1 if replica_assignment is used.
        replica_assignment: List of lists with the replication assignment for each new partition.
        new_topic_config: topic level config parameters as defined here: https://kafka.apache.org/documentation.html#topicconfigs
        bootstrap_servers (str, list(str)): a ``host[:port]`` string or list of
            ``host[:port]`` strings that the producer should contact to
            bootstrap initial cluster metadata. This does not have to be the
            full node list.  It just needs to have at least one broker that will
            respond to a Metadata API Request. Default port is 9092. If no
            servers are specified, will default to ``localhost:9092``.
        security_protocol (str): Protocol used to communicate with brokers.
            Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
        sasl_mechanism (str): Authentication mechanism when security_protocol
            is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values
            are: ``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
            ``OAUTHBEARER``.
            Default: ``PLAIN``
        sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
            Default: :data:`None`
        topic: Topic name
        msgs: a list of messages to produce
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client.
            Default: ``aiokafka-producer-#`` (appended with a unique number
            per instance)
        key_serializer (Callable): used to convert user-supplied keys to bytes
            If not :data:`None`, called as ``f(key),`` should return
            :class:`bytes`.
            Default: :data:`None`.
        value_serializer (Callable): used to convert user-supplied message
            values to :class:`bytes`. If not :data:`None`, called as
            ``f(value)``, should return :class:`bytes`.
            Default: :data:`None`.
        acks (Any): one of ``0``, ``1``, ``all``. The number of acknowledgments
            the producer requires the leader to have received before considering a
            request complete. This controls the durability of records that are
            sent. The following settings are common:

            * ``0``: Producer will not wait for any acknowledgment from the server
              at all. The message will immediately be added to the socket
              buffer and considered sent. No guarantee can be made that the
              server has received the record in this case, and the retries
              configuration will not take effect (as the client won't
              generally know of any failures). The offset given back for each
              record will always be set to -1.
            * ``1``: The broker leader will write the record to its local log but
              will respond without awaiting full acknowledgement from all
              followers. In this case should the leader fail immediately
              after acknowledging the record but before the followers have
              replicated it then the record will be lost.
            * ``all``: The broker leader will wait for the full set of in-sync
              replicas to acknowledge the record. This guarantees that the
              record will not be lost as long as at least one in-sync replica
              remains alive. This is the strongest available guarantee.

            If unset, defaults to ``acks=1``. If `enable_idempotence` is
            :data:`True` defaults to ``acks=all``
        compression_type (str): The compression type for all data generated by
            the producer. Valid values are ``gzip``, ``snappy``, ``lz4``, ``zstd``
            or :data:`None`.
            Compression is of full batches of data, so the efficacy of batching
            will also impact the compression ratio (more batching means better
            compression). Default: :data:`None`.
        max_batch_size (int): Maximum size of buffered data per partition.
            After this amount :meth:`send` coroutine will block until batch is
            drained.
            Default: 16384
        linger_ms (int): The producer groups together any records that arrive
            in between request transmissions into a single batched request.
            Normally this occurs only under load when records arrive faster
            than they can be sent out. However in some circumstances the client
            may want to reduce the number of requests even under moderate load.
            This setting accomplishes this by adding a small amount of
            artificial delay; that is, if first request is processed faster,
            than `linger_ms`, producer will wait ``linger_ms - process_time``.
            Default: 0 (i.e. no delay).
        partitioner (Callable): Callable used to determine which partition
            each message is assigned to. Called (after key serialization):
            ``partitioner(key_bytes, all_partitions, available_partitions)``.
            The default partitioner implementation hashes each non-None key
            using the same murmur2 algorithm as the Java client so that
            messages with the same key are assigned to the same partition.
            When a key is :data:`None`, the message is delivered to a random partition
            (filtered to partitions with available leaders only, if possible).
        max_request_size (int): The maximum size of a request. This is also
            effectively a cap on the maximum record size. Note that the server
            has its own cap on record size which may be different from this.
            This setting will limit the number of record batches the producer
            will send in a single request to avoid sending huge requests.
            Default: 1048576.
        metadata_max_age_ms (int): The period of time in milliseconds after
            which we force a refresh of metadata even if we haven't seen any
            partition leadership changes to proactively discover any new
            brokers or partitions. Default: 300000
        request_timeout_ms (int): Produce request timeout in milliseconds.
            As it's sent as part of
            :class:`~kafka.protocol.produce.ProduceRequest` (it's a blocking
            call), maximum waiting time can be up to ``2 *
            request_timeout_ms``.
            Default: 40000.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        api_version (str): specify which kafka API version to use.
            If set to ``auto``, will attempt to infer the broker version by
            probing various APIs. Default: ``auto``
        ssl_context (ssl.SSLContext): pre-configured :class:`~ssl.SSLContext`
            for wrapping socket connections. Directly passed into asyncio's
            :meth:`~asyncio.loop.create_connection`. For more
            information see :ref:`ssl_auth`.
            Default: :data:`None`
        connections_max_idle_ms (int): Close idle connections after the number
            of milliseconds specified by this config. Specifying :data:`None` will
            disable idle checks. Default: 540000 (9 minutes).
        enable_idempotence (bool): When set to :data:`True`, the producer will
            ensure that exactly one copy of each message is written in the
            stream. If :data:`False`, producer retries due to broker failures,
            etc., may write duplicates of the retried message in the stream.
            Note that enabling idempotence acks to set to ``all``. If it is not
            explicitly set by the user it will be chosen. If incompatible
            values are set, a :exc:`ValueError` will be thrown.
            New in version 0.5.0.
        sasl_oauth_token_provider (: class:`~aiokafka.abc.AbstractTokenProvider`):
            OAuthBearer token provider instance. (See
            :mod:`kafka.oauth.abstract`).
            Default: :data:`None`

    Yields:
        The name of the created and pre-filled topic
    """

    # only the arguments create_testing_topic understands go to topic creation
    topic_kwargs = use_parameters_of(create_testing_topic, **kwargs)

    with create_testing_topic(**topic_kwargs) as topic:
        # the remaining producer-related arguments are selected once the topic exists
        producer_kwargs = use_parameters_of(produce_messages, **kwargs)
        await produce_messages(topic=topic, **producer_kwargs)

        yield topic

In [17]:
# print(combine_params(combine_params(create_and_fill_testing_topic, create_missing_topics), produce_messages).__doc__)

In [18]:
msgs_count = 120_000


class Hello(BaseModel):
    msg: str


msgs_count = 120_000
msgs = (
    [b"Hello world bytes" for _ in range(msgs_count // 3)]
    + [f"Hello world as string for the {i+1}. time!" for i in range(msgs_count // 3)]
    + [
        Hello(msg="Hello workd as Pydantic object for the {i+1}. time!")
        for i in range(msgs_count // 3)
    ]
)


async with create_and_fill_testing_topic(
    topic_prefix="my_topic_test_create_and_fill_testing_topic_",
    msgs=msgs,
    seed=1,
    **aiokafka_config,
) as topic:
    await consumes_messages(
        topic=topic,
        msgs_count=msgs_count,
        auto_offset_reset="earliest",
        #         group_id="test_group",
        **aiokafka_config,
    )

print("ok")

[INFO] fastkafka.helpers: create_missing_topics(['my_topic_test_create_and_fill_testing_topic_9167024629']): new_topics = [NewTopic(topic=my_topic_test_create_and_fill_testing_topic_9167024629,num_partitions=3)]


producing to 'my_topic_test_create_and_fill_testing_topic_9167024629':   0%|          | 0/120000 [00:00<?, ?it…

[INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'my_topic_test_create_and_fill_testing_topic_9167024629'})
[INFO] aiokafka.consumer.group_coordinator: Metadata for topic has changed from {} to {'my_topic_test_create_and_fill_testing_topic_9167024629': 3}. 


consuming from 'my_topic_test_create_and_fill_testing_topic_9167024629':   0%|          | 0/120000 [00:00<?, ?…

ok


In [19]:
# TODO: Send repeatedly?

In [20]:
# | export


@contextmanager
def mock_AIOKafkaProducer_send() -> Generator[unittest.mock.Mock, None, None]:
    """Mocks **send** method of **AIOKafkaProducer**

    The method is patched on the imported `AIOKafkaProducer` class itself, so the
    mock works no matter which module is running as `__main__`.

    Must be entered while an event loop is running, because the mocked return
    value is created with `asyncio.create_task`.

    Yields:
        The mock replacing `AIOKafkaProducer.send`; its return value is a task
        that completes with `None`
    """
    with unittest.mock.patch.object(AIOKafkaProducer, "send") as mock:

        async def _f() -> None:
            pass

        mock.return_value = asyncio.create_task(_f())

        yield mock

In [21]:
# | export


@contextlib.contextmanager
def change_dir(d: str) -> Generator[None, None, None]:
    """Temporarily make `d` the current working directory

    The directory that was current on entry is restored on exit, also when the
    body of the `with` block raises.

    Args:
        d: directory to switch to for the duration of the context
    """
    previous_dir = os.getcwd()
    os.chdir(d)
    try:
        yield
    finally:
        # always go back, even if the managed block failed
        os.chdir(previous_dir)

In [22]:
# TODO: tests
# change_dir must switch into the temporary directory inside the context and
# restore the original working directory afterwards.
with TemporaryDirectory() as d:
    original_wd = os.getcwd()
    assert original_wd != d
    with change_dir(d):
        assert os.getcwd() == d
    assert os.getcwd() == original_wd

In [23]:
# | export


async def run_script_and_cancel(
    script: str,
    *,
    script_file: Optional[str] = None,
    cmd: Optional[str] = None,
    cancel_after: int = 10,
    app_name: str = "app",
    kafka_app_name: str = "kafka_app",
    generate_docs: bool = False,
) -> Tuple[int, bytes]:
    """Run script and cancel after predefined time

    The script is written into a fresh temporary directory, which is also the
    working directory of the subprocess. The subprocess is always sent SIGTERM
    after `cancel_after` seconds, even if it has already finished by then.

    Args:
        script: a python source code to be executed in a separate subprocess
        script_file: name of the script where script source will be saved.
            If None, 'script.py' is used
        cmd: command to execute. If None, it will be set to 'python3 -m {Path(script_file).stem}'
        cancel_after: number of seconds before sending SIGTERM signal
        app_name: currently not used by this function
        kafka_app_name: name of the FastKafka object inside the script, used only
            when `generate_docs` is True
        generate_docs: if True, import `kafka_app_name` from the script and call
            its `create_docs` before running the command. Failures are logged
            and otherwise ignored

    Returns:
        A tuple containing exit code and combined stdout and stderr as a binary string
    """
    if script_file is None:
        script_file = "script.py"

    module_name = Path(script_file).stem

    if cmd is None:
        cmd = f"python3 -m {module_name}"

    with TemporaryDirectory() as d:
        consumer_script = Path(d) / script_file

        with open(consumer_script, "w") as file:
            file.write(script)

        with change_dir(d):
            if generate_docs:
                logger.info(f"Generating docs for: {module_name}:{kafka_app_name}")
                try:
                    kafka_app: FastKafka = _import_from_string(
                        f"{module_name}:{kafka_app_name}"
                    )
                    await asyncer.asyncify(kafka_app.create_docs)()
                except Exception as e:
                    # best effort: docs are optional, but keep the reason visible
                    logger.warning(
                        f"Generating docs failed for: {module_name}:{kafka_app_name}, ignoring it for now: {e!r}"
                    )

            proc = subprocess.Popen(  # nosec: [B603:subprocess_without_shell_equals_true] subprocess call - check for execution of untrusted input.
                shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            try:
                await asyncio.sleep(cancel_after)
            finally:
                # Also runs when this coroutine is cancelled while sleeping, so
                # the child process and its pipe are never left behind.
                proc.terminate()
                output, _ = proc.communicate()

        return (proc.returncode, output)

In [24]:
# Check exit code 0: the script finishes on its own before the cancel timeout
script = """
from time import sleep
print("hello")
sleep({t})
"""

exit_code, output = await run_script_and_cancel(script.format(t=0), cancel_after=2)
assert exit_code == 0, exit_code
assert output.decode("utf-8") == "hello\n", output.decode("utf-8")

# A script that is still sleeping when the timeout hits gets terminated;
# subprocess reports termination by a signal as a negative return code.
exit_code, output = await run_script_and_cancel(script.format(t=5), cancel_after=2)
assert exit_code < 0, exit_code

In [25]:
# Check exit code 1: a non-zero exit code of the script is passed through
# and nothing is captured when the script prints nothing
script = "exit(1)"

exit_code, output = await run_script_and_cancel(script, cancel_after=1)

assert exit_code == 1
assert output.decode("utf-8") == ""

In [26]:
# Check exit code 0 and output to stdout and stderr:
# both streams must end up in the single captured output
script = """
import sys
sys.stderr.write("hello from stderr\\n")
sys.stderr.flush()
print("hello, exiting with exit code 0")
exit(0)
"""

exit_code, output = await run_script_and_cancel(script, cancel_after=1)

assert exit_code == 0, exit_code
assert (
    output.decode("utf-8") == "hello from stderr\nhello, exiting with exit code 0\n"
), output.decode("utf-8")

In [27]:
# Check random exit code and output: an arbitrary exit code chosen by the
# script is returned unchanged together with everything it printed
script = """
print("hello\\nexiting with exit code 143")
exit(143)
"""

exit_code, output = await run_script_and_cancel(script, cancel_after=1)

assert exit_code == 143
assert output.decode("utf-8") == "hello\nexiting with exit code 143\n"

print("ok")

ok


### Local Kafka

In [50]:
#| export 

class LocalKafkaBroker:
    """Interface sketch of a locally running Kafka broker usable as a context manager

    Nothing is implemented yet: the constructor, `_install`, `start` and `stop`
    all raise `NotImplementedError`.
    """

    def __init__(self, *, port: Optional[int] = None, **kwargs: Dict[str, Any]):
        """Not implemented yet; always raises `NotImplementedError`"""
        raise NotImplementedError

    @classmethod
    def _install(cls) -> None:
        """Not implemented yet; always raises `NotImplementedError`"""
        raise NotImplementedError

    def start(self) -> Dict[str, Any]:
        """Calls `_install` first; currently always ends in `NotImplementedError`"""
        LocalKafkaBroker._install()
        raise NotImplementedError

    def stop(self) -> None:
        """Not implemented yet; always raises `NotImplementedError`"""
        raise NotImplementedError

    def __enter__(self) -> Dict[str, Any]:
        """Entering the context starts the broker and hands back what `start` returns"""
        return self.start()

    def __exit__(self, *args, **kwargs):
        """Leaving the context stops the broker; exception details are ignored"""
        self.stop()
            

In [51]:
with LocalKafkaBroker(port=9999) as config:
    print(config)
    assert config == {
        "bootstrap_servers": f"127.0.0.1:9999",
    }

NotImplementedError: 

In [28]:
#| export 

def install_java() -> None:
    """Make sure a `java` executable is available on PATH

    If `java` is already found on PATH nothing is installed. Otherwise the
    `install-jdk` package is installed with pip into the environment of the
    running interpreter, JDK 11 is installed through it, and the `bin`
    directory below the path returned by `jdk.install` is appended to PATH of
    the current process.

    Raises:
        subprocess.CalledProcessError: if installing `install-jdk` with pip fails
    """
    if shutil.which("java"):
        logger.info("Java is already installed.")
        return

    import importlib
    import sys

    logger.info("Installing Java...")
    logger.info(" - installing install-jdk...")
    # Use the pip of the running interpreter (not whichever `pip` is first on
    # PATH) so that the `import jdk` below can see the package; no shell needed.
    subprocess.run(  # nosec
        [sys.executable, "-m", "pip", "install", "install-jdk"], check=True
    )
    # the package appeared after interpreter start-up, refresh the import finders
    importlib.invalidate_caches()
    import jdk

    logger.info(" - installing jdk...")
    jdk_bin_path = jdk.install("11")
    os.environ["PATH"] = os.environ["PATH"] + f"{os.pathsep}{jdk_bin_path}/bin"
    logger.info("Java installed.")

In [31]:
#| notest

install_java()
assert shutil.which("java")
install_java()
assert shutil.which("java")

[INFO] __main__: Installing Java...
[INFO] __main__:  - installing install-jdk...
Defaulting to user installation because normal site-packages is not writeable
[INFO] __main__:  - installing jdk...
[INFO] __main__: Java installed.
[INFO] __main__: Java is already installed.


In [32]:
# | export


def install_kafka() -> None:
    """Make sure the Kafka command line scripts are available on PATH

    If `kafka-server-start.sh` is already found on PATH nothing is done.
    Otherwise the Kafka 3.3.2 (Scala 2.13) archive is downloaded into
    `~/.local`, unpacked there, and its `bin` directory is appended to PATH of
    the current process.

    Raises:
        requests.HTTPError: if the download URL answers with an error status
    """
    if shutil.which("kafka-server-start.sh"):
        logger.info("Kafka is already installed")
        return

    logger.info("Installing Kafka...")
    kafka_version = "3.3.2"
    kafka_fname = f"kafka_2.13-{kafka_version}"
    kafka_url = f"https://dlcdn.apache.org/kafka/{kafka_version}/{kafka_fname}.tgz"
    local_path = Path.home() / ".local"
    local_path.mkdir(exist_ok=True, parents=True)
    tgz_path = local_path / f"{kafka_fname}.tgz"
    kafka_path = local_path / f"{kafka_fname}"

    chunk_size = 128
    with requests.get(kafka_url, stream=True, timeout=60) as response:
        # fail here instead of saving an error page and breaking later in tarfile
        response.raise_for_status()
        try:
            # number of chunks, used only for the progress bar
            total = response.raw.length_remaining // chunk_size
        except Exception:
            # size unknown: the progress bar is shown without a total
            total = None

        with open(tgz_path, "wb") as f:
            for data in tqdm(
                response.iter_content(chunk_size=chunk_size), total=total
            ):
                f.write(data)

    with tarfile.open(tgz_path) as tar:
        tar.extractall(local_path)

    os.environ["PATH"] = os.environ["PATH"] + f"{os.pathsep}{kafka_path}/bin"
    logger.info(f"Kafka installed in {kafka_path}.")


In [33]:
#| notest

install_kafka()
assert shutil.which("kafka-server-start.sh")
install_kafka()
assert shutil.which("kafka-server-start.sh")

[INFO] __main__: Installing Kafka...


  0%|          | 0/832968 [00:00<?, ?it/s]

[INFO] __main__: Kafka installed in /home/davor/.local/kafka_2.13-3.3.2.
[INFO] __main__: Kafka is already installed


In [34]:
#| export 

def install_kafka_and_deps() -> None:
    """Install Java first and Kafka second, each only if it is not yet on PATH"""
    for installer in (install_java, install_kafka):
        installer()

In [35]:
#| notest
install_kafka_and_deps()

[INFO] __main__: Java is already installed.
[INFO] __main__: Kafka is already installed


In [36]:
# | export


async def run_localkafka() -> None:
    """Run a local ZooKeeper and Kafka server until SIGINT or SIGTERM is received

    Starts `zookeeper-server-start.sh` and then `kafka-server-start.sh` (both
    must be on PATH) with the default property files found in the `config`
    directory next to the scripts' `bin` directory, sleeping 10 seconds after
    each start. The stdout of both processes is echoed line by line, prefixed
    with the process id. On SIGINT/SIGTERM both processes are handed to
    `terminate_asyncio_process` and any remaining output is echoed.

    Raises:
        typer.Exit: with code 1 if any of the two processes ends with a
            non-zero return code
    """
    loop = asyncio.get_event_loop()

    HANDLED_SIGNALS = (
        signal.SIGINT,  # Unix signal 2. Sent by Ctrl+C.
        signal.SIGTERM,  # Unix signal 15. Sent by `kill <pid>`.
    )

    # mutable flag shared between the signal handler and the wait loop below
    d = {"should_exit": False}

    def handle_exit(sig: int, d: Dict[str, bool] = d) -> None:
        d["should_exit"] = True

    for sig in HANDLED_SIGNALS:
        loop.add_signal_handler(sig, handle_exit, sig)

    async with asyncer.create_task_group() as tg:
        # <script dir>/../../config/<name>.properties, resolved to an absolute path
        # NOTE(review): shutil.which returns None when the script is not on PATH,
        # which makes Path(...) fail - install_kafka is presumably expected to
        # have been run before; confirm with callers.
        zookeeper_properties_path = str(
            (
                Path(shutil.which("zookeeper-server-start.sh"))
                / ".."
                / ".."
                / "config"
                / "zookeeper.properties"
            ).resolve()
        )
        server_properties_path = str(
            (
                Path(shutil.which("kafka-server-start.sh"))
                / ".."
                / ".."
                / "config"
                / "server.properties"
            ).resolve()
        )

        #         subprocess.run(["zookeeper-server-start.sh", zookeeper_properties_path], check=True)
        logger.info("Starting zookeeper and waiting for 10 seconds...")
        # only stdout is piped; stderr of the child is not captured here
        tasks = [
            tg.soonify(asyncio.create_subprocess_exec)(
                "zookeeper-server-start.sh",
                zookeeper_properties_path,
                stdout=asyncio.subprocess.PIPE,
                stdin=asyncio.subprocess.PIPE,
            )
        ]
        await asyncio.sleep(10)

        logger.info("Starting Kafka server and waiting for another 10 seconds...")
        tasks = tasks + [
            tg.soonify(asyncio.create_subprocess_exec)(
                "kafka-server-start.sh",
                server_properties_path,
                stdout=asyncio.subprocess.PIPE,
                stdin=asyncio.subprocess.PIPE,
            )
        ]
        await asyncio.sleep(10)

    # the task group has been left, so the results of the soonified calls
    # (the process objects) are read from `.value` here
    procs = [task.value for task in tasks]

    async def log_output(
        output: Optional[asyncio.StreamReader], pid: int, d: Dict[str, bool] = d
    ) -> None:
        # echo every line read from `output` with a "[pid]: " prefix until EOF
        if output is None:
            raise RuntimeError("Expected StreamReader, got None. Is stdout piped?")
        while not output.at_eof():
            outs = await output.readline()
            if outs != b"":
                typer.echo(f"[{pid:03d}]: " + outs.decode("utf-8"), nl=False)

    async with asyncer.create_task_group() as tg:
        for proc in procs:
            tg.soonify(log_output)(proc.stdout, proc.pid)

        # poll the flag set by handle_exit
        while not d["should_exit"]:
            await asyncio.sleep(0.2)

        typer.echo("Starting process cleanup, this may take a few seconds...")
        # NOTE(review): terminate_asyncio_process is neither defined nor imported
        # in the part of the notebook visible here - confirm it is in scope.
        for proc in procs:
            tg.soonify(terminate_asyncio_process)(proc)

    # echo whatever output is still left after the processes were terminated
    for proc in procs:
        output, _ = await proc.communicate()
        if output:
            typer.echo(f"[{proc.pid:03d}]: " + output.decode("utf-8"), nl=False)

    returncodes = [proc.returncode for proc in procs]
    if not returncodes == [0] * len(procs):
        typer.secho(
            f"Return codes are not all zero: {returncodes}",
            err=True,
            fg=typer.colors.RED,
        )
        raise typer.Exit(1)

In [37]:
await run_localkafka()

[INFO] __main__: Starting zookeeper and waiting for 10 seconds...
[INFO] __main__: Starting Kafka server and waiting for another 10 seconds...
[4751]: [2023-02-07 15:21:33,206] INFO Reading configuration from: /home/davor/.local/kafka_2.13-3.3.2/config/zookeeper.properties (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[4751]: [2023-02-07 15:21:33,209] INFO clientPortAddress is 0.0.0.0:2181 (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[4751]: [2023-02-07 15:21:33,209] INFO secureClientPort is not set (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[4751]: [2023-02-07 15:21:33,209] INFO observerMasterPort is not set (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[4751]: [2023-02-07 15:21:33,209] INFO metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider (org.apache.zookeeper.server.quorum.QuorumPeerConfig)
[4751]: [2023-02-07 15:21:33,210] INFO autopurge.snapRetainCount set to 3 (org.apache.zookeeper.server.DatadirCleanupManag

[4751]: [2023-02-07 15:21:33,228] INFO Server environment:java.library.path=/usr/java/packages/lib:/usr/lib64:/lib64:/lib:/usr/lib (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:java.io.tmpdir=/tmp (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:java.compiler=<NA> (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:os.name=Linux (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:os.arch=amd64 (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:os.version=5.15.0-58-generic (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:user.name=davor (org.apache.zookeeper.server.ZooKeeperServer)
[4751]: [2023-02-07 15:21:33,228] INFO Server environment:user.home=/home/davor (org.ap

[5135]: [2023-02-07 15:21:43,521] INFO Client environment:java.home=/home/davor/.jdk/jdk-11.0.18+10 (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:java.class.path=/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/activation-1.1.1.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/aopalliance-repackaged-2.6.1.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/argparse4j-0.7.0.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/audience-annotations-0.5.0.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/commons-cli-1.4.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/commons-lang3-3.12.0.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/commons-lang3-3.8.1.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/connect-api-3.3.2.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/connect-basic-auth-extension-3.3.2.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/connect-json-3.3.2.jar:/home/davor/.local/kafka_2.13-3.3.2/bin/../libs/connec

[5135]: [2023-02-07 15:21:43,521] INFO Client environment:java.compiler=<NA> (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:os.name=Linux (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:os.arch=amd64 (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:os.version=5.15.0-58-generic (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:user.name=davor (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:user.home=/home/davor (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:user.dir=/work/fastkafka/nbs (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:os.memory.free=1009MB (org.apache.zookeeper.ZooKeeper)
[5135]: [2023-02-07 15:21:43,521] INFO Client environment:os.memory.max=1024MB (org.apache.zookeeper.ZooKee

[5135]: 	offsets.commit.timeout.ms = 5000
[5135]: 	offsets.load.buffer.size = 5242880
[5135]: 	offsets.retention.check.interval.ms = 600000
[5135]: 	offsets.retention.minutes = 10080
[5135]: 	offsets.topic.compression.codec = 0
[5135]: 	offsets.topic.num.partitions = 50
[5135]: 	offsets.topic.replication.factor = 1
[5135]: 	offsets.topic.segment.bytes = 104857600
[5135]: 	password.encoder.cipher.algorithm = AES/CBC/PKCS5Padding
[5135]: 	password.encoder.iterations = 4096
[5135]: 	password.encoder.key.length = 128
[5135]: 	password.encoder.keyfactory.algorithm = null
[5135]: 	password.encoder.old.secret = null
[5135]: 	password.encoder.secret = null
[5135]: 	principal.builder.class = class org.apache.kafka.common.security.authenticator.DefaultKafkaPrincipalBuilder
[5135]: 	process.roles = []
[5135]: 	producer.purgatory.purge.interval.requests = 1000
[5135]: 	queued.max.request.bytes = -1
[5135]: 	queued.max.requests = 500
[5135]: 	quota.window.num = 11
[5135]: 	quota.window.size.seconds

[5135]: [2023-02-07 15:21:43,928] INFO Feature ZK node at path: /feature does not exist (kafka.server.FinalizedFeatureChangeListener)
[5135]: [2023-02-07 15:21:43,958] INFO [BrokerToControllerChannelManager broker=0 name=forwarding]: Starting (kafka.server.BrokerToControllerRequestThread)
[5135]: [2023-02-07 15:21:44,176] INFO Updated connection-accept-rate max connection creation rate to 2147483647 (kafka.network.ConnectionQuotas)
[5135]: [2023-02-07 15:21:44,179] INFO Awaiting socket connections on 0.0.0.0:9092. (kafka.network.DataPlaneAcceptor)
[5135]: [2023-02-07 15:21:44,199] INFO [SocketServer listenerType=ZK_BROKER, nodeId=0] Created data-plane acceptor and processors for endpoint : ListenerName(PLAINTEXT) (kafka.network.SocketServer)
[5135]: [2023-02-07 15:21:44,204] INFO [BrokerToControllerChannelManager broker=0 name=alterPartition]: Starting (kafka.server.BrokerToControllerRequestThread)
[5135]: [2023-02-07 15:21:44,223] INFO [ExpirationReaper-0-Produce]: Starting (kafka.ser

[5135]: [2023-02-07 15:22:03,861] INFO [TransactionCoordinator id=0] Shutdown complete. (kafka.coordinator.transaction.TransactionCoordinator)
[5135]: [2023-02-07 15:22:03,862] INFO [GroupCoordinator 0]: Shutting down. (kafka.coordinator.group.GroupCoordinator)
[5135]: [2023-02-07 15:22:03,863] INFO [ExpirationReaper-0-Heartbeat]: Shutting down (kafka.server.DelayedOperationPurgatory$ExpiredOperationReaper)
[5135]: [2023-02-07 15:22:03,863] INFO [ExpirationReaper-0-Heartbeat]: Stopped (kafka.server.DelayedOperationPurgatory$ExpiredOperationReaper)
[5135]: [2023-02-07 15:22:03,863] INFO [ExpirationReaper-0-Heartbeat]: Shutdown completed (kafka.server.DelayedOperationPurgatory$ExpiredOperationReaper)
[5135]: [2023-02-07 15:22:03,864] INFO [ExpirationReaper-0-Rebalance]: Shutting down (kafka.server.DelayedOperationPurgatory$ExpiredOperationReaper)
[5135]: [2023-02-07 15:22:03,865] INFO [ExpirationReaper-0-Rebalance]: Stopped (kafka.server.DelayedOperationPurgatory$ExpiredOperationReaper)


NameError: name 'terminate_asyncio_process' is not defined

In [None]:
def _kafka_config_path(start_script: str, properties_file: str) -> str:
    """Return the resolved path of ``config/<properties_file>`` for a Kafka install.

    The ``config`` directory is located relative to ``start_script`` as found on
    ``PATH``: two levels up from the script, then into ``config``.

    Args:
        start_script: Name of the launcher script to look up on ``PATH``.
        properties_file: File name inside the ``config`` directory.

    Returns:
        The resolved path as a string.

    Raises:
        FileNotFoundError: If ``start_script`` cannot be found on ``PATH``.
    """
    script_location = shutil.which(start_script)
    if script_location is None:
        # shutil.which returns None when nothing matches; fail with a clear
        # message instead of the TypeError that Path(None) would raise.
        raise FileNotFoundError(f"'{start_script}' was not found on PATH")
    return str(
        (Path(script_location) / ".." / ".." / "config" / properties_file).resolve()
    )


zookeeper_properties_path = _kafka_config_path(
    "zookeeper-server-start.sh", "zookeeper.properties"
)
server_properties_path = _kafka_config_path(
    "kafka-server-start.sh", "server.properties"
)

# subprocess.run waits for the process to exit, so this cell blocks for as long as
# the script keeps running; check=True raises CalledProcessError on a non-zero exit.
subprocess.run(["zookeeper-server-start.sh", zookeeper_properties_path], check=True)

In [None]:
# Manual experiment: launch the Kafka broker start script found under the
# ./{kafka_fname} directory with that directory's config/server.properties.
# NOTE(review): `kafka_fname` is interpolated by IPython and must already be defined
# by an earlier cell -- it is not defined in this part of the notebook.
# The commented-out lines are alternative invocations (zookeeper start, `-daemon`
# variants and sleeps) kept for reference; only the kafka-server-start.sh line runs.
# !./{kafka_fname}/bin/zookeeper-server-start.sh ./{kafka_fname}/config/zookeeper.properties
# !./{kafka_fname}/bin/zookeeper-server-start.sh -daemon ./{kafka_fname}/config/zookeeper.properties
# !sleep 3
# !./{kafka_fname}/bin/kafka-server-start.sh -daemon ./{kafka_fname}/config/server.properties
!./{kafka_fname}/bin/kafka-server-start.sh ./{kafka_fname}/config/server.properties
# !echo "Waiting for 10 secs until kafka and zookeeper services are up and running"
# !sleep 10

In [None]:
# Manual check: list the process-table entries whose line contains "kafka".
!ps -ef | grep kafka

In [None]:
import shutil

# Report whether a `java` executable can be found on PATH.
print("Java is installed on the system" if shutil.which("java") else "I guess not")

In [None]:
!pip install install-jdk

In [None]:
# `jdk` is the module provided by the install-jdk package.
import jdk

# Request version '11' from install-jdk.
# NOTE(review): presumably this downloads and unpacks a JDK 11 build under the
# user's home directory on every run -- confirm against the install-jdk docs.
jdk.install('11')

In [None]:
from os import environ, pathsep
from pathlib import Path

# Build the JDK bin directory from the current user's home instead of hard-coding
# one developer's absolute path.
# NOTE(review): assumes the JDK was unpacked to ~/.jdk/jdk-11.0.18+10 -- confirm
# the directory name matches what is actually installed.
jdk_bin_dir = str(Path.home() / ".jdk" / "jdk-11.0.18+10" / "bin")

# Append only when missing so that re-running the cell does not keep growing PATH,
# and use os.pathsep rather than a literal ":" as the separator.
current_path = environ.get("PATH", "")
if jdk_bin_dir not in current_path.split(pathsep):
    environ["PATH"] = (
        f"{current_path}{pathsep}{jdk_bin_dir}" if current_path else jdk_bin_dir
    )

In [None]:
# Manual check: print the environment entries whose line contains "PATH".
!env | grep PATH