In [1]:
# thread-safe print
from threading import Thread, Lock

lock = Lock()

def Print(*args, **kwargs):
    """Thread-safe wrapper around the built-in ``print``.

    The module-level ``lock`` is held for the duration of the call, so two
    threads calling ``Print`` at the same time cannot interleave their output
    within a single call.

    Args:
        *args: Values to print; forwarded unchanged to ``print``.
        **kwargs: Optional ``print`` keyword arguments (``sep``, ``end``,
            ``file``, ``flush``); forwarded unchanged.
    """
    with lock:
        print(*args, **kwargs)

Print("hi")

hi


In [2]:
from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer

# KafkaAdminClient

In [3]:
# Address of the Kafka broker; reused by the clients created below.
broker = "kafka:9092"
# Admin client, used in the following cells to list and create topics.
admin = KafkaAdminClient(bootstrap_servers=[broker])
admin

<kafka.admin.client.KafkaAdminClient at 0x7f01934b5f60>

In [4]:
# Show the topics the broker currently reports.
admin.list_topics()

[]

In [18]:
from kafka.admin import NewTopic
from kafka.errors import TopicAlreadyExistsError

In [19]:
# Uncomment to start over with fresh topics: admin.delete_topics(["even_nums", "odd_nums"])

In [20]:
# Create the single-partition "even_nums" topic. Re-running this cell is
# harmless: an existing topic is reported with a message instead of an error.
try:
    admin.create_topics([NewTopic("even_nums", num_partitions=1, replication_factor=1)])
except TopicAlreadyExistsError:
    print("already exists")

already exists


In [21]:
# Create the "odd_nums" topic with two partitions. Re-running this cell is
# harmless: an existing topic is reported with a message instead of an error.
try:
    admin.create_topics([NewTopic("odd_nums", num_partitions=2, replication_factor=1)])
except TopicAlreadyExistsError:
    print("already exists")

already exists


In [22]:
# Confirm that both topics are now listed.
admin.list_topics()

['even_nums', 'odd_nums']

# KafkaProducer

In [23]:
# Producer with no serializers configured; values are passed to send() as bytes.
producer = KafkaProducer(bootstrap_servers=[broker])
producer

<kafka.producer.kafka.KafkaProducer at 0x7f01ac20b910>

In [25]:
# Send the number 0 as UTF-8 text to "even_nums"; keep the returned handle
# so the outcome of the send can be inspected.
result = producer.send("even_nums", value=str(0).encode("utf-8"))

In [27]:
# Wait for the send to finish and show its record metadata (topic, partition, offset, ...).
result.get()

RecordMetadata(topic='even_nums', partition=0, topic_partition=TopicPartition(topic='even_nums', partition=0), offset=0, timestamp=1681312260057, log_start_offset=0, checksum=None, serialized_key_size=-1, serialized_value_size=1, serialized_header_size=-1)

In [28]:
import time, threading

In [30]:
def num_producer(topic, start, step):
    """Send an endless arithmetic sequence to a Kafka topic, one value per second.

    Args:
        topic: Name of the topic to send to.
        start: First number in the sequence.
        step: Amount added after each send.

    Each number is sent as its decimal text encoded in UTF-8. The function
    never returns.
    """
    kafka_out = KafkaProducer(bootstrap_servers=[broker])
    current = start
    while True:
        # Only the first few values are logged so the notebook output stays short.
        if current < 10:
            Print("send", current, "to", topic)
        payload = str(current).encode("utf-8")
        kafka_out.send(topic, value=payload)
        current += step
        time.sleep(1)
        
# Run one producer per topic in background threads:
# even numbers (0, 2, 4, ...) go to "even_nums", odd numbers (1, 3, 5, ...) to "odd_nums".
threading.Thread(target=num_producer, args=("even_nums", 0, 2)).start()
threading.Thread(target=num_producer, args=("odd_nums", 1, 2)).start()

send 0 to even_nums
send 1 to odd_nums
send 2 to even_nums
send 3 to odd_nums
send 4 to even_nums
send 5 to odd_nums
send 6 to even_nums
send 7 to odd_nums
send 8 to even_nums
send 9 to odd_nums


# KafkaConsumer

In [31]:
# Consumer created without subscribing to any topic or assigning any partition.
consumer = KafkaConsumer(bootstrap_servers=[broker])
consumer

<kafka.consumer.group.KafkaConsumer at 0x7f0192285b70>

In [32]:
# Poll with a 1000 ms timeout; the result is empty because this consumer
# has no partitions assigned yet.
batch = consumer.poll(1000)
batch

{}

In [33]:
# Partitions currently assigned to this consumer.
consumer.assignment()

set()

In [34]:
from kafka import TopicPartition

In [51]:
# manual assignment (use assign method)
consumer = KafkaConsumer(bootstrap_servers=[broker])
# Read partition 0 of "even_nums" directly, without subscribing.
consumer.assign([TopicPartition("even_nums", 0)])
# Rewind the assigned partition to its earliest available message.
consumer.seek_to_beginning()
consumer.assignment()

{TopicPartition(topic='even_nums', partition=0)}

In [56]:
# TODO: put all the following in an infinite loop
# Fetch whatever arrives within one second and print each value as an integer.
batch = consumer.poll(timeout_ms=1000)
for topicpartition, messages in batch.items():
    for msg in messages:
        print(int(msg.value.decode("utf-8")))

898
900


In [71]:
# automatic assignment (use subscribe method)
consumer = KafkaConsumer(bootstrap_servers=[broker])
#consumer.subscribe(["odd_nums", "even_nums"])
# Subscribe by regex: every topic whose name ends in "_nums".
consumer.subscribe(pattern=r".*_nums$")
# Nothing is assigned right after subscribing...
print(consumer.assignment())
# ...partitions only show up once the consumer has polled.
_ = consumer.poll(1000)
print(consumer.assignment())
# Rewind every assigned partition to its earliest available message.
consumer.seek_to_beginning()

set()
{TopicPartition(topic='odd_nums', partition=0), TopicPartition(topic='odd_nums', partition=1), TopicPartition(topic='even_nums', partition=0)}


In [92]:
# Fetch one batch (waiting up to a second) and print each number together
# with the topic and partition it was read from.
batch = consumer.poll(timeout_ms=1000)
for tp, messages in batch.items():
    for msg in messages:
        print(int(msg.value.decode("utf-8")), tp.topic, tp.partition)

2465 odd_nums 0
2467 odd_nums 0
2469 odd_nums 0
2471 odd_nums 1
2473 odd_nums 1
2464 even_nums 0
2466 even_nums 0
2468 even_nums 0
2470 even_nums 0
2472 even_nums 0


In [93]:
# lookup position
positions = {}  # key=TopicPartition, value=offset
for tp in consumer.assignment():
    # Query each position once so the printed and saved offsets always agree.
    positions[tp] = consumer.position(tp)
    print(positions[tp])
positions # TODO: save in file or DB

634
603
1238


{TopicPartition(topic='odd_nums', partition=0): 634,
 TopicPartition(topic='odd_nums', partition=1): 603,
 TopicPartition(topic='even_nums', partition=0): 1238}

In [94]:
# have a new consumer take over!
consumer2 = KafkaConsumer(bootstrap_servers=[broker])
# Manually assign the same partitions whose offsets were recorded in `positions`...
consumer2.assign(positions.keys())
# ...and resume each partition at its recorded offset.
for tp, offset in positions.items():
    consumer2.seek(tp, offset)

In [95]:
# Fetch one batch with the takeover consumer and print each number together
# with the topic and partition it was read from.
batch = consumer2.poll(timeout_ms=1000)
for tp, messages in batch.items():
    for msg in messages:
        print(int(msg.value.decode("utf-8")), tp.topic, tp.partition)

2475 odd_nums 0
2481 odd_nums 0
2477 odd_nums 1
2479 odd_nums 1
2474 even_nums 0
2476 even_nums 0
2478 even_nums 0
2480 even_nums 0


In [96]:
# consumer groups
def consume_evens(group):
    """Read the "even_nums" topic as a member of consumer group ``group``.

    Polls ten times, waiting up to one second per poll, and prints every
    received number together with the group name via the thread-safe
    ``Print``. The consumer is closed when polling finishes or fails.

    Args:
        group: Consumer group id passed to ``KafkaConsumer``.
    """
    consumer = KafkaConsumer(bootstrap_servers=[broker], group_id=group)
    try:
        # subscribe() is documented as taking a list of topic names.
        consumer.subscribe(["even_nums"])
        #while True:
        for _ in range(10): # TODO: use infinite loop
            batch = consumer.poll(1000)
            for messages in batch.values():
                for msg in messages:
                    Print(int(str(msg.value, "utf-8")), group)
    finally:
        # Release the consumer's resources instead of leaving it open
        # after the thread ends.
        consumer.close()
                
# Two consumers in different groups, each in its own thread; the output shows
# both groups printing the same numbers.
threading.Thread(target=consume_evens, args=("group1",)).start()
threading.Thread(target=consume_evens, args=("group2",)).start()

3014 group1
3014 group2
3016 group1
3016 group2
3018 group2
3018 group1
3020 group1
3020 group2
3022 group1
3022 group2
3024 group2
3024 group1
3026 group1
3028 group1


# Beach Animals

In [97]:
# Generate the Python protobuf module from animals.proto into the current directory.
! python3 -m grpc_tools.protoc -I=. --python_out=. animals.proto

In [98]:
from animals_pb2 import *

In [101]:
# Build a sample sighting and show its serialized protobuf bytes.
s = Sighting(beach="A", animal="Shark")
s.SerializeToString()

b'\n\x01A\x12\x05Shark'

In [102]:
import string, random
# The uppercase letters A-Z as a single string.
string.ascii_uppercase

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [None]:
def animal_gen():
    """Continuously publish random animal sightings to the "animals" topic.

    Each message value is a serialized ``Sighting`` protobuf. The message key
    is the beach name encoded as UTF-8, so all sightings for one beach share
    the same key. The loop never exits.
    """
    Print("generating animals")
    producer = KafkaProducer(bootstrap_servers=[broker])
    while True:
        # Beaches are named by a single uppercase letter, like "A".
        beach = random.choice(string.ascii_uppercase)
        # NOTE(review): placeholder animal list -- adjust to the intended set.
        animal = random.choice(["Shark", "Dolphin", "Turtle", "Seagull"])
        s = Sighting(beach=beach, animal=animal)
        value = s.SerializeToString()
        # No key serializer is configured on the producer, so the key must
        # already be bytes rather than str.
        key = bytes(beach, "utf-8")
        # NOTE(review): there is no pacing in this loop; add a sleep if the
        # send rate should be limited.
        producer.send("animals", value=value, key=key)