In [2]:
from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer

# Admin

In [3]:
# Confirm that a process is listening on port 9092 before connecting to it.
!lsof -i | grep "9092"

java     389 root  178u  IPv4 114241      0t0  TCP *:9092 (LISTEN)


In [4]:
# Address of the Kafka broker used by every client in this notebook.
broker = "localhost:9092"
# Admin client for topic management (listing, deleting and creating topics below).
admin = KafkaAdminClient(bootstrap_servers=[broker])

In [5]:
# Topic names currently visible to the admin client, before anything is changed.
admin.list_topics()

['even_nums', 'odd_nums', '__consumer_offsets']

In [6]:
from kafka.admin import NewTopic
from kafka.errors import TopicAlreadyExistsError

In [13]:
# Reset the demo topics, but only request deletion for topics that currently
# exist: on a fresh broker neither topic is present, and asking the admin
# client to delete a missing topic raises instead of returning a response,
# which would break a top-to-bottom run of the notebook.
demo_topics = ["even_nums", "odd_nums"]
existing_topics = set(admin.list_topics())
admin.delete_topics([name for name in demo_topics if name in existing_topics])

DeleteTopicsResponse_v3(throttle_time_ms=0, topic_error_codes=[(topic='odd_nums', error_code=0), (topic='even_nums', error_code=0)])

In [14]:
# Create "even_nums" as a single-partition topic with replication factor 1.
admin.create_topics(
    new_topics=[NewTopic(name="even_nums", num_partitions=1, replication_factor=1)]
)

CreateTopicsResponse_v3(throttle_time_ms=0, topic_errors=[(topic='even_nums', error_code=0, error_message=None)])

In [15]:
# Creating a topic that already exists is rejected; catch that specific error
# so the cell reports it instead of failing.
try:
    admin.create_topics(
        new_topics=[NewTopic("even_nums", num_partitions=1, replication_factor=1)]
    )
except TopicAlreadyExistsError:
    print("already exists")

already exists


In [16]:
# "odd_nums" gets two partitions; the guard keeps the cell re-runnable when the
# topic is already present.
try:
    admin.create_topics(
        new_topics=[NewTopic("odd_nums", num_partitions=2, replication_factor=1)]
    )
except TopicAlreadyExistsError:
    print("already exists")

In [17]:
# Both demo topics should be listed again now that they have been re-created.
admin.list_topics()

['even_nums', 'odd_nums', '__consumer_offsets']

# Producer

In [18]:
# Producer client connected to the same broker; used for the single test send below.
producer = KafkaProducer(bootstrap_servers=[broker])

In [19]:
# Message values are sent as bytes, so the number is encoded as UTF-8 text first.
result = producer.send("even_nums", value=str(1).encode("utf-8"))

In [20]:
# Resolve the result of the send; the displayed RecordMetadata reports the
# topic, partition and offset the record was written to.
result.get()

RecordMetadata(topic='even_nums', partition=0, topic_partition=TopicPartition(topic='even_nums', partition=0), offset=0, timestamp=1705183689532, log_start_offset=0, checksum=None, serialized_key_size=-1, serialized_value_size=1, serialized_header_size=-1)

In [21]:
from threading import Thread, Lock

# Single lock shared by every thread that prints through Print().
lock = Lock()


def Print(*args, **kwargs):
    """Print while holding a lock so output from different threads never interleaves.

    Accepts the same positional arguments as the built-in ``print`` and
    forwards any keyword arguments (``sep``, ``end``, ``file``, ``flush``)
    to it unchanged.
    """
    with lock:
        print(*args, **kwargs)

# Quick check that the lock-protected print helper works.
Print("hi")

hi


In [22]:
import time, threading

def num_producer(topic, start, step, interval=5, stop_event=None):
    """Publish an arithmetic sequence of integers to ``topic`` until stopped.

    Each number is sent as its UTF-8 encoded decimal string. Only numbers
    below 10 are echoed via ``Print`` so the notebook output stays short.

    Args:
        topic: Name of the Kafka topic to publish to.
        start: First number of the sequence.
        step: Increment between consecutive numbers.
        interval: Seconds to wait between sends (defaults to 5, the value
            that used to be hard-coded).
        stop_event: Optional ``threading.Event``. When it is set, the loop
            ends and the producer is closed. With the default ``None`` the
            loop runs forever, exactly as before.
    """
    producer = KafkaProducer(bootstrap_servers=[broker])
    num = start
    try:
        while stop_event is None or not stop_event.is_set():
            if num < 10:
                Print("send", num, "to", topic)
            producer.send(topic, str(num).encode("utf-8"))
            num += step
            if stop_event is None:
                time.sleep(interval)
            else:
                # wait() returns early when the event is set, so a stop
                # request does not have to sit out the full interval.
                stop_event.wait(interval)
    finally:
        # Release the producer's connections whether the loop ended because
        # of a stop request or because of an exception.
        producer.close()

# Start one background producer per topic. The producers loop forever, so the
# threads are marked as daemons: they keep running while the kernel is alive
# but do not prevent the interpreter from exiting.
threading.Thread(target=num_producer, args=("even_nums", 0, 2), daemon=True).start()
threading.Thread(target=num_producer, args=("odd_nums", 1, 2), daemon=True).start()

send 1 to odd_nums
send 0 to even_nums


# Consumer

In [23]:
# A plain consumer: no topics or partitions are attached at construction time.
consumer = KafkaConsumer(bootstrap_servers=[broker])

In [24]:
# Poll with a 1000 ms timeout; the displayed batch is empty because this
# consumer has nothing assigned yet.
batch = consumer.poll(timeout_ms=1000)
batch

{}

In [25]:
# No topics or partitions have been assigned yet, so this is an empty set.
consumer.assignment()

send 3 to odd_nums
send 2 to even_nums


set()

send 5 to odd_nums
send 4 to even_nums


## Manual Assignment

In [26]:
from kafka import TopicPartition

In [27]:
# Manually pin this consumer to partition 0 of "even_nums", then display the
# resulting assignment to confirm it.
consumer.assign([TopicPartition("even_nums", 0)])
consumer.assignment()

{TopicPartition(topic='even_nums', partition=0)}

In [28]:
# batch = consumer.poll(1000)
# batch

In [29]:
# Rewind the assigned partition so the next poll reads from the earliest
# available record.
consumer.seek_to_beginning()

In [30]:
# Fetch one batch (1000 ms timeout) and print every value as an integer.
batch = consumer.poll(timeout_ms=1000)
for topic_partition, messages in batch.items():
    for msg in messages:
        print(int(msg.value.decode("utf-8")))

1
0
2
4
send 7 to odd_nums
send 6 to even_nums
send 9 to odd_nums
send 8 to even_nums


## Automatic Assignment

In [31]:
# Release the manually-assigned consumer before replacing it; rebinding the
# name without close() would leave its broker connections open.
consumer.close()
# Start over with a consumer that subscribes by topic name instead of being
# given explicit partitions.
consumer = KafkaConsumer(bootstrap_servers=[broker])
consumer.subscribe(["even_nums"])

In [32]:
# subscribe() alone does not assign partitions: the set is still empty here
# and only fills in after the consumer has polled (next cell).
print(consumer.assignment())

set()


In [33]:
# The first poll after subscribe() is what triggers partition assignment. Any
# records it returns are deliberately discarded (the consumer is rewound in
# the next cell). The result is not bound to `_`, because assigning to `_`
# stops IPython from using that name for the last cell output.
consumer.poll(1000)
print(consumer.assignment())

{TopicPartition(topic='even_nums', partition=0)}


In [34]:
# Now that a partition is assigned, rewind it so the next poll starts from
# the earliest available record.
consumer.seek_to_beginning()

In [35]:
# Read one batch from the subscribed topic and print each value as an integer.
batch = consumer.poll(timeout_ms=1000)
for topic_partition, messages in batch.items():
    for msg in messages:
        print(int(msg.value.decode("utf-8")))

1
0
2
4
6
8
10
12
14
16
18
20


## Multiple Assignment

In [36]:
# Close the single-topic consumer before replacing it so its connections are
# released rather than leaked.
consumer.close()
# New consumer subscribed to both demo topics at once.
consumer = KafkaConsumer(bootstrap_servers=[broker])
consumer.subscribe(["even_nums", "odd_nums"])

In [37]:
# As with a single topic, nothing is assigned until the first poll.
print(consumer.assignment())

set()


In [38]:
# Poll once so the consumer receives its partitions for both topics; the
# returned records are intentionally ignored. The result is not stored in `_`
# so that IPython's own use of `_` for the last output keeps working.
consumer.poll(1000)
print(consumer.assignment())

{TopicPartition(topic='even_nums', partition=0), TopicPartition(topic='odd_nums', partition=0), TopicPartition(topic='odd_nums', partition=1)}


In [39]:
# Rewind all assigned partitions (called with no arguments) before reading.
consumer.seek_to_beginning()

In [43]:
# The batch is keyed by TopicPartition; print each key followed by the integer
# values read from that partition.
batch = consumer.poll(timeout_ms=1000)
for topic_partition, messages in batch.items():
    print(topic_partition)
    for msg in messages:
        print(int(msg.value.decode("utf-8")))

TopicPartition(topic='even_nums', partition=0)
56
TopicPartition(topic='odd_nums', partition=1)
57


In [39]:
# Snapshot the current position of `consumer` on every partition assigned to
# it, keyed by TopicPartition, so that another consumer can resume from here.
positions = {tp: consumer.position(tp) for tp in consumer.assignment()}
positions

{TopicPartition(topic='even_nums', partition=0): 9,
 TopicPartition(topic='odd_nums', partition=0): 6,
 TopicPartition(topic='odd_nums', partition=1): 2}

In [40]:
# Second consumer that will be positioned by hand at the offsets recorded in `positions`.
consumer2 = KafkaConsumer(bootstrap_servers=[broker])

In [41]:
# Give consumer2 exactly the partitions that were recorded, then move each one
# to its recorded offset.
consumer2.assign(list(positions))
for tp, position in positions.items():
    consumer2.seek(partition=tp, offset=position)

In [42]:
# consumer2 continues from the recorded offsets; show what it reads per partition.
batch = consumer2.poll(timeout_ms=1000)
for topic_partition, messages in batch.items():
    print(topic_partition)
    for msg in messages:
        print(int(msg.value.decode("utf-8")))

TopicPartition(topic='even_nums', partition=0)
16
18
20
22
24
26
28
30
32
34
TopicPartition(topic='odd_nums', partition=1)
25
27
31
35
TopicPartition(topic='odd_nums', partition=0)
17
19
21
23
29
33


# Taking over for another consumer
What if I stop this consumer and want another consumer to pick up where it left off —
for example, because the job is being moved to a different machine?

First, record where this consumer currently is.

In [None]:
# Look up this consumer's current position on partition 0 of "even_nums".
topic_partition = TopicPartition("even_nums", 0)
consumer.position(topic_partition)

In [49]:
# Record (and print) where `consumer` currently is on each assigned partition.
position = {}
for tp in consumer.assignment():
    position[tp] = pos = consumer.position(tp)
    print(tp.topic, tp.partition, '\t', pos)
position

even_nums 0 	 30
odd_nums 0 	 12
odd_nums 1 	 17


{TopicPartition(topic='even_nums', partition=0): 30,
 TopicPartition(topic='odd_nums', partition=0): 12,
 TopicPartition(topic='odd_nums', partition=1): 17}

In [53]:
# Fresh consumer that will resume from the offsets saved in `position`.
consumer2 = KafkaConsumer(bootstrap_servers=[broker])

In [54]:
# Assign consumer2 the same partitions whose positions were recorded.
consumer2.assign(list(position))

In [55]:
# Move each partition to the offset the first consumer had reached.
for tp, off in position.items():
    consumer2.seek(partition=tp, offset=off)

In [60]:
# Read one batch with the takeover consumer and print the values grouped by
# partition.
batch = consumer2.poll(timeout_ms=1000)
for topic_partition, messages in batch.items():
    print(topic_partition)
    for msg in messages:
        print(int(msg.value.decode("utf-8")))

TopicPartition(topic='odd_nums', partition=1)
343
345
347
359
363
367
371
377
381
389
TopicPartition(topic='odd_nums', partition=0)
341
349
351
353
355
357
361
365
369
373
375
379
383
385
387
TopicPartition(topic='even_nums', partition=0)
340
342
344
346
348
350
352
354
356
358
360
362
364
366
368
370
372
374
376
378
380
382
384
386
388


# Consumer Groups (2 groups, 3 consumers, reading odd numbers)

In [61]:
def consume_odds(group, thread, polls=5, timeout_ms=1000):
    """Consume "odd_nums" as a member of ``group`` and print what is received.

    Args:
        group: Consumer group id to join.
        thread: Label included in every printed line to tell the consumers apart.
        polls: Number of poll iterations to run (defaults to 5, the value
            that used to be hard-coded).
        timeout_ms: Timeout passed to each poll (defaults to 1000).
    """
    consumer = KafkaConsumer(bootstrap_servers=[broker], group_id=group)
    try:
        consumer.subscribe(["odd_nums"])
        for i in range(polls):
            Print(thread, '\ti', i)
            batch = consumer.poll(timeout_ms)
            for tp, messages in batch.items():
                for msg in messages:
                    Print('\t', group, thread, msg.value)
    finally:
        # Close the consumer explicitly when done so it leaves the group and
        # releases its connections instead of lingering until it times out.
        consumer.close()

# Group g1 gets a single consumer thread (t1); group g2 gets two (t2 and t3).
threading.Thread(target=consume_odds, kwargs={"group": "g1", "thread": "t1"}).start()
threading.Thread(target=consume_odds, kwargs={"group": "g2", "thread": "t2"}).start()
threading.Thread(target=consume_odds, kwargs={"group": "g2", "thread": "t3"}).start()

t3 	i 0
t2 	i 0
t1 	i 0
t2 	i 1
t1 	i 1
g1 t1 b'349'
g1 t1 b'351'
g1 t1 b'353'
g1 t1 b'355'
g1 t1 b'357'
g1 t1 b'361'
g1 t1 b'365'
g1 t1 b'369'
g1 t1 b'373'
g1 t1 b'375'
g1 t1 b'379'
g1 t1 b'383'
g1 t1 b'385'
g1 t1 b'387'
g1 t1 b'391'
g1 t1 b'393'
g1 t1 b'395'
g1 t1 b'399'
g1 t1 b'401'
g1 t1 b'405'
g1 t1 b'359'
g1 t1 b'363'
g1 t1 b'367'
g1 t1 b'371'
g1 t1 b'377'
g1 t1 b'381'
g1 t1 b'389'
g1 t1 b'397'
g1 t1 b'403'
t1 	i 2
g2 t2 b'359'
g2 t2 b'363'
g2 t2 b'367'
g2 t2 b'371'
g2 t2 b'377'
g2 t2 b'381'
g2 t2 b'389'
g2 t2 b'397'
g2 t2 b'403'
t2 	i 2
t3 	i 1
g2 t3 b'351'
g2 t3 b'353'
g2 t3 b'355'
g2 t3 b'357'
g2 t3 b'361'
g2 t3 b'365'
g2 t3 b'369'
g2 t3 b'373'
g2 t3 b'375'
g2 t3 b'379'
g2 t3 b'383'
g2 t3 b'385'
g2 t3 b'387'
g2 t3 b'391'
g2 t3 b'393'
g2 t3 b'395'
g2 t3 b'399'
g2 t3 b'401'
g2 t3 b'405'
t3 	i 2
g2 t2 b'407'
t2 	i 3
g1 t1 b'407'
t1 	i 3
t3 	i 3
t2 	i 4
t1 	i 4
t3 	i 4
t2 	i 5
t1 	i 5
t3 	i 5
t2 	i 6
t1 	i 6
t3 	i 6
t2 	i 7
t1 	i 7
t3 	i 7
g1 t1 b'409'
t1 	i 8
g2 t3 b'409'
t3 	i 8