## Start the Zookeeper and Kafka Server

In [None]:
# Run the two commands below in 2 separate terminals and keep both terminals open.
# zookeeper-server-start.sh ~/kafka_2.12-3.2.0/config/zookeeper.properties
# kafka-server-start.sh ~/kafka_2.12-3.2.0/config/server.properties

## Producer

In [3]:
from kafka import KafkaProducer
import json

In [4]:
# Broker address and target topic used by the producer cells in this section.
bootstrap_servers = "localhost:9092"
topic_name = "kafka-localhost-python"

In [5]:
def _to_json_bytes(obj):
    """Serialize ``obj`` to JSON text and encode that text as ASCII bytes."""
    return json.dumps(obj).encode('ascii')


# Keys and values are both sent through the same JSON serializer.
producer = KafkaProducer(
    bootstrap_servers=bootstrap_servers,
    key_serializer=_to_json_bytes,
    value_serializer=_to_json_bytes,
)

Once the producer is created, we can produce a couple of events.

In [None]:
# First order: key and value are plain dicts, serialized by the producer.
first_order = {
    "key": {"id": 1},
    "value": {"name": "👨 Francesco", "pizza": "Margherita 🍕"},
}
producer.send(topic_name, **first_order)

# Flush so the buffered record is pushed out before the cell finishes.
producer.flush()

In [5]:
# Two more orders, sent back to back and flushed together.
more_orders = [
    ({"id": 2}, {"name": "👩 Adele", "pizza": "Hawaii 🍕+🍍+🥓"}),
    ({"id": 3}, {"name": "👦 Mark", "pizza": "Choccolate 🍕+🍫"}),
]
for order_key, order_value in more_orders:
    producer.send(topic_name, key=order_key, value=order_value)

producer.flush()

In [6]:
# A fourth order on the same topic.
order_key = {"id": 4}
order_value = {"name": "👨 Dan", "pizza": "Fries 🍕+🍟"}
producer.send(topic_name, key=order_key, value=order_value)
producer.flush()

## Consumer

In [7]:
from kafka import KafkaConsumer
import json

In [8]:
# Broker address, topic to read, and the consumer group this section joins.
bootstrap_servers = "localhost:9092"
topic_name = "kafka-localhost-python"
group_id = "my_pizza_group"

In [9]:
# Consumer for the pizza topic. Keys and values are decoded from ASCII bytes
# and parsed as JSON, mirroring the producer's serializers.
consumer = KafkaConsumer(
    bootstrap_servers=bootstrap_servers,
    group_id=group_id,
    # 'earliest' is the canonical spelling; the former 'smallest' is a
    # deprecated alias that kafka-python only accepts with a warning. The
    # other consumers in this notebook already use 'earliest'.
    auto_offset_reset='earliest',
    value_deserializer=lambda v: json.loads(v.decode('ascii')),
    key_deserializer=lambda v: json.loads(v.decode('ascii')),
    max_poll_records=10,
)

In [10]:
# Show the topic names this consumer can see (displayed as a set).
consumer.topics()

{'first-topic', 'kafka-localhost-python', 'second-topic'}

In [11]:
# Subscribe the consumer to the single topic named by topic_name.
consumer.subscribe(topics=[topic_name])

In [14]:
# Show the current subscription to confirm the subscribe() call took effect.
consumer.subscription()

{'kafka-localhost-python'}

Now we start reading

In [None]:
# Print each record as "partition:offset: k=<key> v=<value>".
# NOTE(review): `consumer` was created without consumer_timeout_ms, so this
# loop presumably keeps waiting for new records until the kernel is
# interrupted - confirm that is intended before using "Run All".
for message in consumer:
    fields = (message.partition, message.offset, message.key, message.value)
    print("%d:%d: k=%s v=%s" % fields)

## Topic Partitions

In [15]:
from kafka import KafkaProducer
from kafka import KafkaConsumer
from kafka import TopicPartition
from kafka.admin import KafkaAdminClient, NewTopic
import json

In [16]:
# Settings for the partitioned-topic section.
bootstrap_servers = "localhost:9092"
topic_name = "kafka-localhost-python"
group_id = "my_pizza_group"
# The partitioned topic lives next to the original one, with a suffix.
topic_name_partitioned = topic_name + "-partitioned"
# Timeout in milliseconds. Stored as an int (it used to be the string "5000");
# the int(timeout_ms) call in the topic-creation cell keeps working unchanged.
timeout_ms = 5000

In [17]:
# Producer for the partitioned topic: JSON text encoded as ASCII bytes for
# both keys and values.
producer_settings = {
    "bootstrap_servers": bootstrap_servers,
    "key_serializer": lambda k: json.dumps(k).encode('ascii'),
    "value_serializer": lambda v: json.dumps(v).encode('ascii'),
}
producer = KafkaProducer(**producer_settings)

Create a topic with two partitions (note `num_partitions=2`).

In [18]:
# Admin client used to create the partitioned topic.
admin = KafkaAdminClient(
    client_id='admin',
    bootstrap_servers=bootstrap_servers,
)

# Two partitions, so the following cells can target partition 0 and 1 explicitly.
topic = NewTopic(name=topic_name_partitioned, num_partitions=2, replication_factor=1)

# Create the topic only when it does not exist yet. Without this guard,
# re-running the cell (e.g. Restart & Run All) fails with
# TopicAlreadyExistsError because the topic survives on the broker.
create_response = None
if topic_name_partitioned not in admin.list_topics():
    create_response = admin.create_topics([topic], timeout_ms=int(timeout_ms))

# Last expression of the cell: the broker's response on first creation,
# nothing on later runs.
create_response

CreateTopicsResponse_v3(throttle_time_ms=0, topic_errors=[(topic='kafka-localhost-python-partitioned', error_code=0, error_message=None)])

And now push data to the two partitions

In [19]:
# One order per partition; the target partition is chosen explicitly
# through the partition= argument.
partitioned_orders = [
    (0, {"id": 1}, {"name": "👨 Frank", "pizza": "Margherita 🍕"}),
    (1, {"id": 2}, {"name": "👩 Adele", "pizza": "Hawaii 🍕+🍍+🥓"}),
]
for target_partition, order_key, order_value in partitioned_orders:
    producer.send(
        topic_name_partitioned,
        key=order_key,
        value=order_value,
        partition=target_partition,
    )
producer.flush()

In [20]:
# A second record for each partition, reusing the same keys as the first batch.
second_batch = [
    (0, {"id": 1}, {"name": "🙎 Mark", "pizza": "Banana 🍕+🍌"}),
    (1, {"id": 2}, {"name": "👨 Jan", "pizza": "Mushrooms 🍕+🍄"}),
]
for target_partition, order_key, order_value in second_batch:
    producer.send(
        topic_name_partitioned,
        key=order_key,
        value=order_value,
        partition=target_partition,
    )
producer.flush()

Read from partition 0

In [21]:
def _decode_json(raw):
    """Decode ASCII bytes and parse the resulting text as JSON."""
    return json.loads(raw.decode('ascii'))


# Consumer that the next cell assigns to partition 0.
consumer_partition_0 = KafkaConsumer(
    bootstrap_servers=bootstrap_servers,
    group_id=group_id + "-partitioned",
    auto_offset_reset='earliest',
    key_deserializer=_decode_json,
    value_deserializer=_decode_json,
    max_poll_records=10,
)

In [22]:
tp = TopicPartition(topic_name_partitioned, 0)

# Take partition 0 by manual assignment and rewind to its first record.
consumer_partition_0.assign([tp])
consumer_partition_0.seek_to_beginning(tp)

# Snapshot the offset bounds: the first available offset and the end offset
# (the offset the next produced record would get).
firstOffset = consumer_partition_0.beginning_offsets([tp])[tp]
lastOffset = consumer_partition_0.end_offsets([tp])[tp]

# Guard against an empty partition: it never yields a record, and since this
# consumer has no consumer_timeout_ms the loop would otherwise block forever.
if lastOffset > firstOffset:
    for message in consumer_partition_0:
        print ("p=%d o=%d value=%s" % (message.partition,
                                       message.offset,
                                       message.value))
        # Stop at the last record covered by the snapshot; >= (not ==) so a
        # skipped offset cannot make the loop run past it.
        if message.offset >= lastOffset - 1:
            break

p=0 o=0 value={'name': '👨 Frank', 'pizza': 'Margherita 🍕'}
p=0 o=1 value={'name': '🙎 Mark', 'pizza': 'Banana 🍕+🍌'}


Read from partition 1

In [25]:
# Consumer that the next cell assigns to partition 1. Keys and values are
# decoded from ASCII bytes and parsed as JSON.
partition_1_settings = {
    "group_id": group_id + "-partitioned",
    "bootstrap_servers": bootstrap_servers,
    "value_deserializer": lambda v: json.loads(v.decode('ascii')),
    "key_deserializer": lambda k: json.loads(k.decode('ascii')),
    "auto_offset_reset": 'earliest',
    "max_poll_records": 10,
}
consumer_partition_1 = KafkaConsumer(**partition_1_settings)

In [26]:
tp = TopicPartition(topic_name_partitioned, 1)

# Take partition 1 by manual assignment and rewind to its first record.
consumer_partition_1.assign([tp])
consumer_partition_1.seek_to_beginning(tp)

# Snapshot the offset bounds: the first available offset and the end offset
# (the offset the next produced record would get).
firstOffset = consumer_partition_1.beginning_offsets([tp])[tp]
lastOffset = consumer_partition_1.end_offsets([tp])[tp]

# Guard against an empty partition: it never yields a record, and since this
# consumer has no consumer_timeout_ms the loop would otherwise block forever.
if lastOffset > firstOffset:
    for message in consumer_partition_1:
        # The end offset is printed before every record, which is what the
        # recorded output of this cell shows.
        print(lastOffset)
        print ("p=%d o=%d value=%s" % (message.partition,
                                       message.offset,
                                       message.value))
        # Stop at the last record covered by the snapshot; >= (not ==) so a
        # skipped offset cannot make the loop run past it.
        if message.offset >= lastOffset - 1:
            break

2
p=1 o=0 value={'name': '👩 Adele', 'pizza': 'Hawaii 🍕+🍍+🥓'}
2
p=1 o=1 value={'name': '👨 Jan', 'pizza': 'Mushrooms 🍕+🍄'}


## New Consumer Group

In [27]:
from kafka import KafkaConsumer
import json

In [28]:
## NEW group_id #########
group_id='my_NEW_pizza_group'
bootstrap_servers="localhost:9092"
topic_name="kafka-localhost-python"

In [29]:
# Consumer that joins the new consumer group defined in the config cell.
consumer_new_group = KafkaConsumer(
    bootstrap_servers=bootstrap_servers,
    # Previously omitted: without group_id the consumer belongs to no group
    # at all, so the "new consumer group" this section sets up was never used.
    # NOTE(review): once the group has committed offsets, a re-run presumably
    # resumes after them instead of starting from the earliest record.
    group_id=group_id,
    value_deserializer=lambda v: json.loads(v.decode('ascii')),
    key_deserializer=lambda v: json.loads(v.decode('ascii')),
    # A group with no committed offsets starts from the oldest record.
    auto_offset_reset='earliest',
    max_poll_records=10,
    # Stop iterating after 1 s without new records so the read loop ends.
    consumer_timeout_ms=1000,
)

In [30]:
# Subscribe to the topic named by topic_name, then show the resulting
# subscription as the cell output.
consumer_new_group.subscribe(topics=[topic_name])
consumer_new_group.subscription()

{'kafka-localhost-python'}

In [31]:
# Print each record as "partition:offset: key=<key> value=<value>". The
# consumer was created with consumer_timeout_ms=1000, so the loop ends once
# no record arrives within that window.
for message in consumer_new_group:
    fields = (message.partition, message.offset, message.key, message.value)
    print("%d:%d: key=%s value=%s" % fields)

0:0: key={'id': 1} value={'name': '👨 Francesco', 'pizza': 'Margherita 🍕'}
0:1: key={'id': 2} value={'name': '👩 Adele', 'pizza': 'Hawaii 🍕+🍍+🥓'}
0:2: key={'id': 3} value={'name': '👦 Mark', 'pizza': 'Choccolate 🍕+🍫'}
0:3: key={'id': 4} value={'name': '👨 Dan', 'pizza': 'Fries 🍕+🍟'}
