### Producing Data

In [1]:
from kafka import KafkaProducer

# Kafka broker address
bootstrap_servers = 'kafka:9092'

# Create a Kafka producer instance
producer = KafkaProducer(
    bootstrap_servers=bootstrap_servers,
    key_serializer=str.encode,  # Key serializer
    value_serializer=str.encode,  # Value serializer
)

# Topic to send messages to
topic = 'example_topic'

# Send messages with keys
for i in range(0,4):
    producer.send(topic, key="{}".format(i), value="message number {}".format(i))
    print(f"Sent message: Key={i}, Value=message number {i}")

# Close the producer
producer.close()

Sent message: Key=0, Value=message number 0
Sent message: Key=1, Value=message number 1
Sent message: Key=2, Value=message number 2
Sent message: Key=3, Value=message number 3


### Message Types

Kafka supports different payloads, let's have a look. Copy these examples into cells above your Consumer cell if you want to run the Kafka Consumer to see the output.

String is what we've been using recently and with all messages you can send with or without a key.

JSON is another type and very often used, here is an example:


In [4]:
import json
from kafka import KafkaProducer

# Kafka broker address
bootstrap_servers = 'kafka:9092'

producer = KafkaProducer(bootstrap_servers=bootstrap_servers,
value_serializer=lambda v: json.dumps(v).encode('utf-8'))


# Topic to send messages to
topic = 'example_topic'

# Create a Python dictionary representing your message
message = {
'id': 1,
'name': 'John Doe',
'age': 30
}

# Send the message in JSON format
producer.send(topic, value=message)

# Close the producer
producer.close()

cell 1 - Topic

In [5]:
from kafka.admin import KafkaAdminClient, NewTopic


admin_client = KafkaAdminClient(
    bootstrap_servers="kafka:9092", 
    client_id='test'
)

topic_list = []
topic_list.append(NewTopic(name="avro_topic_test", num_partitions=1, replication_factor=1))
admin_client.create_topics(new_topics=topic_list, validate_only=False)

TopicAlreadyExistsError: [Error 36] TopicAlreadyExistsError: Request 'CreateTopicsRequest_v3(create_topic_requests=[(topic='avro_topic_test', num_partitions=1, replication_factor=1, replica_assignment=[], configs=[])], timeout=30000, validate_only=False)' failed with response 'CreateTopicsResponse_v3(throttle_time_ms=0, topic_errors=[(topic='avro_topic_test', error_code=36, error_message="Topic 'avro_topic_test' already exists.")])'.

cell 2 - producer

In [7]:
from kafka import KafkaProducer
from avro import schema, io
import avro.schema
import avro.io 
from io import BytesIO

# Configure Kafka producer
bootstrap_servers = 'kafka:9092'  # Update with your Kafka broker address
topic = 'avro_topic_test'

# Load Avro schema from a file or define it programmatically
avro_schema = avro.schema.parse(open('./work/user.avsc', 'rb').read())


# Create a Python dictionary representing your message
message = {
    'first_name': 'John',
    'last_name': 'Doe',
    'age': 30
}

producer = KafkaProducer(
    bootstrap_servers=bootstrap_servers,
    # key_serializer=str.encode,  # Key serializer
    # value_serializer=str.encode,  # Value serializer
)

avro_bytes_writer = BytesIO()
avro_writer = io.DatumWriter(avro_schema)
encoder = io.BinaryEncoder(avro_bytes_writer)
avro_writer.write(message, encoder)
avro_bytes = avro_bytes_writer.getvalue()

# Send the Avro message to Kafka topic
producer.send(topic=topic, value=avro_bytes)

<kafka.producer.future.FutureRecordMetadata at 0xffff6abda2d0>

cell 3 - consumer

In [8]:
from kafka import KafkaConsumer
from avro.io import DatumReader, BinaryDecoder
from io import BytesIO
import avro.schema
import io

# Create a Kafka consumer instance
consumer = KafkaConsumer(
    'avro_topic_test',
    bootstrap_servers='kafka:9092',
    group_id='tester',
    auto_offset_reset='earliest'
)

avro_schema = avro.schema.parse(open('./work/user.avsc', 'rb').read())

for message in consumer:
    # Process the Avro message
    avro_reader = DatumReader(avro_schema)
    avro_bytes_reader = BytesIO(message.value)
    decoder = BinaryDecoder(avro_bytes_reader)
    decoded_message = avro_reader.read(decoder)
    print("working")
    # print(message)
    print(decoded_message)

working
{'first_name': 'John', 'last_name': 'Doe', 'age': 30}


KeyboardInterrupt: 

In [9]:
from kafka import KafkaProducer
import my_protobuf_pb2

# Configure Kafka producer
bootstrap_servers = 'localhost:9092'  # Update with your Kafka broker address
topic = 'my_topic'

# Create Kafka producer instance
producer = KafkaProducer(bootstrap_servers=bootstrap_servers,
                        value_serializer=lambda v: v.SerializeToString())

# Create a Protobuf message using your defined schema
message = my_protobuf_pb2.MyMessage()
message.id = 1
message.name = 'John Doe'
message.age = 30

# Send the message in Protobuf format
producer.send(topic, value=message)


ModuleNotFoundError: No module named 'my_protobuf_pb2'

### Consuming Data

In [3]:
from kafka import KafkaConsumer

# Create a Kafka consumer instance
consumer = KafkaConsumer(
    'example_topic',
    bootstrap_servers='kafka:9092',
    group_id='my_consumer_group',
    auto_offset_reset='earliest'
)

# Continuously poll for new messages
for message in consumer:
    # Decode the message value assuming it's in bytes
    message_value = message.value.decode('utf-8')
    print(message_value)

# Close the consumer connection
consumer.close()

{"id": 1, "name": "John Doe", "age": 30}
message number 0
message number 1
message number 2
message number 3
{"id": 1, "name": "John Doe", "age": 30}


KeyboardInterrupt: 