In [None]:
from confluent_kafka import Producer
import time

# Kafka Configuration
conf = {
    'bootstrap.servers': "kafka-broker-1:29094,kafka-broker-2:29094",
}

# Create Producer Instance
producer = Producer(conf)

# Kafka Topic
topic = "simple-messages-topic"

# Errors reported by the delivery callback, collected so the final status
# line reflects what actually happened instead of assuming success.
delivery_errors = []


def delivery_report(err, msg):
    """Delivery callback: record and print any message that failed delivery."""
    if err is not None:
        delivery_errors.append(err)
        print(f"Delivery failed for {msg.value()!r}: {err}")


# Produce Messages
for i in range(10):
    message = f"Message {i}"
    # produce() only enqueues the message; the outcome arrives via the callback.
    producer.produce(topic, key=None, value=message, on_delivery=delivery_report)
    producer.poll(0)  # Serve pending delivery callbacks without blocking
    print(f"Produced: {message}")
    time.sleep(1)  # Simulate delay between messages

# Flush once at the end: waits for all queued messages and their callbacks.
producer.flush()

if delivery_errors:
    print(f"{len(delivery_errors)} message(s) failed delivery.")
else:
    print("All messages produced successfully!")

In [None]:
from confluent_kafka import Consumer, KafkaException

# Kafka Consumer Configuration
conf = {
    'bootstrap.servers': "kafka-broker-1:29094,kafka-broker-2:29094",
    'group.id': 'sample-group',
    'auto.offset.reset': 'earliest'  # Start consuming from the beginning
}

# Create Consumer Instance
consumer = Consumer(conf)
topic = "simple-messages-topic"

# Consume Messages until interrupted (Ctrl+C / kernel interrupt)
try:
    # Subscribe inside the try so the consumer is closed even if this fails.
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1.0)  # Poll for messages
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())

        payload = msg.value()
        if payload is None:
            # A record may carry no value; there is nothing to decode.
            print(f"Skipping message without a value at offset {msg.offset()}")
            continue

        try:
            text = payload.decode('utf-8')
        except UnicodeDecodeError as exc:
            # One bad record should not stop the whole consumer loop.
            print(f"Skipping undecodable message at offset {msg.offset()}: {exc}")
            continue

        print(f"Consumed: {text}")

except KeyboardInterrupt:
    print("Stopping Consumer...")
finally:
    consumer.close()

## Kafka Serializers

Serializers are used to convert objects into a format that can be sent over the network. Kafka supports different types of serializers:

1. **StringSerializer**: Converts strings into bytes. This is useful when your messages are simple strings.
2. **ByteArraySerializer**: Passes byte arrays through unchanged. This is useful when your messages are already serialized as bytes.
3. **AvroSerializer**: Converts Avro objects into bytes. This is useful when you are using Avro for schema management.
4. **JsonSerializer**: Converts JSON objects into bytes. This is useful when your messages are JSON objects.
5. **ProtobufSerializer**: Converts Protobuf objects into bytes. This is useful when you are using Protocol Buffers for schema management.

Choosing the right serializer depends on the format of your data and your use case.

In [None]:
from confluent_kafka import Producer
import json
import time

# Kafka Configuration
conf = {
    'bootstrap.servers': "kafka-broker-1:29094,kafka-broker-2:29094"
}

# Create Producer Instance
producer = Producer(conf)

# Kafka Topic
topic = "json-messages-topic"

# Errors reported by the delivery callback, collected so the final status
# line reflects what actually happened instead of assuming success.
delivery_errors = []


def delivery_report(err, msg):
    """Delivery callback: record and print any message that failed delivery."""
    if err is not None:
        delivery_errors.append(err)
        print(f"Delivery failed for {msg.value()!r}: {err}")


# Produce JSON Messages
for i in range(10):
    message = {'id': i, 'content': f"Message {i}"}
    payload = json.dumps(message).encode('utf-8')
    # produce() only enqueues the message; the outcome arrives via the callback.
    producer.produce(topic, key=None, value=payload, on_delivery=delivery_report)
    producer.poll(0)  # Serve pending delivery callbacks without blocking
    print(f"Produced: {message}")
    time.sleep(1)  # Simulate delay between messages

# Flush once at the end: waits for all queued messages and their callbacks.
producer.flush()

if delivery_errors:
    print(f"{len(delivery_errors)} message(s) failed delivery.")
else:
    print("All JSON messages produced successfully!")

In [None]:
from confluent_kafka import Consumer, KafkaException
import json

# Kafka Consumer Configuration
conf = {
    'bootstrap.servers': "kafka-broker-1:29094,kafka-broker-2:29094",
    'group.id': 'sample-group', # Consumer Group ID
    'auto.offset.reset': 'earliest'  # Start consuming from the beginning
}

# Create Consumer Instance
consumer = Consumer(conf)
topic = "json-messages-topic"

# Consume JSON Messages until interrupted (Ctrl+C / kernel interrupt)
try:
    # Subscribe inside the try so the consumer is closed even if this fails.
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1.0)  # Poll for messages
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())

        payload = msg.value()
        if payload is None:
            # A record may carry no value; there is nothing to parse.
            print(f"Skipping message without a value at offset {msg.offset()}")
            continue

        try:
            message = json.loads(payload.decode('utf-8'))
        except (UnicodeDecodeError, json.JSONDecodeError) as exc:
            # One malformed record should not stop the whole consumer loop.
            print(f"Skipping malformed message at offset {msg.offset()}: {exc}")
            continue

        print(f"Consumed: {message}")

except KeyboardInterrupt:
    print("Stopping Consumer...")
finally:
    consumer.close()

## Multiple Event Types in the Same Topic Without a Schema

In Kafka, it is possible to publish multiple event types to the same topic. This can be useful in scenarios where different types of events are logically related and should be processed together. However, without schema references, managing and consuming these events can become challenging. Here are some considerations and strategies:

1. **Event Type Identification**: Each event should include a field that identifies its type. This can be a simple string field like `event_type`.
    ```json
    {
        "event_type": "user_signup",
        "user_id": 123,
        "timestamp": "2023-10-01T12:34:56Z"
    }
    ```

2. **Event Validation**: Without schema references, it is important to validate the structure of each event type in the producer and consumer code. This can be done using custom validation logic or libraries like `jsonschema`.

3. **Consumer Logic**: Consumers need to handle different event types appropriately. This can be achieved by checking the `event_type` field and processing the event accordingly.
    ```python
    if event['event_type'] == 'user_signup':
        handle_user_signup(event)
    elif event['event_type'] == 'order_placed':
        handle_order_placed(event)
    ```

4. **Documentation**: Clearly document the structure of each event type and ensure that all producers and consumers adhere to these structures.

5. **Testing**: Thoroughly test the producer and consumer logic to ensure that all event types are handled correctly.

By following these strategies, you can effectively manage multiple event types in the same Kafka topic without relying on schema references.

## Challenges of Publishing Multiple Event Types in a Single Topic Without Schema

While it's possible to publish different event types to the same topic, this approach presents several challenges when not using a schema registry:

1. **Schema Evolution**: Without formal schema management, making changes to event structures becomes risky. Adding, removing, or modifying fields can break downstream consumers if they're not updated simultaneously.

2. **Type Safety**: No built-in type checking means field types can be inconsistent or incorrect. A field intended to be a number might accidentally be sent as a string.

3. **Versioning Complexity**: Managing different versions of events becomes manual and error-prone, requiring custom logic to handle backward/forward compatibility.

4. **Increased Development Overhead**: Developers must implement validation logic in both producers and consumers, duplicating effort and potentially introducing inconsistencies.

5. **Documentation Drift**: Without enforced schemas, documentation about event structures can become outdated or incomplete over time.

6. **Runtime Errors**: Problems with event formats are typically discovered at runtime rather than compile/build time, leading to production issues.

7. **Consumer Complexity**: Consumers need complex branching logic to handle different event types and their potential variations.

8. **Performance Impact**: Without the optimized serialization/deserialization that schemas provide, parsing and validation can be less efficient.

9. **Governance Challenges**: Enforcing standards across teams becomes difficult without centralized schema validation.

10. **Testing Burden**: Comprehensive testing becomes more critical and complex to ensure all event variations are properly handled.

These challenges increase with system scale, number of event types, and frequency of changes. Using a schema registry like Confluent Schema Registry with Avro, Protobuf, or JSON Schema can address many of these issues by providing centralized schema management, validation, and evolution controls.

In [None]:
from confluent_kafka import Producer
import json
import time

# Kafka Configuration
conf = {
    'bootstrap.servers': "kafka-broker-1:29094,kafka-broker-2:29094"
}

# Create Producer Instance
producer = Producer(conf)

# Kafka Topic
topic = "multi-event-types-topic"

# Produce Multiple Event Types
events = [
    {'event_type': 'user_signup', 'user_id': 1, 'timestamp': '2023-10-01T12:34:56Z'},
    {'event_type': 'order_placed', 'order_id': 101, 'amount': 250.75, 'timestamp': '2023-10-01T12:35:56Z'},
    {'event_type': 'user_signup', 'user_id': 2, 'timestamp': '2023-10-01T12:36:56Z'},
    {'event_type': 'order_placed', 'order_id': 102, 'amount': 150.50, 'timestamp': '2023-10-01T12:37:56Z'}
]

# Errors reported by the delivery callback, collected so the final status
# line reflects what actually happened instead of assuming success.
delivery_errors = []


def delivery_report(err, msg):
    """Delivery callback: record and print any event that failed delivery."""
    if err is not None:
        delivery_errors.append(err)
        print(f"Delivery failed for {msg.value()!r}: {err}")


for event in events:
    payload = json.dumps(event).encode('utf-8')
    # produce() only enqueues the event; the outcome arrives via the callback.
    producer.produce(topic, key=None, value=payload, on_delivery=delivery_report)
    producer.poll(0)  # Serve pending delivery callbacks without blocking
    print(f"Produced: {event}")
    time.sleep(1)  # Simulate delay between messages

# Flush once at the end: waits for all queued events and their callbacks.
producer.flush()

if delivery_errors:
    print(f"{len(delivery_errors)} event(s) failed delivery.")
else:
    print("All events produced successfully!")

In [None]:
from confluent_kafka import Consumer, KafkaException
import json

# Kafka Consumer Configuration
conf = {
    'bootstrap.servers': "kafka-broker-1:29094,kafka-broker-2:29094",
    'group.id': 'multi-event-group',
    'auto.offset.reset': 'earliest'  # Start consuming from the beginning
}

# Create Consumer Instance
consumer = Consumer(conf)
topic = "multi-event-types-topic"

# Consume Multiple Event Types until interrupted (Ctrl+C / kernel interrupt)
try:
    # Subscribe inside the try so the consumer is closed even if this fails.
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1.0)  # Poll for messages
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())

        payload = msg.value()
        if payload is None:
            # A record may carry no value; there is nothing to parse.
            print(f"Skipping message without a value at offset {msg.offset()}")
            continue

        try:
            event = json.loads(payload.decode('utf-8'))
        except (UnicodeDecodeError, json.JSONDecodeError) as exc:
            # One malformed record should not stop the whole consumer loop.
            print(f"Skipping malformed message at offset {msg.offset()}: {exc}")
            continue

        # Valid JSON is not necessarily an object (it may be a list, number, ...);
        # only a dict can carry an 'event_type' field.
        event_type = event.get('event_type') if isinstance(event, dict) else None

        if event_type == 'user_signup':
            print(f"User Signup Event: {event}")
        elif event_type == 'order_placed':
            print(f"Order Placed Event: {event}")
        else:
            print(f"Unknown Event Type: {event}")

except KeyboardInterrupt:
    print("Stopping Consumer...")
finally:
    consumer.close()