In [27]:
pip install confluent_kafka

Looking in indexes: https://nexus.corp.indeed.com/repository/pypi/simple

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [28]:
import json
import os
import random
import time
from datetime import datetime

from confluent_kafka import Producer

In [None]:
# Connection settings for the Confluent Cloud cluster.
# The endpoint and credentials are read from environment variables so real
# secrets never have to be typed into (and saved with) the notebook; the
# literal fallbacks are placeholders only.
config = {
    'bootstrap.servers': os.environ.get(
        'KAFKA_BOOTSTRAP_SERVERS',
        'pkc-619z3.us-east1.gcp.confluent.cloud:9092',  # Replace with your bootstrap servers
    ),
    'security.protocol': 'SASL_SSL',
    'sasl.mechanisms': 'PLAIN',
    'sasl.username': os.environ.get('KAFKA_API_KEY', 'username'),  # Your API key
    'sasl.password': os.environ.get('KAFKA_API_SECRET', 'pass'),  # Your API secret
}


In [30]:
# Build the shared Kafka producer from the connection settings dict.
producer = Producer(config)

In [31]:
def delivery_report(err, msg):
    """
    Delivery callback: print a timestamped line describing the outcome.

    Parameters:
    - err: Delivery error, or None when the message was delivered
    - msg: The message object; its value(), topic() and partition() are
      only read on success
    """
    stamp = datetime.now().strftime('%H:%M:%S')

    # Failure path first: only the error itself is reported.
    if err is not None:
        print(f'[{stamp}] ❌ Message delivery failed: {err}')
        return

    # Success path: show at most the first 50 bytes of the payload.
    preview = msg.value()[:50]
    print(f'[{stamp}] ✅ Message {preview}... delivered to {msg.topic()} [partition: {msg.partition()}]')

In [32]:
def produce_messages(file_path, topic_name, batch_size=10, batch_pause=5, message_pause=0.5,
                     kafka_producer=None, on_delivery=None):
    """
    Produce the records of a JSON file to a Kafka topic with controlled pacing.

    The file is expected to contain a JSON array of objects. Each object gets a
    'produced_at' ISO timestamp added and is sent as one UTF-8 encoded JSON
    message.

    Parameters:
    - file_path: Path to the JSON file (read as UTF-8)
    - topic_name: Kafka topic name
    - batch_size: Number of messages to send before pausing (must be >= 1)
    - batch_pause: Seconds to pause between batches
    - message_pause: Seconds to pause between individual messages
    - kafka_producer: Producer to send with; defaults to the notebook-level
      `producer`
    - on_delivery: Delivery callback taking (err, msg); defaults to the
      notebook-level `delivery_report`

    Raises:
    - ValueError: if batch_size is smaller than 1
    """
    # Fail before anything is sent instead of dividing by zero mid-stream.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # The globals are only looked up when no explicit object was injected.
    client = producer if kafka_producer is None else kafka_producer
    report = delivery_report if on_delivery is None else on_delivery

    failed_count = 0

    def _track_delivery(err, msg):
        # Count failures so the final summary reflects what actually happened,
        # then hand over to the user-facing callback.
        nonlocal failed_count
        if err is not None:
            failed_count += 1
        report(err, msg)

    # Read everything up front so the file is not held open during the paced
    # loop; JSON is decoded as UTF-8 regardless of the platform locale.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    total_messages = len(data)

    print(f"\n🚀 Starting to process {total_messages} messages...")
    print(f"📊 Configuration: batch size={batch_size}, batch pause={batch_pause}s, message pause={message_pause}s\n")

    for i, record in enumerate(data, 1):
        # Add timestamp to the record for tracking
        record['produced_at'] = datetime.now().isoformat()
        payload = json.dumps(record).encode('utf-8')

        try:
            client.produce(topic_name, payload, callback=_track_delivery)
        except BufferError:
            # Local queue is full: serve callbacks to free space, then retry once.
            client.poll(1)
            client.produce(topic_name, payload, callback=_track_delivery)

        # Show progress
        progress = (i / total_messages) * 100
        print(f"\rProgress: {progress:.1f}% ({i}/{total_messages})", end='')

        # Serve delivery callbacks
        client.poll(0)

        # Pause between messages
        time.sleep(message_pause)

        # Longer pause at a batch boundary, but not after the very last message.
        if i % batch_size == 0 and i < total_messages:
            print(f"\n💤 Pausing for {batch_pause} seconds after batch of {batch_size} messages...")
            time.sleep(batch_pause)

    print("\n\n🔄 Flushing remaining messages...")
    # flush() reports how many messages are still queued; treat None as zero.
    still_queued = client.flush() or 0

    if failed_count or still_queued:
        print(f"⚠️ Finished with problems: {failed_count} failed, {still_queued} still queued (of {total_messages} messages).")
    else:
        print("✨ All messages processed successfully!")

In [None]:
# Usage example: stream the customer records file into the 'customers' topic.
file_path, topic_name = 'customers.json', 'customers'

# Pacing knobs (adjust to speed up or slow down the stream):
#   batch_size    - process 50 messages at a time
#   batch_pause   - pause for 3 seconds between batches
#   message_pause - pause for 0.1 seconds between messages
produce_messages(
    file_path,
    topic_name,
    batch_size=50,
    batch_pause=3,
    message_pause=0.1,
)


🚀 Starting to process 9999 messages...
📊 Configuration: batch size=50, batch pause=3s, message pause=0.1s

Progress: 0.1% (7/9999)

%6|1732302608.968|GETSUBSCRIPTIONS|rdkafka#producer-4| [thrd:main]: Telemetry client instance id changed from AAAAAAAAAAAAAAAAAAAAAA to qW1O0hHYRt6HauG0Vwlm9w


Progress: 0.1% (13/9999)[13:10:09] ✅ Message b'{"id": 3, "email": "fredy54@gmail.com", "first": "'... delivered to customers [partition: 0]
[13:10:09] ✅ Message b'{"id": 6, "email": "halie47@yahoo.com", "first": "'... delivered to customers [partition: 0]
[13:10:09] ✅ Message b'{"id": 7, "email": "loren_yundt@gmail.com", "first'... delivered to customers [partition: 0]
[13:10:09] ✅ Message b'{"id": 2, "email": "frederique19@gmail.com", "firs'... delivered to customers [partition: 2]
[13:10:09] ✅ Message b'{"id": 5, "email": "turner59@gmail.com", "first": '... delivered to customers [partition: 2]
[13:10:09] ✅ Message b'{"id": 10, "email": "frank34@yahoo.com", "first": '... delivered to customers [partition: 2]
[13:10:09] ✅ Message b'{"id": 1, "email": "isidro_von@hotmail.com", "firs'... delivered to customers [partition: 1]
[13:10:09] ✅ Message b'{"id": 4, "email": "braxton29@hotmail.com", "first'... delivered to customers [partition: 1]
[13:10:09] ✅ Message b'{"id": 8, "email": "kento