In [34]:
from kafka import KafkaProducer, KafkaAdminClient
from kafka.admin import NewTopic
import json
import sys
from datetime import datetime

KAFKA_BROKER = "kafka.d2f.io.vn:9092"  # Replace with the address (host:port) of your Kafka broker
TOPIC = "model_retrain_event"  # Topic the retrain notification is published to

def create_topic_if_not_exists():
    """Create the topic named by TOPIC unless the broker already lists it.

    Connects to KAFKA_BROKER with an admin client, lists the existing topics
    and, if TOPIC is absent, creates it with 1 partition and a replication
    factor of 1.

    This is a best-effort step: every error is caught and printed, never
    raised, so the caller can still go on to publish. The admin client is
    always closed, whether or not the check/creation succeeded.

    Returns:
        None
    """
    admin_client = None
    try:
        admin_client = KafkaAdminClient(bootstrap_servers=KAFKA_BROKER)
        existing_topics = admin_client.list_topics()

        if TOPIC not in existing_topics:
            topic = NewTopic(name=TOPIC, num_partitions=1, replication_factor=1)
            admin_client.create_topics([topic])
            print(f"Topic '{TOPIC}' created successfully.")
        else:
            print(f"Topic '{TOPIC}' already exists.")

    except Exception as e:
        print(f"Error checking/creating topic: {e}")

    finally:
        # admin_client stays None when the constructor itself failed.
        if admin_client is not None:
            try:
                admin_client.close()
            except Exception as e:
                # Keep the "never raises" contract even if cleanup fails.
                print(f"Error closing Kafka admin client: {e}")

def notify_ai_module():
    """Publish a "data_ready" event to TOPIC on the broker at KAFKA_BROKER.

    The payload is a JSON-serialized dict with three keys: "event",
    "timestamp" (whole seconds since the Unix epoch) and "message".

    The function waits for the broker's acknowledgement before printing the
    confirmation, and always closes the producer.

    Returns:
        None

    Raises:
        Exception: whatever the producer raises while connecting, sending or
            confirming delivery; the error is propagated to the caller.
    """
    # Local import: this cell only imports the `datetime` class.
    from datetime import timezone

    # Upper bound, in seconds, on waiting for the delivery result.
    delivery_timeout_s = 10

    message = {
        "event": "data_ready",
        # datetime.utcnow() is naive, so .timestamp() would read it as local
        # time and skew the epoch by the host's UTC offset; use an aware value.
        "timestamp": int(datetime.now(timezone.utc).timestamp()),
        "message": "New data has been injected to Qdrant. AI module should retrain the model.",
    }

    producer = KafkaProducer(
        bootstrap_servers=KAFKA_BROKER,
        value_serializer=lambda v: json.dumps(v).encode("utf-8"),
    )
    try:
        future = producer.send(TOPIC, value=message)
        producer.flush()
        # flush() alone does not report a failed delivery; resolving the
        # future raises if the record could not be written.
        future.get(timeout=delivery_timeout_s)
    finally:
        producer.close()

    print(f"Sent notification to AI module: {message}")

if __name__ == "__main__":
    # Make sure the topic is there, then publish the event. If either call
    # raises, report the failure and call sys.exit(1).
    try:
        create_topic_if_not_exists()
        notify_ai_module()
    except Exception as exc:
        print(f"Failed to notify AI module: {exc!s}")
        sys.exit(1)


INFO:kafka.conn:<BrokerConnection client_id=kafka-python-2.0.6, node_id=bootstrap-0 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: connecting to kafka.d2f.io.vn:9092 [('103.155.161.100', 9092) IPv4]
INFO:kafka.conn:Probing node bootstrap-0 broker version
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-2.0.6, node_id=bootstrap-0 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: Connection complete.
INFO:kafka.conn:Broker version identified as 2.2.0
INFO:kafka.conn:Set configuration api_version=(2, 2, 0) to skip auto check_version requests on startup
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-2.0.6, node_id=1 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: connecting to kafka.d2f.io.vn:9092 [('103.155.161.100', 9092) IPv4]
INFO:kafka.conn:Probing node 1 broker version
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-2.0.6, node_id=1 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('10

Topic 'model_retrain_event' created successfully.


INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-24, node_id=1 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: Connection complete.
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-24, node_id=bootstrap-0 host=kafka.d2f.io.vn:9092 <connected> [IPv4 ('103.155.161.100', 9092)]>: Closing connection. 
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-24, node_id=1 host=kafka.d2f.io.vn:9092 <connected> [IPv4 ('103.155.161.100', 9092)]>: Closing connection. 


Sent notification to AI module: {'event': 'data_ready', 'timestamp': 1741611755, 'message': 'New data has been injected to Qdrant. AI module should retrain the model.'}


In [33]:
from kafka import KafkaProducer
import json
import sys
from datetime import datetime

KAFKA_BROKER = "kafka.d2f.io.vn:9092"  # Replace with the address (host:port) of your Kafka broker
TOPIC = "model_retrain_event"  # Topic the retrain notification is published to

def notify_ai_module():
    """Publish a "data_ready" event to TOPIC on the broker at KAFKA_BROKER.

    The payload is a JSON-serialized dict with three keys: "event",
    "timestamp" (whole seconds since the Unix epoch) and "message".

    The function waits for the broker's acknowledgement before printing the
    confirmation, and always closes the producer.

    Returns:
        None

    Raises:
        Exception: whatever the producer raises while connecting, sending or
            confirming delivery; the error is propagated to the caller.
    """
    # Local import: this cell only imports the `datetime` class.
    from datetime import timezone

    # Upper bound, in seconds, on waiting for the delivery result.
    delivery_timeout_s = 10

    message = {
        "event": "data_ready",
        # datetime.utcnow() is naive, so .timestamp() would read it as local
        # time and skew the epoch by the host's UTC offset; use an aware value.
        "timestamp": int(datetime.now(timezone.utc).timestamp()),
        "message": "New data has been injected to Qdrant. AI module should retrain the model.",
    }

    producer = KafkaProducer(
        bootstrap_servers=KAFKA_BROKER,
        value_serializer=lambda v: json.dumps(v).encode("utf-8"),
    )
    try:
        future = producer.send(TOPIC, value=message)
        producer.flush()
        # flush() alone does not report a failed delivery; resolving the
        # future raises if the record could not be written.
        future.get(timeout=delivery_timeout_s)
    finally:
        producer.close()

    print(f"Sent notification to AI module: {message}")

if __name__ == "__main__":
    # Publish the event. If the call raises, report the failure and call
    # sys.exit(1).
    try:
        notify_ai_module()
    except Exception as exc:
        print(f"Failed to notify AI module: {exc!s}")
        sys.exit(1)


INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-23, node_id=bootstrap-0 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: connecting to kafka.d2f.io.vn:9092 [('103.155.161.100', 9092) IPv4]
INFO:kafka.conn:Probing node bootstrap-0 broker version
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-23, node_id=bootstrap-0 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: Connection complete.
INFO:kafka.conn:Broker version identified as 2.2.0
INFO:kafka.conn:Set configuration api_version=(2, 2, 0) to skip auto check_version requests on startup
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-23, node_id=1 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]>: connecting to kafka.d2f.io.vn:9092 [('103.155.161.100', 9092) IPv4]
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-23, node_id=1 host=kafka.d2f.io.vn:9092 <connecting> [IPv4 ('103.155.161.100', 9092)]

Sent notification to AI module: {'event': 'data_ready', 'timestamp': 1741611678, 'message': 'New data has been injected to Qdrant. AI module should retrain the model.'}


In [3]:
from kafka import KafkaConsumer, TopicPartition

# Configuration: broker address (host:port) and the topic whose messages are counted
KAFKA_BROKER = "kafka.d2f.io.vn:9092"
TOPIC = "model_retrain_event"

def count_messages(bootstrap_servers, topic, group_id=None):
    """
    Calculate the total number of messages in a Kafka topic.

    For each partition the count is taken as (latest offset - earliest
    offset); no records are fetched and no offsets are committed. A line
    per partition is printed, in ascending partition order.

    NOTE(review): an offset difference presumably overcounts on topics where
    offsets are not contiguous (e.g. log compaction) -- confirm if that
    matters for this topic.

    Args:
        bootstrap_servers (str): Kafka bootstrap servers
        topic (str): Name of the Kafka topic
        group_id (str, optional): Consumer group ID

    Returns:
        int: Total number of messages in the topic, or -1 if any error
            occurred (the error is printed, not raised)
    """
    consumer = None
    try:
        # Initialize Kafka consumer
        consumer = KafkaConsumer(
            bootstrap_servers=bootstrap_servers,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            group_id=group_id
        )

        # Get topic partitions
        partitions = consumer.partitions_for_topic(topic)
        if not partitions:
            raise ValueError(f"Topic '{topic}' does not exist or has no partitions")

        total_messages = 0

        # Sorted so the per-partition report is printed in a stable order.
        for partition in sorted(partitions):
            tp = TopicPartition(topic, partition)

            # The consumer must own the partition before it can seek on it.
            consumer.assign([tp])

            # Get earliest offset
            consumer.seek_to_beginning(tp)
            earliest_offset = consumer.position(tp)

            # Get latest offset
            consumer.seek_to_end(tp)
            latest_offset = consumer.position(tp)

            # Calculate message count for this partition
            partition_messages = latest_offset - earliest_offset
            total_messages += partition_messages

            print(f"Partition {partition}: {partition_messages} messages "
                  f"(offsets {earliest_offset} to {latest_offset})")

        return total_messages

    except Exception as e:
        print(f"Error: {str(e)}")
        return -1

    finally:
        # Close on every path, including the missing-topic ValueError above.
        # consumer stays None when the constructor itself failed.
        if consumer is not None:
            try:
                consumer.close()
            except Exception as e:
                # Keep the "never raises" contract even if cleanup fails.
                print(f"Error closing Kafka consumer: {str(e)}")

def main():
    """Count the messages in TOPIC on KAFKA_BROKER and print the outcome.

    Prints two banner lines, calls count_messages() without a consumer
    group, then prints either the total or a failure message when the
    returned count is negative.
    """
    print(f"\nCalculating total messages in topic '{TOPIC}'...")
    print(f"Connecting to Kafka broker: {KAFKA_BROKER}")

    # group_id=None: no consumer group is used; pass a group ID here if needed.
    message_total = count_messages(
        bootstrap_servers=KAFKA_BROKER, topic=TOPIC, group_id=None
    )

    # A negative count is how count_messages() signals failure.
    if message_total < 0:
        print("Failed to calculate message count")
        return

    print(f"\nTotal messages in topic '{TOPIC}': {message_total}")

# Run the message count only when this code is executed as the main program.
if __name__ == "__main__":
    main()


Calculating total messages in topic 'model_retrain_event'...
Connecting to Kafka broker: kafka.d2f.io.vn:9092
Partition 0: 1 messages (offsets 13 to 14)

Total messages in topic 'model_retrain_event': 1


Collecting kafka-python
  Downloading kafka_python-2.0.6-py2.py3-none-any.whl (252 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m252.1/252.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: kafka-python
Successfully installed kafka-python-2.0.6
Note: you may need to restart the kernel to use updated packages.
