In [20]:
!pip install confluent-kafka



In [2]:
!pip install faker



In [22]:
!pip install protobuf



In [2]:
!pip install --upgrade protobuf

Collecting protobuf
  Using cached protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl (319 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 4.21.12
    Uninstalling protobuf-4.21.12:
      Successfully uninstalled protobuf-4.21.12
Successfully installed protobuf-5.29.3


In [3]:
!pip install googleapis-common-protos

Collecting googleapis-common-protos
  Using cached googleapis_common_protos-1.66.0-py2.py3-none-any.whl (221 kB)
Installing collected packages: googleapis-common-protos
Successfully installed googleapis-common-protos-1.66.0


In [4]:
pip install --upgrade confluent-kafka

Note: you may need to restart the kernel to use updated packages.


In [19]:
from pathlib import Path
from confluent_kafka import SerializingProducer
from confluent_kafka.serialization import StringSerializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.protobuf import ProtobufSerializer
from faker import Faker
import random
import os
import sys
import time

In [20]:
# Load environment variables from .env file
from dotenv import load_dotenv
dotenv_path = Path('/resources/.env')
# print(dotenv_path)
load_dotenv(dotenv_path=dotenv_path)

True

In [21]:
# Access environment variables
kafka_host = os.getenv('KAFKA_HOST_1')
topic_name = os.getenv('KAFKA_TOPIC_NAME')
replication_factor = int(os.getenv('KAFKA_REPLICATION'))
num_partitions = int(os.getenv('KAFKA_PARTITION'))
schema_registry_host = os.getenv('SCHEMA_REG_HOST')

In [22]:
# Protobuf schema import
sys.path.append('./protobuf')
import protobuf_schema_pb2

In [23]:
# # Kafka Configuration
bootstrap_servers = f'{kafka_host}:9092'
schema_registry_url = f'http://{schema_registry_host}:8081'

In [11]:
# Kafka Configuration
# bootstrap_servers = '172.22.0.3:9092'
# schema_registry_url = 'http://172.22.0.5:8081'

In [24]:
# Producer Configuration
producer_conf = {
    'bootstrap.servers': bootstrap_servers,
    'key.serializer': StringSerializer('utf_8'),
}

In [25]:
# Schema Registry and Protobuf Serializer
schema_registry_client = SchemaRegistryClient({'url': schema_registry_url})

protobuf_serializer = ProtobufSerializer(protobuf_schema_pb2.ProductSale,
                                         schema_registry_client,
                                         {'use.deprecated.format': False})

producer_conf['value.serializer'] = protobuf_serializer

In [26]:
# Kafka Producer
producer = SerializingProducer(producer_conf)
fake = Faker()

In [None]:
# Data generator function
def generate_product_sale_data():
    sale_data = protobuf_schema_pb2.ProductSale()
    sale_data.transaction_id = fake.uuid4()
    sale_data.sale_date = fake.date_time_this_year().strftime('%Y-%m-%d %H:%M:%S')
    sale_data.product = fake.random_element(elements=[
        'Hydrating Serum', 'Matte Lipstick', 'Vitamin C Serum',
        'Moisturizer', 'Eyeliner', 'Sunscreen', 'Foundation'
    ])
    sale_data.category = fake.random_element(elements=['Skincare', 'Makeup'])
    sale_data.unit_price = round(random.uniform(50000.0, 1000000.0), 2)  # Harga per unit (50-500)
    sale_data.units_sold = random.randint(1, 20)  # Unit terjual (1-20)
    sale_data.total_sales = round(sale_data.unit_price * sale_data.units_sold, 2)  # Total penjualan
    return sale_data

# Produce 300 messages    
for i in range(300):
    sale_data = generate_product_sale_data()
    try:
        print(f"Producing: {sale_data}")
        producer.produce(topic=topic_name, key=sale_data.transaction_id, value=sale_data)
        producer.flush()
        print(f"Message {i+1} sent")
    except Exception as e:
        print(f"Error sending message: {e}")
    time.sleep(5)  # Interval pengiriman data

DEBUG:httpcore.connection:connect_tcp.started host='dataeng-schema-registry' port=8081 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7fc31ef0bdf0>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 24 Jan 2025 16:03:59 GMT'), (b'X-Request-ID', b'a5d0395a-f0f4-4ae0-b0f8-b9ada7c51b8e'), (b'Content-Type', b'application/vnd.schemaregistry.v1+json'), (b'Vary', b'Accept-Encoding, User-Agent'), (b'Content-Length', b'8')])
INFO:httpx:HTTP Request: POST http://dataeng-schema-registry:8081/

Producing: transaction_id: "516b51d5-41aa-45eb-b520-e86512db4be9"
sale_date: "2025-01-07 16:15:13"
product: "Sunscreen"
category: "Makeup"
unit_price: 482510.48
units_sold: 3
total_sales: 1447531.44

Message 1 sent
Producing: transaction_id: "a11ce868-81a5-4f70-8ee6-851d195dfe6a"
sale_date: "2025-01-08 17:56:08"
product: "Hydrating Serum"
category: "Skincare"
unit_price: 212535.02
units_sold: 1
total_sales: 212535.02

Message 2 sent
Producing: transaction_id: "f732f970-6b30-4dd9-bded-4771fd230145"
sale_date: "2025-01-18 19:21:46"
product: "Sunscreen"
category: "Skincare"
unit_price: 547016.54
units_sold: 19
total_sales: 10393314.26

Message 3 sent
Producing: transaction_id: "eaf9b8c8-f27d-4515-8825-065acdf3a8cc"
sale_date: "2025-01-16 02:45:08"
product: "Vitamin C Serum"
category: "Makeup"
unit_price: 527243.35
units_sold: 4
total_sales: 2108973.4

Message 4 sent
Producing: transaction_id: "d7db5d1d-cb63-4730-99b3-437047bc2d37"
sale_date: "2025-01-23 09:49:30"
product: "Vitamin C Serum