In [2]:
!pip install kafka-python pandas

Collecting kafka-python
  Downloading kafka_python-2.2.15-py2.py3-none-any.whl.metadata (10.0 kB)
Downloading kafka_python-2.2.15-py2.py3-none-any.whl (309 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.8/309.8 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: kafka-python
Successfully installed kafka-python-2.2.15


In [1]:
# Import libraries
import pandas as pd
from kafka import KafkaProducer
import json
import time
import logging

In [2]:
# Logging setup: create this notebook's logger, then configure the root
# logger at INFO level so progress messages are displayed in the cell output.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [3]:
# Initialize the Kafka producer.
#
# On success `producer` holds the KafkaProducer instance; on any exception
# raised by the constructor it is set to None so that later cells can detect
# the failure and skip sending.
try:
    producer = KafkaProducer(
        bootstrap_servers='kafka:9092',
        # Each message value is serialized as UTF-8 encoded JSON.
        value_serializer=lambda v: json.dumps(v).encode('utf-8'),
        api_version=(0, 10, 1)
    )
    # Report initialization only. The recorded output of this cell shows the
    # broker connection being established *after* this message was logged, so
    # a successful constructor call is not proof that the broker is reachable.
    log.info("Kafka Producer berhasil diinisialisasi.")
except Exception as e:
    log.error(f"Gagal terhubung ke Kafka: {e}")
    producer = None

INFO:__main__:Kafka Producer berhasil terhubung.
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-1, node_id=bootstrap-0 host=kafka:9092 <connecting> [IPv4 ('172.18.0.4', 9092)]>: connecting to kafka:9092 [('172.18.0.4', 9092) IPv4]
INFO:kafka.conn:<BrokerConnection client_id=kafka-python-producer-1, node_id=bootstrap-0 host=kafka:9092 <connected> [IPv4 ('172.18.0.4', 9092)]>: Connection complete.


In [4]:
# Read the dataset and stream it to Kafka one row at a time.
#
# Runs only when the producer was created by the initialization cell;
# otherwise a warning is logged and nothing is sent.
if producer:
    # Path of the CSV file, relative to the notebook's working directory
    file_path = 'data/predictive_maintenance.csv'

    try:
        df = pd.read_csv(file_path)
        log.info(f"Dataset berhasil dimuat. Jumlah baris: {len(df)}")

        # Kafka topic that receives the raw rows
        KAFKA_TOPIC = 'raw_sensor_data'

        # send() is asynchronous: a delivery failure is reported on the
        # returned future, not raised here. Collect failures through an
        # errback so the final log message reflects what actually happened.
        # (The errback is invoked by the producer; list.append keeps the
        # bookkeeping to a single atomic operation.)
        send_errors = []

        try:
            # Count rows with enumerate instead of the index label so the
            # progress log does not depend on a 0-based integer index.
            for sent_count, (_, row) in enumerate(df.iterrows(), start=1):
                # Convert the row into a dictionary (column name -> value)
                message = row.to_dict()

                # Queue the message and register the failure hook
                future = producer.send(KAFKA_TOPIC, value=message)
                future.add_errback(send_errors.append)

                # Log every 100th message to show progress
                if sent_count % 100 == 0:
                    log.info(f"Mengirim pesan ke-{sent_count}: {message}")

                # Pause 0.1 s between rows to simulate a real-time stream
                time.sleep(0.1)
        finally:
            # Always flush, even if the loop aborted midway, so messages
            # that were already queued are not left sitting in the buffer.
            producer.flush()

        # Only claim success when no delivery failure was reported
        if send_errors:
            log.error(
                f"{len(send_errors)} pesan gagal dikirim ke Kafka. "
                f"Error pertama: {send_errors[0]}"
            )
        else:
            log.info("Semua data berhasil dikirim ke Kafka.")

    except FileNotFoundError:
        # The hint no longer names a specific folder: the previous text
        # ('workspace') contradicted the configured path.
        log.error(f"File tidak ditemukan di path: {file_path}. Pastikan file CSV ada di lokasi tersebut.")
    except Exception as e:
        log.error(f"Terjadi error saat mengirim data: {e}")

else:
    log.warning("Producer tidak diinisialisasi. Proses pengiriman data dibatalkan.")

ERROR:__main__:File tidak ditemukan di path: work/predictive_maintenance.csv. Pastikan file CSV ada di folder 'workspace'.
