In [1]:
import json
import os
import pandas as pd
import requests
from confluent_kafka import Consumer, KafkaException


In [11]:
# HTTP headers sent with every prediction request: JSON body, JSON reply.
headers = dict.fromkeys(("accept", "Content-Type"), "application/json")

# Kafka consumer settings for the local broker.
# "auto.offset.reset": "latest" means that, when the group has no committed
# offset, consumption starts at the end of the topic (only new messages).
consumer_config = {
    "bootstrap.servers": "localhost:9092",
    "group.id": "mygroup",
    "auto.offset.reset": "latest",
}

# Module-level consumer shared by the cells below; it is closed by the
# polling cell, so this cell must be re-run before polling again.
consumer = Consumer(consumer_config)
consumer.subscribe(["diabetes_out.public.sink_diabetes"])

In [12]:
def process_message(msg, timeout=10.0):
    """Turn one polled Kafka message into a labelled record.

    The message payload is expected to be UTF-8 JSON with a ``"data"`` object.
    That object is posted to the prediction API and the returned label is
    stored on it under ``"Outcome"`` (``"Diabetes"`` -> 1, ``"Normal"`` -> 0).

    Args:
        msg: Result of ``consumer.poll()``; ``None`` when the poll timed out.
        timeout: Seconds to wait for the prediction API before giving up, so
            an unresponsive service cannot block the consumer forever.

    Returns:
        The ``"data"`` dict with ``"Outcome"`` added, or ``None`` when there
        is nothing to record (no message, unusable payload, failed request,
        or an unrecognised prediction label).

    Raises:
        KafkaException: If the message carries a Kafka error.
    """
    if msg is None:
        return None
    if msg.error():
        raise KafkaException(msg.error())

    raw = msg.value()
    if raw is None:
        # A message can legitimately have no payload (e.g. a tombstone record).
        print("Skipping message with empty payload")
        return None

    try:
        value = json.loads(raw.decode("utf-8"))["data"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers both invalid UTF-8 and invalid JSON. One bad
        # message must not take the whole consumer loop down.
        print(f"Skipping malformed message: {exc!r}")
        return None
    if not isinstance(value, dict):
        print(f"Skipping message whose 'data' is not an object: {value!r}")
        return None
    print(f"Received message: {value}")

    try:
        response = requests.post(
            "http://localhost:4001/predict",
            headers=headers,
            json=value,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"Failed to get prediction! ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to get prediction!")
        return None

    try:
        result_json = response.json()
        label = result_json["result"]
    except (ValueError, KeyError, TypeError):
        print("Failed to get prediction! (unexpected response body)")
        return None
    print("API response:", result_json)

    dic = {"Diabetes": 1, "Normal": 0}
    outcome = dic.get(label) if isinstance(label, str) else None
    if outcome is None:
        print(f"Unknown prediction label: {label!r}")
        return None
    value["Outcome"] = outcome

    return value

In [13]:
def append_to_csv(value, file_path="../data/diabetes-kafka/diabetes_new.csv"):
    """Append one labelled record as a row of the CSV at ``file_path``.

    The row is written in append mode, so the cost of each call does not grow
    with the size of the file. The header is written only when the file is
    missing or empty, and the parent directory is created on demand.

    Args:
        value: Mapping that contains every column listed below; extra keys
            are ignored.
        file_path: Destination CSV path.

    Raises:
        KeyError: If ``value`` lacks one of the expected columns.
    """
    columns = [
        "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
        "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"
    ]
    new_df = pd.DataFrame([[value[col] for col in columns]], columns=columns)

    parent = os.path.dirname(file_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # An existing but empty file still needs its header row.
    needs_header = (
        not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    )
    new_df.to_csv(file_path, mode="a", header=needs_header, index=False)

In [14]:
# Poll the topic until one message has been fully handled, then stop.
outcome_value = None
try:
    while True:
        msg = consumer.poll(1.0)
        result = process_message(msg)
        if not result:
            # Nothing usable came back from this poll; try again.
            continue
        append_to_csv(result)
        outcome_value = result["Outcome"]
        break  # Handle a single message, then stop, to make debugging easier
except KeyboardInterrupt:
    print("Aborted by user!")
finally:
    # Closes the shared module-level consumer; it must be recreated before
    # this cell can be run again.
    consumer.close()

print("Final Outcome returned:", outcome_value)

Aborted by user!
Final Outcome returned: None


In [15]:
import json, os
import pandas as pd
import requests
from confluent_kafka import Consumer, KafkaException

# Service endpoints and output location. The defaults are the values used
# inside the container network; each can be overridden through an environment
# variable so the same code also runs where those hostnames do not resolve
# (for example when executed from a notebook on the host machine).
KAFKA_SERVER = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "cdc-broker:29092")
API_URL = os.environ.get(
    "PREDICT_API_URL", "http://diabetes-service-api:4001/predict"
)
CSV_PATH = os.environ.get("DIABETES_CSV_PATH", "/data/diabetes_new.csv")

# Prediction labels returned by the API, mapped to the numeric Outcome column.
_OUTCOME_BY_LABEL = {"Diabetes": 1, "Normal": 0}

# Column order of the output CSV.
_CSV_COLUMNS = [
    "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
    "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"
]


def _extract_record(msg):
    """Return the ``"data"`` dict of a Kafka message, or None if unusable."""
    raw = msg.value()
    if raw is None:
        # A message can legitimately have no payload (e.g. a tombstone record).
        print("Skipping message with empty payload")
        return None
    try:
        record = json.loads(raw.decode("utf-8"))["data"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers both invalid UTF-8 and invalid JSON.
        print(f"Skipping malformed message: {exc!r}")
        return None
    if not isinstance(record, dict):
        print(f"Skipping message whose 'data' is not an object: {record!r}")
        return None
    return record


def _fetch_prediction(record, headers, timeout=10.0):
    """POST ``record`` to the prediction API; return its label or None."""
    try:
        response = requests.post(
            API_URL, headers=headers, json=record, timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"Failed to get prediction! ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to get prediction!")
        return None
    try:
        return response.json()["result"]
    except (ValueError, KeyError, TypeError):
        print("Failed to get prediction! (unexpected response body)")
        return None


def _append_row(record, path):
    """Append ``record`` to the CSV at ``path`` without rewriting the file."""
    row_df = pd.DataFrame(
        [[record[c] for c in _CSV_COLUMNS]], columns=_CSV_COLUMNS
    )
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # An existing but empty file still needs its header row.
    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
    row_df.to_csv(path, mode="a", header=needs_header, index=False)


def main():
    """Consume records from Kafka, label them via the API, append to the CSV.

    Runs until interrupted with Ctrl-C. A message that cannot be decoded,
    cannot be scored, or lacks an expected column is reported and skipped so
    that a single bad record does not stop the consumer.

    Raises:
        KafkaException: If a polled message carries a Kafka error.
    """
    consumer = Consumer({
        "bootstrap.servers": KAFKA_SERVER,
        "group.id": "mygroup",
        "auto.offset.reset": "latest",
    })

    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }

    try:
        consumer.subscribe(["diabetes_out.public.sink_diabetes"])

        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())

            value = _extract_record(msg)
            if value is None:
                continue
            print(f"Received message: {value}")

            result = _fetch_prediction(value, headers)
            if result is None:
                continue
            print("Prediction result:", result)

            outcome = (
                _OUTCOME_BY_LABEL.get(result) if isinstance(result, str) else None
            )
            if outcome is None:
                print(f"Unknown prediction label: {result!r}")
                continue
            value["Outcome"] = outcome

            missing = [c for c in _CSV_COLUMNS if c not in value]
            if missing:
                print(f"Skipping record with missing columns: {missing}")
                continue

            _append_row(value, CSV_PATH)

    except KeyboardInterrupt:
        print("Aborted by user!")

    finally:
        consumer.close()


if __name__ == "__main__":
    main()

%3|1746818902.114|FAIL|rdkafka#consumer-7| [thrd:cdc-broker:29092/bootstrap]: cdc-broker:29092/bootstrap: Failed to resolve 'cdc-broker:29092': nodename nor servname provided, or not known (after 14ms in state CONNECT)
%3|1746818903.135|FAIL|rdkafka#consumer-7| [thrd:cdc-broker:29092/bootstrap]: cdc-broker:29092/bootstrap: Failed to resolve 'cdc-broker:29092': nodename nor servname provided, or not known (after 2ms in state CONNECT, 1 identical error(s) suppressed)


Aborted by user!
