In [None]:
import hopsworks
import json
import os
import pandas as pd
from confluent_kafka import Producer

In [None]:
# Log in to Hopsworks. The returned project handle is used by the cells below
# for its name, its Kafka API and its feature store.
project = hopsworks.login()

In [None]:
# Names of the Kafka topic (prefixed with the project name) and of its schema.
KAFKA_TOPIC_NAME = f"{project.name}_real_time_live_transactions"
SCHEMA_NAME = "live_transactions_schema"

kafka_api = project.get_kafka_api()

# Avro record describing one live transaction. Every field is a union with
# "null", i.e. every field is optional.
schema = {
    "type": "record",
    "name": SCHEMA_NAME,
    "namespace": "ai.hopsworks.examples.feldera.fraud",
    "fields": [
        {"name": "transaction_id", "type": ["null", "string"]},
        {
            "name": "transaction_time",
            "type": ["null", {"type": "long", "logicalType": "timestamp-micros"}],
        },
        {"name": "card_id", "type": ["null", "string"]},
        {"name": "customer_id", "type": ["null", "string"]},
        {"name": "merchant_id", "type": ["null", "string"]},
        {"name": "amount", "type": ["null", "double"]},
        {"name": "transaction_category_name", "type": ["null", "string"]},
        {"name": "location", "type": ["null", "string"]},
    ],
}

# Register the schema and create the topic only when the project has no topic
# with this name yet, so the cell can be re-run safely.
if not any(topic.name == KAFKA_TOPIC_NAME for topic in kafka_api.get_topics()):
    kafka_api.create_schema(SCHEMA_NAME, schema)
    # NOTE(review): the third positional argument (1) is presumably the schema
    # version to attach to the topic - confirm against the Hopsworks KafkaApi docs.
    kafka_api.create_topic(
        KAFKA_TOPIC_NAME,
        SCHEMA_NAME,
        1,
        replicas=1,
        partitions=1,
    )

In [None]:
# Set up the Kafka producer.
# The topic/schema names and the Kafka API handle are re-declared here with the
# same values as in the topic-creation cell, so this cell only needs `project`.
KAFKA_TOPIC_NAME = f"{project.name}_real_time_live_transactions"
SCHEMA_NAME = "live_transactions_schema"

kafka_api = project.get_kafka_api()

fs = project.get_feature_store()
# NOTE(review): `_storage_connector_api` is a private attribute of the feature
# store object and may change between library versions. The `True` argument
# presumably asks for the externally reachable connector - confirm.
kafka_config = fs._storage_connector_api.get_kafka_connector(fs.id, True).confluent_options()

# Show the effective client configuration for debugging, but never write
# credentials into the notebook output: secret-looking values are masked.
_SENSITIVE_MARKERS = ("password", "secret", "pem")
print({
    key: "***" if any(marker in str(key).lower() for marker in _SENSITIVE_MARKERS) else value
    for key, value in kafka_config.items()
})
producer = Producer(kafka_config)

In [None]:
# Read the historical transactions from the batch feature group; they are
# produced to Kafka as records further down. `transaction_time` is rendered as
# 'YYYY-MM-DD HH:MM:SS' text before the rows are serialized.
transactions_pdf = (
    fs.get_feature_group(name="transactions", version=1)
    .read()
    .assign(
        transaction_time=lambda frame: frame["transaction_time"].dt.strftime('%Y-%m-%d %H:%M:%S')
    )
)

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes). Note that
# `transaction_time` was formatted with strftime in the previous cell, so it is
# a text column here rather than a datetime column.
transactions_pdf.info()

In [None]:
# Preview only the first rows: enough to sanity-check the columns without
# rendering more (possibly sensitive) transaction data than needed.
transactions_pdf.head()

In [None]:
# Send every transaction to the Kafka topic as one JSON message per row.
FLUSH_EVERY = 50000  # rows between blocking flushes / progress messages

# Errors reported asynchronously by the client for messages that were not delivered.
failed_deliveries = []


def _on_delivery(err, msg):
    """Delivery report callback: remember the error of every failed message."""
    if err is not None:
        failed_deliveries.append(err)


# Count rows by position instead of relying on the DataFrame index label, so the
# periodic flush also happens when the index is not a default 0..n-1 range.
for position, (index, transaction) in enumerate(transactions_pdf.iterrows(), start=1):
    payload = transaction.to_json()
    try:
        producer.produce(KAFKA_TOPIC_NAME, payload, on_delivery=_on_delivery)
    except BufferError:
        # The client's local queue is full: wait for in-flight messages to be
        # delivered, then retry this message once.
        producer.flush()
        producer.produce(KAFKA_TOPIC_NAME, payload, on_delivery=_on_delivery)

    # Serve pending delivery callbacks without blocking.
    producer.poll(0)

    if position % FLUSH_EVERY == 0:
        producer.flush()
        print(f'Finished sending index {index}')

# Block until every remaining message has been delivered (or has failed).
producer.flush()

if failed_deliveries:
    print(f'{len(failed_deliveries)} message(s) could not be delivered; first error: {failed_deliveries[0]}')