In [21]:
# Avro schema files (value and key) used to serialize patient messages.
PATIENT_VALUE_SCHEMA = "../resources/schemas/patients.avsc"
PATIENT_KEY_SCHEMA = "../resources/key_schemas/patient_key.avsc"

# CSV file holding the patient rows that get published.
PATIENT_FILE = "../synthea/output/csv/patients.csv"

In [2]:
import os
import csv
from time import sleep
from typing import Dict

In [33]:
import sys
import os

# Add the project root (the parent of the current working directory) to
# sys.path. The membership check keeps this cell idempotent: re-running it
# does not pile up duplicate entries.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [17]:
# !pip install confluent_kafka
# !pip install cachetools
# !pip install authlib
# !pip install fastavro

In [18]:
from confluent_kafka import Producer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import SerializationContext, MessageField

In [36]:
from scripts.patient_record_key import PatientRecordKey, patient_record_key_to_dict
from scripts.patient_record import PatientRecord, patient_record_to_dict
# from settings import RIDE_KEY_SCHEMA_PATH, RIDE_VALUE_SCHEMA_PATH,SCHEMA_REGISTRY_URL, BOOTSTRAP_SERVERS, INPUT_DATA_PATH, KAFKA_TOPIC

In [74]:
# INPUT_DATA_PATH = '../resources/rides.csv'

# RIDE_KEY_SCHEMA_PATH = '../resources/schemas/taxi_ride_key.avsc'
# RIDE_VALUE_SCHEMA_PATH = '../resources/schemas/taxi_ride_value.avsc'

# Kafka topic the patient records are produced to.
FHIR_TOPIC = "FHIR_TOPIC"

# Endpoints passed to the Kafka producer and the schema-registry client.
BOOTSTRAP_SERVERS = "broker:29092"
SCHEMA_REGISTRY_URL = 'http://schema-registry:8081'

In [67]:
def delivery_report(err, msg):
    """Print the outcome of a single produce request.

    Passed as the ``on_delivery`` callback when producing messages.

    Args:
        err: ``None`` when the message was delivered, otherwise the error
            describing the failure.
        msg: The message the report is about; its ``key()``, ``topic()``,
            ``partition()`` and ``offset()`` accessors are used for the output.
    """
    record_key = msg.key()
    if err is None:
        print('Record {} successfully produced to {} [{}] at offset {}'.format(
            record_key, msg.topic(), msg.partition(), msg.offset()))
    else:
        print("Delivery failed for record {}: {}".format(record_key, err))

In [68]:
class PatientProducer():
    """Publish patient CSV rows to Kafka as Avro-serialized key/value messages.

    The constructor loads the key and value schemas from disk, builds one
    ``AvroSerializer`` for each against the configured schema registry, and
    creates the underlying ``Producer``.
    """

    def __init__(self, props: Dict):
        """Create the serializers and the Kafka producer.

        Args:
            props: Configuration mapping. The keys read here are
                ``'schema.key'`` and ``'schema.value'`` (paths of the schema
                files), ``'schema_registry.url'`` and ``'bootstrap.servers'``.

        Raises:
            KeyError: If one of the keys listed above is missing.
            OSError: If a schema file cannot be opened.
        """
        key_schema_str = self.load_schema(props['schema.key'])
        value_schema_str = self.load_schema(props['schema.value'])
        schema_registry_props = {'url': props['schema_registry.url']}
        schema_registry_client = SchemaRegistryClient(schema_registry_props)
        self.key_serializer = AvroSerializer(schema_registry_client, key_schema_str, patient_record_key_to_dict)
        self.value_serializer = AvroSerializer(schema_registry_client, value_schema_str, patient_record_to_dict)

        # Producer Configuration
        producer_props = {'bootstrap.servers': props['bootstrap.servers']}
        self.producer = Producer(producer_props)

    @staticmethod
    def load_schema(schema_path: str):
        """Return the full text of the schema file at ``schema_path``."""
        with open(schema_path) as f:
            return f.read()

    @staticmethod
    def read_records(resource_path: str):
        """Read the CSV at ``resource_path`` into (key, record) pairs.

        The first row is treated as the header and skipped. Every following
        row is wrapped in a ``PatientRecord`` and paired with a
        ``PatientRecordKey``.

        Args:
            resource_path: Path of the CSV file to read.

        Returns:
            A ``zip`` iterator of ``(PatientRecordKey, PatientRecord)`` pairs.
            It is single-use; an empty file yields no pairs.
        """
        patient_records, patient_keys = [], []
        # newline='' is what the csv module expects for file objects; without
        # it, line endings embedded in quoted fields get translated.
        with open(resource_path, 'r', newline='') as f:
            reader = csv.reader(f)
            # Skip the header. The default keeps an empty file from raising
            # StopIteration.
            next(reader, None)
            for row in reader:
                patient_records.append(PatientRecord(row))
                # NOTE(review): the key is built without any row data, so all
                # records appear to share one key -- confirm this is intended.
                patient_keys.append(PatientRecordKey())
        return zip(patient_keys, patient_records)

    def publish(self, topic: str, records: "Iterable[Tuple[PatientRecordKey, PatientRecord]]"):
        """Serialize and produce every (key, value) pair to ``topic``.

        A record that fails to serialize or enqueue is reported on stdout and
        skipped. A ``KeyboardInterrupt`` stops the loop early. In both cases
        the messages already queued are flushed before returning.

        Args:
            topic: Name of the Kafka topic to produce to.
            records: Iterable of ``(key, value)`` pairs, e.g. the result of
                ``read_records``.
        """
        for key, value in records:
            try:
                # Serve delivery callbacks of earlier messages so reports show
                # up as we go and the local queue is drained during long runs.
                self.producer.poll(0)
                serialized_key = self.key_serializer(
                    key, SerializationContext(topic=topic, field=MessageField.KEY))
                serialized_value = self.value_serializer(
                    value, SerializationContext(topic=topic, field=MessageField.VALUE))
                self.producer.produce(topic=topic,
                                      key=serialized_key,
                                      value=serialized_value,
                                      on_delivery=delivery_report)
            except KeyboardInterrupt:
                break
            except Exception as e:
                print(f"Exception while producing record - {value}: {e}")

        # flush() blocks until every queued message is delivered or has
        # failed, so no extra sleep is needed afterwards.
        self.producer.flush()

In [78]:
if __name__ == "__main__":
    # Settings read by PatientProducer: schema file paths first, then the
    # service endpoints.
    config = {
        'schema.key': PATIENT_KEY_SCHEMA,
        'schema.value': PATIENT_VALUE_SCHEMA,
        'schema_registry.url': SCHEMA_REGISTRY_URL,
        'bootstrap.servers': BOOTSTRAP_SERVERS,
    }
    producer = PatientProducer(config)
    # Load the (key, record) pairs from the patient CSV, then publish them.
    patient_records = producer.read_records(PATIENT_FILE)
    producer.publish(FHIR_TOPIC, patient_records)

Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 0
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 1
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 2
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 3
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 4
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 5
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 6
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 7
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 8
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produced to FHIR_TOPIC [0] at offset 9
Record b'\x00\x00\x00\x00\x01\x0epatient' successfully produ