## Kafka Ingestion

### Required Imports + Subscribing to topic

In [11]:
from confluent_kafka import Consumer
import uuid
import os
from dotenv import load_dotenv
import snowflake.connector
import os
# Load settings from a local .env file (if present) into the environment.
load_dotenv()

# Kafka connection settings, all supplied through environment variables.
KAFKA_SERVER = os.getenv('KAFKA_SERVER')
KAFKA_USERNAME = os.getenv('KAFKA_USERNAME')
KAFKA_PASSWORD = os.getenv('KAFKA_PASSWORD')
KAFKA_TOPIC_NAME = os.getenv('KAFKA_TOPIC_NAME')

# Fail fast with a readable message instead of letting an unset (None)
# setting surface later as an opaque error from the Kafka client.
_missing_kafka_settings = [
    name
    for name, value in (
        ('KAFKA_SERVER', KAFKA_SERVER),
        ('KAFKA_USERNAME', KAFKA_USERNAME),
        ('KAFKA_PASSWORD', KAFKA_PASSWORD),
        ('KAFKA_TOPIC_NAME', KAFKA_TOPIC_NAME),
    )
    if not value
]
if _missing_kafka_settings:
    raise EnvironmentError(
        'Missing required Kafka environment variables: '
        + ', '.join(_missing_kafka_settings)
    )

# Not used in this cell; presumably an accumulator for consumed
# messages in later cells -- TODO confirm or remove.
values = []

# Consumer with `confluent_kafka`
c = Consumer({
    'bootstrap.servers': KAFKA_SERVER,
    # A fresh UUID suffix gives every run its own consumer group id.
    'group.id': f'deleton{uuid.uuid1()}',
    'security.protocol': 'SASL_SSL',
    'sasl.mechanisms': 'PLAIN',
    'sasl.username': KAFKA_USERNAME,
    'sasl.password': KAFKA_PASSWORD,
    'session.timeout.ms': 6000,
    'heartbeat.interval.ms': 1000,
    'fetch.wait.max.ms': 6000,
    # With a brand-new group id there is presumably no committed offset,
    # so consumption should start at the newest messages -- TODO confirm.
    'auto.offset.reset': 'latest',
    'enable.auto.commit': 'false',
    'max.poll.interval.ms': '86400000',
    'topic.metadata.refresh.interval.ms': "-1",
    "client.id": 'id-002-005',
})

c.subscribe([KAFKA_TOPIC_NAME])


### Testing Kafka Messages

In [68]:
import json

# Upper bound on poll attempts; each attempt waits up to one second.
MAX_POLLS = 50

# Print the JSON payload of whatever arrives during a bounded number of polls.
# The loop variable is deliberately not `_`, which IPython reserves for the
# last cell output.
for poll_attempt in range(MAX_POLLS):
    kafka_message = c.poll(1)

    # poll() yields None when nothing arrived within the timeout.
    if kafka_message is None:
        continue

    # A returned message may carry a consumer error/event rather than a
    # payload; report it instead of trying to decode it.
    if kafka_message.error() is not None:
        print(f'Kafka error: {kafka_message.error()}')
        continue

    # Messages without a value have nothing to decode.
    payload = kafka_message.value()
    if payload is None:
        continue

    log = payload.decode('utf-8')
    jsoned = json.loads(log)
    print(jsoned)

{'log': '2022-10-04 10:24:47.698653 mendoza v9: [INFO]: Telemetry - hrt = 109; rpm = 49; power = 13.448875699999999\n'}
{'log': '2022-10-04 10:24:48.199335 mendoza v9: [INFO]: Ride - duration = 195.0; resistance = 30\n'}
{'log': '2022-10-04 10:24:48.700018 mendoza v9: [INFO]: Telemetry - hrt = 109; rpm = 46; power = 12.17413263\n'}
{'log': '2022-10-04 10:24:49.200693 mendoza v9: [INFO]: Ride - duration = 196.0; resistance = 30\n'}
{'log': '2022-10-04 10:24:49.701369 mendoza v9: [INFO]: Telemetry - hrt = 109; rpm = 49; power = 13.448875699999999\n'}
{'log': '2022-10-04 10:24:50.202035 mendoza v9: [INFO]: Ride - duration = 197.0; resistance = 30\n'}
{'log': '2022-10-04 10:24:50.702765 mendoza v9: [INFO]: Telemetry - hrt = 109; rpm = 44; power = 11.34454484\n'}
{'log': '2022-10-04 10:24:51.203444 mendoza v9: [INFO]: Ride - duration = 198.0; resistance = 30\n'}
{'log': '2022-10-04 10:24:51.704119 mendoza v9: [INFO]: Telemetry - hrt = 109; rpm = 48; power = 13.02004103\n'}
{'log': '2022-10-

### TODO: 

* Read Kafka data into a DataFrame (pandas or PySpark?)
* Write data into Snowflake

In [13]:
# Snowflake connection settings, looked up in the process environment.
# Each name is None when the corresponding variable is not set.
# NOTE(review): USER is also the conventional login-name variable on many
# Unix-like systems; if it is already set in the process environment, the
# value read here may not be the Snowflake user from .env -- confirm.
(
    USER,
    ACCOUNT,
    PASSWORD,
    WAREHOUSE,
    DATABASE,
    SCHEMA,
) = (
    os.getenv(setting_name)
    for setting_name in (
        'USER',
        'ACCOUNT',
        'PASSWORD',
        'WAREHOUSE',
        'DATABASE',
        'SCHEMA',
    )
)

In [16]:
# Open a Snowflake connection using the settings read from the environment,
# then create the cursor used to run queries.
# NOTE(review): the schema is a literal here while SCHEMA (read from the
# environment) is not used in this cell -- confirm this is intentional.
conn = snowflake.connector.connect(
    account=ACCOUNT,
    user=USER,
    password=PASSWORD,
    database=DATABASE,
    schema='ZOOKEEPERS_BATCH_PRODUCTION',
    warehouse=WAREHOUSE,
)
cs = conn.cursor()



In [18]:
# Quick connectivity check: run SHOW TABLES through the cursor and display
# at most MAX_TABLES_SHOWN of the returned rows as the cell output.
MAX_TABLES_SHOWN = 100
query = "SHOW TABLES"
result = cs.execute(query)
result.fetchmany(MAX_TABLES_SHOWN)

[]