## Kafka Ingestion

### Required Imports + Subscribing to topic

In [11]:
from confluent_kafka import Consumer
import uuid
import os
from dotenv import load_dotenv
import snowflake.connector
import os
load_dotenv()

# Kafka connection settings are read from the environment (load_dotenv() ran above).
KAFKA_SERVER = os.getenv('KAFKA_SERVER')
KAFKA_USERNAME = os.getenv('KAFKA_USERNAME')
KAFKA_PASSWORD = os.getenv('KAFKA_PASSWORD')
KAFKA_TOPIC_NAME = os.getenv('KAFKA_TOPIC_NAME')

# Empty list created here; nothing in this cell fills it.
values = []

# Build the `confluent_kafka` consumer.
c = Consumer({
    # --- broker + authentication ---
    'bootstrap.servers': KAFKA_SERVER,
    # A uuid1 suffix is appended, so every run of this cell uses a different group.id.
    'group.id': f'deleton{uuid.uuid1()}',
    'security.protocol': 'SASL_SSL',
    'sasl.mechanisms': 'PLAIN',
    'sasl.username': KAFKA_USERNAME,
    'sasl.password': KAFKA_PASSWORD,
    # --- liveness / fetch timing (milliseconds) ---
    'session.timeout.ms': 6000,
    'heartbeat.interval.ms': 1000,
    'fetch.wait.max.ms': 6000,
    # --- offset handling: start from the newest messages, never auto-commit ---
    'auto.offset.reset': 'latest',
    'enable.auto.commit': 'false',
    'max.poll.interval.ms': '86400000',
    'topic.metadata.refresh.interval.ms': "-1",
    "client.id": 'id-002-005',
})

# Register interest in the single configured topic.
c.subscribe([KAFKA_TOPIC_NAME])


### Testing Kafka Messages

In [12]:
# Smoke test: poll the subscribed topic 50 times and print every payload received.
i = 0
while i < 50:
    i += 1  # counted up front so the `continue` paths below still advance the loop

    kafka_message = c.poll(0.5)  # wait up to 0.5 s for a message
    if kafka_message is None:
        # Nothing arrived within the timeout.
        continue

    # poll() may return an error/event object instead of a record; report it
    # rather than trying to decode it as a payload.
    if kafka_message.error():
        print(f'Kafka error: {kafka_message.error()}')
        continue

    payload = kafka_message.value()
    if payload is None:
        # A record without a value has nothing to decode; calling .decode() on
        # None would raise AttributeError.
        continue

    kafka_message_value = payload.decode('utf-8')

    print(kafka_message_value)

{"log": "2022-10-04 10:05:10.588793 mendoza v9: [INFO]: Ride - duration = 57.0; resistance = 30\n"}
{"log": "2022-10-04 10:05:11.089457 mendoza v9: [INFO]: Telemetry - hrt = 72; rpm = 32; power = 6.7621935010000005\n"}
{"log": "2022-10-04 10:05:11.590129 mendoza v9: [INFO]: Ride - duration = 58.0; resistance = 30\n"}
{"log": "2022-10-04 10:05:12.090798 mendoza v9: [INFO]: Telemetry - hrt = 77; rpm = 32; power = 6.7621935010000005\n"}
{"log": "2022-10-04 10:05:12.591476 mendoza v9: [INFO]: Ride - duration = 59.0; resistance = 30\n"}
{"log": "2022-10-04 10:05:13.092147 mendoza v9: [INFO]: Telemetry - hrt = 77; rpm = 31; power = 6.415485154\n"}
{"log": "2022-10-04 10:05:13.592819 mendoza v9: [INFO]: Ride - duration = 60.0; resistance = 30\n"}
{"log": "2022-10-04 10:05:14.093491 mendoza v9: [INFO]: Telemetry - hrt = 78; rpm = 32; power = 6.7621935010000005\n"}
{"log": "2022-10-04 10:05:14.594163 mendoza v9: [INFO]: Ride - duration = 61.0; resistance = 30\n"}
{"log": "2022-10-04 10:05:15.09

### TODO: 

* Read Kafka data into a DataFrame (pandas or PySpark?)
* Write data into Snowflake

In [13]:
# Snowflake connection settings, read from the environment in one pass.
# Any variable that is not set comes back as None.
# NOTE(review): generic names such as USER are often already set by the OS/shell;
# load_dotenv() as called above may not replace an existing value - confirm that
# the Snowflake login (not the local account name) is what ends up in USER.
USER, ACCOUNT, PASSWORD, WAREHOUSE, DATABASE, SCHEMA = (
    os.getenv(setting_name)
    for setting_name in ('USER', 'ACCOUNT', 'PASSWORD', 'WAREHOUSE', 'DATABASE', 'SCHEMA')
)

In [14]:
# Open a Snowflake session using the settings read from the environment and
# create a cursor for running statements.
conn = snowflake.connector.connect(
    user=USER,
    password=PASSWORD,
    account=ACCOUNT,
    warehouse=WAREHOUSE,
    database=DATABASE,
    # SCHEMA is loaded alongside the other settings but was never handed to the
    # connection; pass it so the session gets a current schema.
    schema=SCHEMA,
)
cs = conn.cursor()


In [15]:
# Connectivity check: ask Snowflake for its schema listing.
query = "SHOW SCHEMAS"
result = cs.execute(query)
# Fetch at most 100 rows; as the last expression, they become the cell's output.
result.fetchmany(size=100)

[(datetime.datetime(2022, 9, 29, 1, 9, 44, 900000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'BIKE',
  'N',
  'N',
  'DELATON',
  'SYSADMIN',
  '',
  '',
  '1'),
 (datetime.datetime(2022, 10, 4, 3, 5, 27, 865000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'INFORMATION_SCHEMA',
  'N',
  'N',
  'DELATON',
  '',
  'Views describing the contents of schemas in this database',
  '',
  '1'),
 (datetime.datetime(2022, 9, 29, 1, 9, 6, 401000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'PUBLIC',
  'N',
  'N',
  'DELATON',
  'SYSADMIN',
  '',
  '',
  '1'),
 (datetime.datetime(2022, 10, 4, 3, 4, 46, 918000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'ZOOKEEPERS_BATCH_PRODUCTION',
  'N',
  'N',
  'DELATON',
  'SYSADMIN',
  '',
  '',
  '1')]