# Produce stock records to Kafka

## Setup

In [47]:
import os
from confluent_kafka import SerializingProducer, DeserializingConsumer
from confluent_kafka.serialization import StringSerializer, StringDeserializer
from confluent_kafka.admin import AdminClient, NewTopic
from uuid import uuid4
import sys, random
import csv, json
import time
from datetime import datetime 

In [44]:
# Kafka broker address(es), read from the environment so that no cluster
# address is hard-coded in the notebook. Fail fast when it is missing.
BOOTSTRAP_SERVERS = os.environ.get('BOOTSTRAP_SERVERS')
assert BOOTSTRAP_SERVERS is not None, 'BOOTSTRAP_SERVERS must be set'

# Input file; its rows are read and published by the producer cell.
STOCKS_CSV = "sample.csv"
assert os.path.exists(STOCKS_CSV), f'{STOCKS_CSV} file not found'

# Name of the topic the records are published to. The producer cell and the
# cleanup cell both use 'stock' (singular), so the constant must match it.
STOCKS_TOPIC = 'stock'

# Settings handed to SerializingProducer: keys and values are both plain
# UTF-8 strings (the values are JSON documents serialised with json.dumps).
PRODUCER_CONFIG = {
    'bootstrap.servers': BOOTSTRAP_SERVERS,
    'partitioner': 'murmur2_random',
    'key.serializer': StringSerializer('utf_8'),
    'value.serializer': StringSerializer('utf_8'),
}

## Utility functions

In [45]:
def get_topics():
    """Return the topics known to the cluster at BOOTSTRAP_SERVERS.

    Creates a throw-away AdminClient, asks it for the cluster metadata and
    returns that metadata's ``topics`` attribute unchanged.
    """
    client = AdminClient({'bootstrap.servers': BOOTSTRAP_SERVERS})
    cluster_metadata = client.list_topics()
    return cluster_metadata.topics

def delivery_report(err, msg):
    """Delivery callback passed to ``produce(on_delivery=...)``.

    Args:
        err: Error describing why delivery failed; falsy on success.
        msg: The message the report is about (not used here).

    Prints a single line for failed deliveries and stays silent otherwise.
    """
    if not err:
        # Successful delivery: nothing to report.
        return
    print('Message delivery failed: {}'.format(err))

## Producer

In [46]:
# Producer instance used by the send loop further down; its broker address,
# partitioner and key/value serializers all come from PRODUCER_CONFIG.
p = SerializingProducer(PRODUCER_CONFIG)

In [42]:
def construct_stock(row):
    """Build the record that is published for one CSV row.

    Args:
        row: Sequence of column values for one CSV line. Columns 2 and 6 are
            read, so it must contain at least 7 entries.

    Returns:
        dict holding the value of column 2 under the key "TODO" and the
        current UTC time under "timestamp", formatted like
        2023-10-13T08:16:13Z.

    Raises:
        IndexError: if ``row`` has fewer than 7 entries.
    """
    # The trailing "Z" designates UTC, so the UTC broken-down time must be
    # formatted here; formatting local time would mislabel the timestamp on
    # any host whose timezone is not UTC.
    str_date_time = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    # NOTE(review): both field names are still the placeholder "TODO". With a
    # duplicated key the later entry wins, so row[6] is evaluated but dropped
    # and only row[2] reaches the record. Replace the placeholders with the
    # real field names (an earlier variant converted the value with
    # float(row[2]) — confirm the intended type).
    stock = {"TODO": row[6],
             "TODO": row[2],
             "timestamp": str_date_time
             }
    return stock

In [50]:
# Publish one JSON message per CSV row, throttled to two messages per second.
n = 0
# newline='' is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly.
with open(STOCKS_CSV, newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    try:
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to send.
                continue
            stock = construct_stock(row)
            if n % 50 == 0:
                print(f"Produced {n} messages")
            payload = json.dumps(stock)
            while True:
                # Serve delivery callbacks for previously produced messages.
                p.poll(0)
                try:
                    p.produce('stock', value=payload, on_delivery=delivery_report)
                    break
                except BufferError:
                    # The local queue is full: report it, give the producer up
                    # to a second to drain, then retry this same message
                    # instead of abandoning the rest of the file.
                    sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(p))
                    p.poll(1)
            time.sleep(0.5)
            n = n + 1
    finally:
        # Always wait for outstanding messages, even when a row fails, so that
        # everything already queued is delivered.
        p.flush()
    

Produced 0 messages
Produced 50 messages
Produced 100 messages
Produced 150 messages
Produced 200 messages
Produced 250 messages
Produced 300 messages
Produced 350 messages
Produced 400 messages
Produced 450 messages
Produced 500 messages
Produced 550 messages
Produced 600 messages
Produced 650 messages
Produced 700 messages
Produced 750 messages
Produced 800 messages
Produced 850 messages
Produced 900 messages
Produced 950 messages
Produced 1000 messages
Produced 1050 messages
Produced 1100 messages
Produced 1150 messages
Produced 1200 messages
Produced 1250 messages
Produced 1300 messages
Produced 1350 messages
Produced 1400 messages
Produced 1450 messages
Produced 1500 messages
Produced 1550 messages
Produced 1600 messages
Produced 1650 messages
Produced 1700 messages
Produced 1750 messages
Produced 1800 messages


### Cleanup

In [28]:
# Uncomment to delete the topic once you are done:
# admin_client = AdminClient({"bootstrap.servers": BOOTSTRAP_SERVERS})
# admin_client.delete_topics(topics=['stock'])

{'stock': <Future at 0x7fb2fb7c9c10 state=running>}