## About this notebook 
This notebook is used to send data to the `green-trips` topic with a 1 second delay to simulate messages being produced and sent to the message queue at a regular interval.

In [2]:
# imports
import time 
import json
import pandas as pd 
from kafka import KafkaProducer

In [3]:
# helper functions
def json_serializer(data):
    return json.dumps(data).encode('utf-8')

def peek(mini_batch, batch_id):
    first_row = mini_batch.take(1)
    if first_row:
        print(first_row[0])

In [4]:
# Configure Producer
# Config producer 
server = 'localhost:9092'

producer = KafkaProducer(
    bootstrap_servers=[server],
    value_serializer=json_serializer
)

# Check connection to producer
producer.bootstrap_connected()

True

In [5]:
# Read in green taxi data; create a list for required columns
filename = '../green_tripdata_2019-10.csv.gz'
df_green = pd.read_csv(filename)

# required columns
sel_cols = [
    'lpep_pickup_datetime',
    'lpep_dropoff_datetime',
    'PULocationID',
    'DOLocationID', 
    'passenger_count',
    'trip_distance',
    'tip_amount'
]

  df_green = pd.read_csv(filename)


In [6]:
# Send data with a delay; only send required columns
delay = 1
t_green0 = time.time()
for row in df_green.itertuples(index=False):
    row_dict = {col: getattr(row, col) for col in row._fields if col in sel_cols}

    producer.send('green-trips', value=row_dict)
    print(f"Sent: {row_dict}")
    time.sleep(delay)

producer.flush()
t_green1 = time.time()

print(f"Time to send green trip data {(t_green1 - t_green0):.2f} seconds")

Sent: {'lpep_pickup_datetime': '2019-10-01 00:26:02', 'lpep_dropoff_datetime': '2019-10-01 00:39:58', 'PULocationID': 112, 'DOLocationID': 196, 'passenger_count': 1.0, 'trip_distance': 5.88, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:18:11', 'lpep_dropoff_datetime': '2019-10-01 00:22:38', 'PULocationID': 43, 'DOLocationID': 263, 'passenger_count': 1.0, 'trip_distance': 0.8, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:09:31', 'lpep_dropoff_datetime': '2019-10-01 00:24:47', 'PULocationID': 255, 'DOLocationID': 228, 'passenger_count': 2.0, 'trip_distance': 7.5, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:37:40', 'lpep_dropoff_datetime': '2019-10-01 00:41:49', 'PULocationID': 181, 'DOLocationID': 181, 'passenger_count': 1.0, 'trip_distance': 0.9, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:08:13', 'lpep_dropoff_datetime': '2019-10-01 00:17:56', 'PULocationID': 97, 'DOLocationID': 188, 'passenger_count': 1.

Sent: {'lpep_pickup_datetime': '2019-10-01 00:54:17', 'lpep_dropoff_datetime': '2019-10-01 01:17:54', 'PULocationID': 25, 'DOLocationID': 75, 'passenger_count': 1.0, 'trip_distance': 10.3, 'tip_amount': 6.91}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:22:21', 'lpep_dropoff_datetime': '2019-10-01 00:34:36', 'PULocationID': 244, 'DOLocationID': 239, 'passenger_count': 1.0, 'trip_distance': 5.1, 'tip_amount': 2.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:04:20', 'lpep_dropoff_datetime': '2019-10-01 00:15:24', 'PULocationID': 82, 'DOLocationID': 226, 'passenger_count': 1.0, 'trip_distance': 2.36, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:15:36', 'lpep_dropoff_datetime': '2019-10-01 00:30:16', 'PULocationID': 66, 'DOLocationID': 189, 'passenger_count': 1.0, 'trip_distance': 2.54, 'tip_amount': 2.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:35:23', 'lpep_dropoff_datetime': '2019-10-01 01:08:45', 'PULocationID': 22, 'DOLocationID': 37, 'passenger_count': 2.0

Sent: {'lpep_pickup_datetime': '2019-10-01 00:04:41', 'lpep_dropoff_datetime': '2019-10-01 00:20:39', 'PULocationID': 74, 'DOLocationID': 239, 'passenger_count': 1.0, 'trip_distance': 3.64, 'tip_amount': 3.71}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:57:24', 'lpep_dropoff_datetime': '2019-10-01 01:08:01', 'PULocationID': 168, 'DOLocationID': 60, 'passenger_count': 1.0, 'trip_distance': 2.43, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:53:10', 'lpep_dropoff_datetime': '2019-10-01 00:56:50', 'PULocationID': 129, 'DOLocationID': 129, 'passenger_count': 1.0, 'trip_distance': 0.38, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:57:47', 'lpep_dropoff_datetime': '2019-10-01 01:00:14', 'PULocationID': 41, 'DOLocationID': 42, 'passenger_count': 4.0, 'trip_distance': 0.6, 'tip_amount': 3.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:34:46', 'lpep_dropoff_datetime': '2019-10-01 00:44:34', 'PULocationID': 260, 'DOLocationID': 7, 'passenger_count': 1.0

Sent: {'lpep_pickup_datetime': '2019-10-01 00:15:31', 'lpep_dropoff_datetime': '2019-10-01 00:19:25', 'PULocationID': 256, 'DOLocationID': 255, 'passenger_count': 1.0, 'trip_distance': 0.62, 'tip_amount': 4.2}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:48:14', 'lpep_dropoff_datetime': '2019-10-01 00:53:53', 'PULocationID': 129, 'DOLocationID': 260, 'passenger_count': 1.0, 'trip_distance': 0.68, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:16:45', 'lpep_dropoff_datetime': '2019-10-01 00:31:01', 'PULocationID': 74, 'DOLocationID': 235, 'passenger_count': 1.0, 'trip_distance': 3.74, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:10:08', 'lpep_dropoff_datetime': '2019-10-01 00:15:52', 'PULocationID': 24, 'DOLocationID': 152, 'passenger_count': 1.0, 'trip_distance': 1.28, 'tip_amount': 1.95}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:12:45', 'lpep_dropoff_datetime': '2019-10-01 00:13:00', 'PULocationID': 92, 'DOLocationID': 92, 'passenger_count': 

Sent: {'lpep_pickup_datetime': '2019-10-01 00:48:33', 'lpep_dropoff_datetime': '2019-10-01 00:53:42', 'PULocationID': 74, 'DOLocationID': 168, 'passenger_count': 1.0, 'trip_distance': 1.24, 'tip_amount': 2.34}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:03:09', 'lpep_dropoff_datetime': '2019-10-01 00:28:01', 'PULocationID': 82, 'DOLocationID': 75, 'passenger_count': 1.0, 'trip_distance': 8.19, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:02:26', 'lpep_dropoff_datetime': '2019-10-01 00:12:24', 'PULocationID': 74, 'DOLocationID': 69, 'passenger_count': 2.0, 'trip_distance': 2.28, 'tip_amount': 2.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:25:15', 'lpep_dropoff_datetime': '2019-10-01 00:47:25', 'PULocationID': 129, 'DOLocationID': 28, 'passenger_count': 1.0, 'trip_distance': 9.22, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:44:27', 'lpep_dropoff_datetime': '2019-10-01 00:47:17', 'PULocationID': 223, 'DOLocationID': 223, 'passenger_count': 1.

Sent: {'lpep_pickup_datetime': '2019-10-01 00:05:53', 'lpep_dropoff_datetime': '2019-10-01 00:12:36', 'PULocationID': 260, 'DOLocationID': 223, 'passenger_count': 1.0, 'trip_distance': 2.13, 'tip_amount': 1.96}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:25:39', 'lpep_dropoff_datetime': '2019-10-01 00:31:54', 'PULocationID': 95, 'DOLocationID': 95, 'passenger_count': 1.0, 'trip_distance': 1.21, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:08:36', 'lpep_dropoff_datetime': '2019-10-01 00:19:27', 'PULocationID': 41, 'DOLocationID': 244, 'passenger_count': 1.0, 'trip_distance': 2.34, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:29:22', 'lpep_dropoff_datetime': '2019-10-01 00:45:14', 'PULocationID': 25, 'DOLocationID': 188, 'passenger_count': 1.0, 'trip_distance': 2.6, 'tip_amount': 2.85}
Sent: {'lpep_pickup_datetime': '2019-10-01 00:13:24', 'lpep_dropoff_datetime': '2019-10-01 00:19:39', 'PULocationID': 74, 'DOLocationID': 262, 'passenger_count': 1

Sent: {'lpep_pickup_datetime': '2019-10-01 01:46:06', 'lpep_dropoff_datetime': '2019-10-01 01:46:24', 'PULocationID': 152, 'DOLocationID': 152, 'passenger_count': 1.0, 'trip_distance': 0.0, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:50:48', 'lpep_dropoff_datetime': '2019-10-01 02:09:38', 'PULocationID': 25, 'DOLocationID': 165, 'passenger_count': 1.0, 'trip_distance': 4.63, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:21:14', 'lpep_dropoff_datetime': '2019-10-01 01:25:46', 'PULocationID': 42, 'DOLocationID': 41, 'passenger_count': 1.0, 'trip_distance': 1.07, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:32:57', 'lpep_dropoff_datetime': '2019-10-01 01:47:32', 'PULocationID': 166, 'DOLocationID': 166, 'passenger_count': 1.0, 'trip_distance': 1.95, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:50:05', 'lpep_dropoff_datetime': '2019-10-01 01:51:50', 'PULocationID': 166, 'DOLocationID': 152, 'passenger_count': 1

Sent: {'lpep_pickup_datetime': '2019-10-01 01:03:32', 'lpep_dropoff_datetime': '2019-10-01 01:11:04', 'PULocationID': 82, 'DOLocationID': 173, 'passenger_count': 1.0, 'trip_distance': 1.57, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:34:05', 'lpep_dropoff_datetime': '2019-10-01 01:40:26', 'PULocationID': 129, 'DOLocationID': 129, 'passenger_count': 1.0, 'trip_distance': 0.8, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:50:11', 'lpep_dropoff_datetime': '2019-10-01 02:03:01', 'PULocationID': 78, 'DOLocationID': 182, 'passenger_count': 1.0, 'trip_distance': 2.28, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:59:17', 'lpep_dropoff_datetime': '2019-10-01 02:09:22', 'PULocationID': 92, 'DOLocationID': 252, 'passenger_count': 2.0, 'trip_distance': 2.38, 'tip_amount': 0.0}
Sent: {'lpep_pickup_datetime': '2019-10-01 01:04:21', 'lpep_dropoff_datetime': '2019-10-01 01:14:51', 'PULocationID': 129, 'DOLocationID': 82, 'passenger_count': 1.

KeyboardInterrupt: 