In [1]:
import json
import uuid
import pandas as pd
import time

from kafka import KafkaProducer, KafkaAdminClient
from kafka.admin.new_topic import NewTopic
from kafka.errors import TopicAlreadyExistsError

### Configuration Parameters 

> **TODO:** Change the configuration parameters to the appropriate values for your setup.

In [2]:
# Connection settings and the identity used to namespace Kafka resources.
config = {
    'bootstrap_servers': ['kafka.kafka.svc.cluster.local:9092'],
    'first_name': 'Kyle',
    'last_name': 'Morris',
}

# The client id and the topic prefix are both "<last_name><first_name>".
config['client_id'] = config['topic_prefix'] = '{}{}'.format(
    config['last_name'],
    config['first_name'],
)

config

{'bootstrap_servers': ['kafka.kafka.svc.cluster.local:9092'],
 'first_name': 'Kyle',
 'last_name': 'Morris',
 'client_id': 'MorrisKyle',
 'topic_prefix': 'MorrisKyle'}

### Create Topic Utility Function

The `create_kafka_topic` function helps create a Kafka topic based on your configuration settings.  For instance, if your first name is *John* and your last name is *Doe*, `create_kafka_topic('locations')` will create a topic with the name `DoeJohn-locations`.  The function will not create the topic if it already exists. 

In [3]:
def create_kafka_topic(topic_name, config=config, num_partitions=1, replication_factor=1):
    """Create the Kafka topic "<topic_prefix>-<topic_name>" if it does not exist.

    Args:
        topic_name: Suffix of the topic; it is joined to config['topic_prefix']
            with a hyphen to form the full topic name.
        config: Mapping providing 'bootstrap_servers', 'client_id' and
            'topic_prefix'.
        num_partitions: Number of partitions requested for the new topic.
        replication_factor: Replication factor requested for the new topic.

    Prints whether the topic was created or already existed. Errors other
    than TopicAlreadyExistsError propagate to the caller.
    """
    name = '{}-{}'.format(config['topic_prefix'], topic_name)

    admin_client = KafkaAdminClient(
        bootstrap_servers=config['bootstrap_servers'],
        client_id=config['client_id']
    )

    topic = NewTopic(
        name=name,
        num_partitions=num_partitions,
        replication_factor=replication_factor
    )

    try:
        admin_client.create_topics(new_topics=[topic])
        print('Created topic "{}"'.format(name))
    except TopicAlreadyExistsError:
        print('Topic "{}" already exists'.format(name))
    finally:
        # Release the admin connection on every path; previously each call
        # left its client open.
        admin_client.close()
    
# Make sure both topics used by this notebook exist before producing to them.
for _topic_suffix in ('locations', 'accelerations'):
    create_kafka_topic(_topic_suffix)

Topic "MorrisKyle-locations" already exists
Topic "MorrisKyle-accelerations" already exists


### Kafka Producer

The following code creates a `KafkaProducer` object which you can use to send Python objects that are serialized as JSON.

**Note:** This producer serializes Python objects as JSON. This means that the object must be JSON serializable.  As an example, Python `datetime` values are not JSON serializable and must be converted to a string (e.g. ISO 8601) or a numeric value (e.g. a Unix timestamp) before being sent.

In [4]:
def _to_json_bytes(value):
    """Serialize a Python object to UTF-8 encoded JSON bytes."""
    return json.dumps(value).encode('utf-8')


# Producer whose message values are JSON-encoded before being sent.
producer = KafkaProducer(
    bootstrap_servers=config['bootstrap_servers'],
    value_serializer=_to_json_bytes
)

### Send Data Function

The `send_data` function sends a Python object to a Kafka topic. This function adds the `topic_prefix` to the topic so `send_data('locations', data)` sends a JSON serialized message to `DoeJohn-locations`. The function also registers callbacks to let you know if the message has been sent or if an error has occurred. 

In [5]:
def on_send_success(record_metadata):
    """Success callback: print the topic, partition and offset of a sent message.

    Args:
        record_metadata: Object exposing `topic`, `partition` and `offset`
            attributes for the delivered record.
    """
    template = 'Message sent:\n    Topic: "{}"\n    Partition: {}\n    Offset: {}'
    fields = (
        record_metadata.topic,
        record_metadata.partition,
        record_metadata.offset,
    )
    print(template.format(*fields))
    
def on_send_error(excp):
    """Error callback: report the exception that prevented a message from being sent.

    Args:
        excp: The exception passed to the errback.
    """
    # print() does not accept `exc_info` (that keyword belongs to the logging
    # API); passing it raised TypeError inside the errback and hid the real
    # failure. Report the exception itself instead.
    print('Message failed to send: {!r}'.format(excp))

def send_data(topic, data, config=config, producer=producer, msg_key=None):
    """Send `data` to the topic "<topic_prefix>-<topic>" and register result callbacks.

    Args:
        topic: Topic suffix; config['topic_prefix'] and a hyphen are prepended.
        data: Message value handed to the producer.
        config: Mapping providing 'topic_prefix'.
        producer: Producer object used to send the message.
        msg_key: Optional string key; when None a random UUID hex string is used.
    """
    full_topic = '{}-{}'.format(config['topic_prefix'], topic)
    key = uuid.uuid4().hex if msg_key is None else msg_key

    future = producer.send(
        full_topic,
        value=data,
        key=key.encode('utf-8')
    )
    future.add_callback(on_send_success)
    future.add_errback(on_send_error)

In [6]:
# Minimal JSON-serializable payload used to smoke-test the producer.
example_data = {
    'key1': 'value1',
    'key2': 'value2',
}

send_data('locations', example_data)

Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1771


In [7]:
import glob
import os
from pathlib import Path

current_dir = Path(os.getcwd()).absolute()
data_dir = current_dir.joinpath('data')
loc_dir = data_dir.joinpath('locations')
acc_dir = data_dir.joinpath('accelerations')

# The last five characters of each entry name under `locations` are parsed as
# a float time offset. Each offset is paired with its entry name and the pairs
# are sorted together, so timeList[i] always describes dirList[i]. Sorting the
# two lists independently (numeric vs. lexicographic order) could misalign them.
timed_dirs = sorted(
    (float(dir_name[-5:]), dir_name) for dir_name in os.listdir(loc_dir)
)
timeList = [offset for offset, dir_name in timed_dirs]
dirList = [dir_name for offset, dir_name in timed_dirs]

# This section of code creates our list of directories and pulls the timestamps. These
# two lists are basically one that's just the directory name, for loading in the file system, and
# the other is just the float time values which we use to run in real time.

In [8]:
def _first_row_message(parquet_path):
    """Read one parquet file and return its first row as a {column: value} dict.

    The 'timestamp' and 'timelapse' columns are converted to strings first so
    the resulting message can be JSON-serialized.
    """
    df = pd.read_parquet(parquet_path)
    df['timestamp'] = df['timestamp'].astype(str)
    df['timelapse'] = df['timelapse'].astype(str)
    # .iloc[0] selects the first row by position; for a default 0-based index
    # this is the same value the label lookup `[0]` returned.
    return {column: df[column].iloc[0] for column in df}


def _publish_directory(topic, directory):
    """Send one message per '*.parquet' file found in `directory` to `topic`."""
    for filename in glob.glob(os.path.join(directory, '*.parquet')):
        send_data(topic, _first_row_message(filename))


def _publish_time_step(dir_name):
    """Send the location files, then the acceleration files, of one time step."""
    _publish_directory('locations', loc_dir.joinpath(dir_name))
    _publish_directory('accelerations', acc_dir.joinpath(dir_name))


# Replay every time step in order, waiting until its offset (in seconds since
# `start`) has elapsed. Iterating over all steps guarantees none is skipped if
# sending falls behind, and sleeping avoids spinning the CPU between steps.
start = time.perf_counter()
for step_time, dir_name in zip(timeList, dirList):
    remaining = step_time - (time.perf_counter() - start)
    if remaining > 0:
        time.sleep(remaining)
    _publish_time_step(dir_name)
    print('Completed: ', step_time)

# Sends are asynchronous; block until everything queued has been delivered.
producer.flush()

Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1772
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1773
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1805
Completed:  0.0
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1806
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1774
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1775
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1807
Completed:  4.5
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1808
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1776
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1777
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1809
Completed:  7.8
Message sent:
    Topic: "Morr

Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1847
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1848
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1849
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1850
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1851
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1852
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1853
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1854
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1855
Completed:  49.5
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1856
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1826
Message sent:
    Topic: "MorrisKyle-loca

Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 1906
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1874
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1875
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1876
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1877
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1878
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1879
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1880
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1881
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1882
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1883
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1884
Message sent

Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1930
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1931
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1932
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1933
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1934
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1935
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1936
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1937
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1938
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1939
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1940
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1941
Message sent:
  

Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1976
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1977
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1978
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1979
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1980
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1981
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1982
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1983
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1984
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1985
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1986
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 1987
Message sent:
  

Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2057
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2058
Completed:  81.4
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2059
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2026
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2027
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2028
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2029
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2030
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2031
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2032
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2033
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    

Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2110
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2111
Completed:  88.3
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2112
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2073
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2074
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2075
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2076
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2077
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2078
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2079
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2080
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    

Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2158
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2159
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2160
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2161
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2162
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2163
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2164
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2165
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2166
Completed:  98.8
Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2167
Message sent:
    Topic: "MorrisKyle-locations"
    Partition: 0
    Offset: 2126
Message sent:
    Topic: "MorrisKyle-loca

Message sent:
    Topic: "MorrisKyle-accelerations"
    Partition: 0
    Offset: 2213
