In [1]:
!pip install kafka-python



In [2]:
from kafka import KafkaConsumer, KafkaProducer
import json
import uuid
import os
import logging
import sys
import time
import multiprocessing
from scipy.fft import fft
import numpy as np


# force=True keeps this cell re-runnable: without it, a second call in the same
# kernel raises "RuntimeError: context has already been set".
multiprocessing.set_start_method('fork', force=True)

In [3]:

# Log at INFO level; the kafka client reports connection events at that level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Topic, brokers and consumer group come from the environment, each with a
# fallback default. KAFKA_BROKER may hold several comma-separated addresses.
consumer = KafkaConsumer(
    os.getenv('KAFKA_TOPIC', "accelerometer"),
    bootstrap_servers=os.getenv('KAFKA_BROKER', 'broker1:9093').split(","),
    group_id=os.getenv('KAFKA_GROUP_ID', "accelerometer-group"),
    api_version=(0, 10),
    # start from the oldest available offset when the group has none committed
    auto_offset_reset='earliest',
    # message payloads are JSON documents
    value_deserializer=json.loads,
    # stop iterating after 1000 ms without a new message
    consumer_timeout_ms=1000,
)


INFO:kafka.consumer.subscription_state:Updating subscribed topics to: ('accelerometer',)


In [25]:
def consume_messages(messages_by_key):
    """Poll the module-level ``consumer`` forever and bucket samples by key and second.

    Every message is stored as ``[timestamp, x, y, z]`` under
    ``messages_by_key[message.key][int(message.timestamp / 1000)]``.
    After each pass over the consumer the offsets are committed, then the
    loop sleeps briefly and polls again. Exceptions are logged and the loop
    keeps running.

    Args:
        messages_by_key: dict-like store shared with the processing side.
            It may be a ``multiprocessing.Manager().dict()`` proxy.

    Returns:
        Never returns; runs until the process is stopped.
    """
    sys.stdout.write("Starting\n")
    sys.stdout.flush()
    count = 0
    while True:
        try:
            for message in consumer:
                if count == 0:
                    # Dump the very first record once.
                    sys.stdout.write(str(message))
                    sys.stdout.flush()

                _store_message(messages_by_key, message)

                sys.stdout.write(f"\rRead Message {count}")
                count += 1
            # commit offsets so we won't get the same messages again
            consumer.commit()
        except Exception:
            # Best effort: log with traceback and keep consuming.
            logger.error('Exception in consuming message', exc_info=True)

        time.sleep(0.05)


def _store_message(messages_by_key, message):
    """Append one message's sample to its (key, second) bucket and write it back."""
    # Reading a nested dict out of a Manager().dict() proxy yields a copy, so
    # mutating it in place would be lost; modify the copy and reassign it.
    if message.key in messages_by_key:
        messages_current_key = messages_by_key[message.key]
    else:
        messages_current_key = {}

    # Bucket index: timestamp divided by 1000, truncated to an int.
    seconds = int(message.timestamp / 1000)
    messages_current_key.setdefault(seconds, []).append([
        message.timestamp,
        message.value.get('x'),
        message.value.get('y'),
        message.value.get('z'),
    ])

    # The assignment on the top-level mapping is what propagates the update.
    # NOTE(review): read-modify-write is not atomic across processes; a writer
    # in another process touching the same key concurrently could be overwritten.
    messages_by_key[message.key] = messages_current_key

In [19]:
def process_messages(messages_by_key):
    """Repeatedly compute and print per-second FFTs of the buffered samples.

    For every key, all second-buckets except the newest one are processed:
    their rows are ordered by timestamp, the x/y/z columns are transformed
    with a 10-point FFT, the result is printed, and the bucket is removed.
    The newest bucket is skipped on each pass.

    Args:
        messages_by_key: dict-like store of ``{key: {second: [[ts, x, y, z], ...]}}``.
            It may be a ``multiprocessing.Manager().dict()`` proxy.

    Returns:
        Never returns; runs until the process is stopped.
    """
    while True:
        _process_completed_seconds(messages_by_key)
        # Pause between passes instead of spinning a CPU core.
        time.sleep(0.05)


def _process_completed_seconds(messages_by_key):
    """Run one pass: print FFTs for all but the newest second of each key, then drop them."""
    # iterate over each key and group messages by seconds
    # then, process all seconds except the newest one
    for key in list(messages_by_key.keys()):
        # On a Manager().dict() proxy this read returns a copy of the nested dict.
        messages_current_key = messages_by_key[key]
        completed = sorted(messages_current_key.keys())[:-1]
        if not completed:
            continue

        for second in completed:
            # 1 order by timestamp
            sorted_messages = sorted(messages_current_key[second], key=lambda row: row[0])
            # 2 extract x, y, z and create numpy arrays
            x = np.array([m[1] for m in sorted_messages])
            y = np.array([m[2] for m in sorted_messages])
            z = np.array([m[3] for m in sorted_messages])
            # 3 compute fft (n=10 zero-pads or truncates every series to 10 points)
            x_fft = fft(x, n=10)
            y_fft = fft(y, n=10)
            z_fft = fft(z, n=10)
            # 4 log to console
            print(f"Key: {key}, Second: {second}, X: {x_fft}, Y: {y_fft}, Z: {z_fft}")

        # remove processed messages: deleting from the copy read above would not
        # reach the shared dict, so re-read the latest state, drop the processed
        # seconds and assign the result back on the top-level mapping.
        # NOTE(review): this read-modify-write is not atomic; a writer in another
        # process updating the same key in between could be overwritten.
        latest = messages_by_key[key]
        for second in completed:
            latest.pop(second, None)
        messages_by_key[key] = latest

In [None]:
# start two processes, one for consuming messages and one for processing messages
# Both share a single Manager dict: {key: {second: [[timestamp, x, y, z], ...]}}.
messages_by_key = multiprocessing.Manager().dict()

p1 = multiprocessing.Process(target=consume_messages, args=(messages_by_key,))
p2 = multiprocessing.Process(target=process_messages, args=(messages_by_key,))
p1.start()
p2.start()
# Both targets loop forever, so these joins block until the processes are stopped.
p1.join()
p2.join()


Starting


INFO:kafka.conn:<BrokerConnection node_id=bootstrap-0 host=broker1:9093 <connecting> [IPv4 ('172.18.0.7', 9093)]>: connecting to broker1:9093 [('172.18.0.7', 9093) IPv4]
INFO:kafka.conn:<BrokerConnection node_id=bootstrap-0 host=broker1:9093 <connecting> [IPv4 ('172.18.0.7', 9093)]>: Connection complete.
INFO:kafka.cluster:Group coordinator for accelerometer-group is BrokerMetadata(nodeId='coordinator-3', host='broker3', port=9097, rack=None)
INFO:kafka.coordinator:Discovered coordinator coordinator-3 for group accelerometer-group
INFO:kafka.coordinator:Starting new heartbeat thread
INFO:kafka.coordinator.consumer:Revoking previously assigned partitions set() for group accelerometer-group
INFO:kafka.conn:<BrokerConnection node_id=coordinator-3 host=broker3:9097 <connecting> [IPv4 ('172.18.0.6', 9097)]>: connecting to broker3:9097 [('172.18.0.6', 9097) IPv4]
INFO:kafka.conn:<BrokerConnection node_id=coordinator-3 host=broker3:9097 <connecting> [IPv4 ('172.18.0.6', 9097)]>: Connection co