# Debug

This notebook is used for debugging purposes, and to view and follow Kafka topics.

In [2]:
%%bash
# Ensure the required Python 3 dependencies are installed.
python3 -m pip install kafka-python

Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
Installing collected packages: kafka-python
Successfully installed kafka-python-2.0.2


## Show the last 10 messages on a Kafka topic

In [3]:
import json
from datetime import datetime

from kafka import KafkaConsumer, TopicPartition

client = "localhost:9092"
consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                         enable_auto_commit=False,
                         auto_offset_reset='latest')

#
#  CHANGE THIS TO THE TOPIC YOU WANT TO DEBUG
#
topic = 'ingest-cleaned'


tp = TopicPartition(topic,0)
consumer.assign([tp])

consumer.seek_to_end(tp)
lastOffset = consumer.position(tp)
print(f"Last offset: {lastOffset}")

tailOffset = lastOffset - 10
if tailOffset < 0:
    print("Topic has no messages!")
else :
    consumer.seek(tp, tailOffset)    

    for message in consumer:
        print(f"RAW MESSAGE VALUE: {message.value}")
        try:
            m_obj = json.loads(message.value)
            print("RECODED JSON:", json.dumps(m_obj, indent=4))
        except:
            print("FAILED to decode message")
        ts = message.timestamp
        ts = datetime.fromtimestamp(ts/1000.0)
        print(f"TIMESTAMP: {ts}")
        print()
        if message.offset >= lastOffset - 1:
                break

Last offset: 70
RAW MESSAGE VALUE: b'{"lat":40.297875899999994,"lng":-75.5812935,"desc":"REINDEER CT & DEAD END;  NEW HANOVER; Station 332; 2015-12-10 @ 17:10:52;","zip":19525,"title":"EMS: BACK PAINS/INJURY","timeStamp":"2015-12-10T17:10:52.000Z","twp":"NEW HANOVER","addr":"REINDEER CT & DEAD END","e":1,"majorTitle":"EMS","minorTitle":" BACK PAINS/INJURY"}'
RECODED JSON: {
    "lat": 40.297875899999994,
    "lng": -75.5812935,
    "desc": "REINDEER CT & DEAD END;  NEW HANOVER; Station 332; 2015-12-10 @ 17:10:52;",
    "zip": 19525,
    "title": "EMS: BACK PAINS/INJURY",
    "timeStamp": "2015-12-10T17:10:52.000Z",
    "twp": "NEW HANOVER",
    "addr": "REINDEER CT & DEAD END",
    "e": 1,
    "majorTitle": "EMS",
    "minorTitle": " BACK PAINS/INJURY"
}
TIMESTAMP: 2020-11-26 18:47:32.306000

RAW MESSAGE VALUE: b'{"lat":40.2580614,"lng":-75.26467990000002,"desc":"BRIAR PATH & WHITEMARSH LN;  HATFIELD TOWNSHIP; Station 345; 2015-12-10 @ 17:29:21;","zip":19446,"title":"EMS: DIABETIC EMER

## Follow a Kafka topic

This script prints the content of messages of a Kafka topic when they get produced.

In [4]:
import json
from datetime import datetime
from IPython.display import clear_output

from kafka import KafkaConsumer, TopicPartition

client = "localhost:9092"
consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                         enable_auto_commit=False,
                         auto_offset_reset='latest')

#
#  CHANGE THIS TO THE TOPIC YOU WANT TO DEBUG
#
topic = 'ingest-cleaned'

consumer.subscribe(topic)

i = 0

print(f"START LISTENING ON '{topic}'")
for message in consumer:
    if i > 500:
        clear_output()
        i = 0
    i = i+1
    print(f"RAW MESSAGE VALUE: {message.value}")
    try:
        m_obj = json.loads(message.value)
        print("DECODED MESSAGE VALUE:", m_obj)
    except:
        print("FAILED to decode message")
    ts = message.timestamp
    ts = datetime.fromtimestamp(ts/1000.0)
    print(f"TIMESTAMP: {ts}")
    print()


START LISTENING ON 'ingest-cleaned'


KeyboardInterrupt: 