In [1]:
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas

In [2]:
def readCSV(path='hotspot_TERRA_streaming.csv'):
    """Load the hotspot streaming CSV as a list of JSON-serialisable records.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to ``'hotspot_TERRA_streaming.csv'``
        (resolved against the current working directory), which is the file
        this function always read before the parameter existed.

    Returns
    -------
    list of dict
        One dict per CSV row, in file order, holding exactly the keys
        ``'latitude'``, ``'longitude'``, ``'confidence'`` and
        ``'surface_temperature_celcius'``, each converted to ``float``.
        Any other column in the file is ignored.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist (raised by ``pandas.read_csv``).
    KeyError
        If one of the four required columns is missing from the file.
    ValueError
        If a value in a required column cannot be converted to ``float``.
    """
    # Spelling of 'celcius' matches the CSV header and the emitted message key.
    required_columns = [
        'latitude',
        'longitude',
        'confidence',
        'surface_temperature_celcius',
    ]

    climate_streaming_data = pandas.read_csv(path)

    # Convert whole columns at once instead of calling float() per cell inside
    # an iterrows() loop; the column list also fixes the key order per record.
    numeric_columns = climate_streaming_data[required_columns].astype(float)
    return numeric_columns.to_dict(orient='records')

In [3]:
def publish_message(producer_instance, topic_name, data, timeout=10):
    """Send one record to Kafka and report the outcome on stdout.

    Parameters
    ----------
    producer_instance : kafka.KafkaProducer
        Connected producer. Passing ``None`` (e.g. after a failed connection)
        is tolerated: the resulting error is printed like any other failure.
    topic_name : str
        Topic to publish to.
    data : object
        Payload handed to the producer's ``value_serializer``.
    timeout : float, optional
        Seconds to wait for the broker's acknowledgement of this record.

    Returns
    -------
    None
        This is a best-effort helper: failures are printed, never raised.
    """
    try:
        delivery = producer_instance.send(topic_name, value=data)
        producer_instance.flush()
        # send() only queues the record and flush() does not raise when a
        # record is rejected, so the delivery future is the only place a
        # failure shows up. Resolve it before claiming success.
        delivery.get(timeout=timeout)
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))


def connect_kafka_producer(bootstrap_servers=None):
    """Create a KafkaProducer that serialises each value as ASCII JSON.

    Parameters
    ----------
    bootstrap_servers : list of str, optional
        ``host:port`` addresses of the brokers to bootstrap from. Defaults to
        ``['192.168.1.5:9092']``, the address this notebook was written for.

    Returns
    -------
    kafka.KafkaProducer or None
        The connected producer, or ``None`` if construction raised an
        ``Exception`` (the error is printed, not raised).
    """
    if bootstrap_servers is None:
        bootstrap_servers = ['192.168.1.5:9092']

    # Return from the try/except branches, not from a `finally`: a return
    # inside `finally` would also swallow KeyboardInterrupt/SystemExit raised
    # while connecting, making the cell impossible to interrupt cleanly.
    try:
        return KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            # json.dumps escapes non-ASCII characters by default, so encoding
            # its output as ASCII cannot fail.
            value_serializer=lambda x: dumps(x).encode('ascii'),
            api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        return None

In [4]:
if __name__ == '__main__':
    # Streams randomly chosen hotspot records to Kafka forever, one every two
    # seconds, stamping each with a simulated 'created_time'. Five messages
    # (10 real seconds) make up one simulated day. Interrupt the kernel to stop.

    data = readCSV()
    topic = 'Hotspot_TERRA'
    producer = connect_kafka_producer()
    created_date = dt.datetime(2021, 12, 31)  # midnight of the first simulated day

    # Fail fast: without a producer every iteration would only print an error,
    # and with no rows the random selection below would raise an opaque
    # "empty range" ValueError.
    if producer is None:
        raise RuntimeError('Could not connect to Kafka; nothing can be published.')
    if not data:
        raise ValueError('No rows were loaded from the CSV; nothing to publish.')

    # Upper bound (exclusive) of the current 3-hour window, in hours.
    count = 0

    try:
        while True:
            count += 3

            # Copy the record so the per-message fields added below do not
            # accumulate on the shared rows held in `data`.
            selected_data = dict(random.choice(data))

            if count > 12:
                # Fifth message of the day: roll over to the next day.
                # created_date starts at midnight and only ever moves by whole
                # days, so it needs no explicit reset to 00:00:00.
                created_date += dt.timedelta(days=1)
                count = 0

            # Pick a random time inside the 3-hour window ending at `count`, so
            # timestamps are random but still strictly ordered between messages.
            # On the rollover iteration count is 0, so the hour offset is -3..-1
            # and the message lands in 21:00-23:59 of the day that just ended.
            # NOTE(review): as a result hours 12-20 are never generated;
            # confirm this gap is intended.
            created_time = created_date + dt.timedelta(
                hours=random.randrange(count - 3, count),
                minutes=random.randrange(0, 60),
                seconds=random.randrange(0, 60))
            selected_data['created_time'] = created_time.isoformat()
            selected_data['producer_id'] = 'producer_hotspot_terra'

            publish_message(producer, topic, selected_data)

            sleep(2)
    finally:
        # Release the broker connection when the loop is interrupted.
        producer.close()

Message published successfully. Data: {'latitude': -37.7836, 'longitude': 142.9437, 'confidence': 69.0, 'surface_temperature_celcius': 44.0, 'created_time': '2021-12-31T02:57:01', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -35.9435, 'longitude': 145.6489, 'confidence': 78.0, 'surface_temperature_celcius': 51.0, 'created_time': '2021-12-31T05:31:07', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -36.1376, 'longitude': 145.84, 'confidence': 76.0, 'surface_temperature_celcius': 55.0, 'created_time': '2021-12-31T07:04:46', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -35.7108, 'longitude': 143.7836, 'confidence': 86.0, 'surface_temperature_celcius': 60.0, 'created_time': '2021-12-31T09:36:21', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -35.2363, 'longitude': 143.0004, 'confidence': 91.0, 'surface_tempe

KeyboardInterrupt: 