# FIT3182 Assignment Part B

### Eu Jia Xin (30881676)

## Task 1b:

### Event Producer 2 
Write a python program that loads all the data from `hotspot_AQUA_streaming.csv` and randomly (with replacement) feed the data to the stream every 2 seconds. AQUA is the satellite from NASA that reports latitude, longitude, confidence and surface temperature of a location. You will need to append additional information such as **producer information** to identify the
producer and **created date & time**.


In [1]:
# import relevant libraries 

from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import pandas as pd
import datetime
import json
from pprint import pprint

ENCODING_FORMAT = 'utf-8'
HOTSPOT_AQUA_STREAMING_PATH = 'datasets/hotspot_AQUA_streaming.csv'


In [2]:
def get_hotspot_aqua_streaming_data():
    """
    Convert the CSV file into pandas dataframe, pre-process then convert into JSON.
    """
    # get hotspot aqua data as pandas dataframe
    hotspot_aqua_streaming_df = pd.read_csv(HOTSPOT_AQUA_STREAMING_PATH)

    # convert hotspot data to a list of JSON objects
    result = hotspot_aqua_streaming_df.to_json(orient="records")
    hotspot_aqua_streaming_json = json.loads(result)
    
    return hotspot_aqua_streaming_json

# check for first record - looks good (additional info to be added in main function)
get_hotspot_aqua_streaming_data()[0]

{'latitude': -37.623,
 'longitude': 149.323,
 'confidence': 51,
 'surface_temperature_celcius': 38}

In [3]:
def publish_message(producer_instance, topic_name, data):
    try:
        # directly send data as json because we already specify the JSON serializer in KafkaProducer 
        producer_instance.send(topic_name, value=data)
        print('Message published successfully. ' + str(data))
    except Exception as e:
        print('Exception in publishing message.')
        print(str(e))

In [4]:
def connect_kafka_producer():
    """
    KafkaProducer has extra value_serializer to properly handle json message. 
    Reference: https://kafka-python.readthedocs.io/en/master/usage.html?highlight=json#kafkaproducer
    """
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                  value_serializer=lambda m: json.dumps(m).encode('ascii'), # produce json messages 
                                  api_version=(0, 10))
    except Exception as e:
        print('Exception while connecting Kafka.')
        print(str(e))
    finally:
        return _producer

In [5]:
if __name__ == '__main__':
    """
    Declare topic and producer instance.
    Randomly gets a hotspot record from hotspot_aqua_streaming.csv, and appends the appropriate date and producer id.
    Waits for 2 seconds before publishing next hotspot record.
    To stop the publishing, can interrupt the kernel to trigger a KeyboardInterrupt. 
    
    Handling the time increment:
    Since data uploaded every 2 seconds, and every 10s = 1 day, this means that 5 records are produced in a day.
    To simulate this, each record would take 24/5 = 4.8 hours to be published. 
    """
    topic = 'Hotspot_AQUA'
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    hotspot_aqua_streaming_data = get_hotspot_aqua_streaming_data()
    
    # based on requirements, our first created date should be the latest date from climate_historic
    date = datetime.datetime(2021, 12, 31)
    
    # add some time to the date before publishing first record (so that it will start on the next day)
    date += datetime.timedelta(hours=19.2)
    
    while True:
        
        for _ in range(5):
            
            date += datetime.timedelta(hours=24/5)
            
            i = random.randrange(0, len(hotspot_aqua_streaming_data)) # [0, 2644)

            random_hotspot_aqua_streaming_data = hotspot_aqua_streaming_data[i]

            # append additional information
            # only time is needed in producer (not date) - this is clarified during consultation
            random_hotspot_aqua_streaming_data['time'] = date.time().strftime("%H:%M")

            random_hotspot_aqua_streaming_data['producer_id'] = 'producer_hotspot_aqua'

            publish_message(producer_instance=producer, topic_name=topic, data=random_hotspot_aqua_streaming_data)

            # 2 seconds interval of publishing data
            sleep(2)


Publishing records..
Message published successfully. {'latitude': -36.5548, 'longitude': 142.5237, 'confidence': 70, 'surface_temperature_celcius': 45, 'time': '00:00', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.4606, 'longitude': 141.0562, 'confidence': 55, 'surface_temperature_celcius': 39, 'time': '04:48', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.0374, 'longitude': 143.8136, 'confidence': 86, 'surface_temperature_celcius': 61, 'time': '09:36', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.9091, 'longitude': 143.5105, 'confidence': 87, 'surface_temperature_celcius': 70, 'time': '14:24', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.4022, 'longitude': 143.2016, 'confidence': 69, 'surface_temperature_celcius': 45, 'time': '19:12', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latit

Message published successfully. {'latitude': -37.7379, 'longitude': 143.1706, 'confidence': 58, 'surface_temperature_celcius': 40, 'time': '19:12', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.0313, 'longitude': 143.6296, 'confidence': 72, 'surface_temperature_celcius': 47, 'time': '00:00', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -35.0821, 'longitude': 142.4532, 'confidence': 83, 'surface_temperature_celcius': 57, 'time': '04:48', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.8373, 'longitude': 146.1811, 'confidence': 100, 'surface_temperature_celcius': 50, 'time': '09:36', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -35.89, 'longitude': 145.606, 'confidence': 84, 'surface_temperature_celcius': 59, 'time': '14:24', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.1569, 'longit

Message published successfully. {'latitude': -37.017, 'longitude': 148.1297, 'confidence': 100, 'surface_temperature_celcius': 121, 'time': '14:24', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.3847, 'longitude': 142.8935, 'confidence': 100, 'surface_temperature_celcius': 88, 'time': '19:12', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.9292, 'longitude': 143.3923, 'confidence': 79, 'surface_temperature_celcius': 52, 'time': '00:00', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.586, 'longitude': 148.038, 'confidence': 77, 'surface_temperature_celcius': 47, 'time': '04:48', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.0374, 'longitude': 143.8136, 'confidence': 86, 'surface_temperature_celcius': 61, 'time': '09:36', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.4996, 'long

Message published successfully. {'latitude': -36.1556, 'longitude': 141.5904, 'confidence': 89, 'surface_temperature_celcius': 65, 'time': '09:36', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.7871, 'longitude': 146.9604, 'confidence': 65, 'surface_temperature_celcius': 42, 'time': '14:24', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.942, 'longitude': 143.292, 'confidence': 67, 'surface_temperature_celcius': 54, 'time': '19:12', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.8662, 'longitude': 143.251, 'confidence': 71, 'surface_temperature_celcius': 46, 'time': '00:00', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -37.4463, 'longitude': 142.7829, 'confidence': 92, 'surface_temperature_celcius': 70, 'time': '04:48', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. {'latitude': -36.8283, 'longitu

KeyboardInterrupt: 