# FIT3182 Assignment Part B

### Eu Jia Xin (30881676)

## Task 1c:

### Event Producer 3 
Write a python program that loads all the data from `hotspot_TERRA_streaming.csv` and randomly (with replacement) feeds the data to the stream every 2 seconds. TERRA is another satellite from NASA that reports latitude, longitude, confidence and surface temperature of a location. You will need to append additional information such as **producer information** to identify
the producer and **created date & time**. 

In [1]:
# import relevant libraries 

from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import pandas as pd
import datetime
import json
from pprint import pprint

ENCODING_FORMAT = 'utf-8'
HOTSPOT_TERRA_STREAMING_PATH = 'datasets/hotspot_TERRA_streaming.csv'


In [6]:
def get_hotspot_terra_streaming_data():
    """
    Convert the CSV file into pandas dataframe, pre-process then convert into JSON.
    """
    # get hotspot terra data as pandas dataframe
    hotspot_terra_streaming_df = pd.read_csv(HOTSPOT_TERRA_STREAMING_PATH)

    # convert hotspot data to a list of JSON objects
    result = hotspot_terra_streaming_df.to_json(orient="records")
    hotspot_terra_streaming_json = json.loads(result)
    
    return hotspot_terra_streaming_json

# check for first record - looks good (additional info to be added in main function)
get_hotspot_terra_streaming_data()[0]

{'latitude': -37.966,
 'longitude': 145.051,
 'confidence': 78,
 'surface_temperature_celcius': 68}

In [3]:
def publish_message(producer_instance, topic_name, data):
    try:
        # directly send data as json because we already specify the JSON serializer in KafkaProducer 
        producer_instance.send(topic_name, value=data)
        print('Message published successfully. ' + str(data))
    except Exception as e:
        print('Exception in publishing message.')
        print(str(e))

In [4]:
def connect_kafka_producer():
    """
    KafkaProducer has extra value_serializer to properly handle json message. 
    Reference: https://kafka-python.readthedocs.io/en/master/usage.html?highlight=json#kafkaproducer
    """
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                  value_serializer=lambda m: json.dumps(m).encode('ascii'), # produce json messages 
                                  api_version=(0, 10))
    except Exception as e:
        print('Exception while connecting Kafka.')
        print(str(e))
    finally:
        return _producer

In [5]:
if __name__ == '__main__':
    """
    Declare topic and producer instance.
    Randomly gets a hotspot record from hotspot_terra_streaming.csv, and appends the appropriate date and producer id.
    Waits for 2 seconds before publishing next hotspot record.
    To stop the publishing, can interrupt the kernel to trigger a KeyboardInterrupt. 
    
    Handling the time increment:
    Since data uploaded every 2 seconds, and every 10s = 1 day, this means that 5 records are produced in a day.
    To simulate this, each record would take 24/5 = 4.8 hours to be published. 
    """
    topic = 'Hotspot_TERRA'
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    hotspot_terra_streaming_data = get_hotspot_terra_streaming_data()
    
    # based on requirements, our first created date should be the latest date from climate_historic
    date = datetime.datetime(2021, 12, 31)
    
    # add some time to the date before publishing first record (so that it will start on the next day)
    date += datetime.timedelta(hours=19.2)
    
    while True:
        
        for _ in range(5):
            
            date += datetime.timedelta(hours=24/5)
            
            i = random.randrange(0, len(hotspot_terra_streaming_data)) # [0, 2623)

            random_hotspot_terra_streaming_data = hotspot_terra_streaming_data[i]

            # append additional information
            random_hotspot_terra_streaming_data['time'] = date.time().strftime("%H:%M")
            
            random_hotspot_terra_streaming_data['producer_id'] = 'producer_hotspot_terra'

            publish_message(producer_instance=producer, topic_name=topic, data=random_hotspot_terra_streaming_data)

            # 2 seconds interval of publishing data
            sleep(2)


Publishing records..
Message published successfully. {'latitude': -37.9284, 'longitude': 143.108, 'confidence': 92, 'surface_temperature_celcius': 70, 'time': '00:00', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.7236, 'longitude': 143.3995, 'confidence': 99, 'surface_temperature_celcius': 86, 'time': '04:48', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.7851, 'longitude': 141.4593, 'confidence': 52, 'surface_temperature_celcius': 38, 'time': '09:36', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -38.1748, 'longitude': 143.0552, 'confidence': 69, 'surface_temperature_celcius': 44, 'time': '14:24', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.3009, 'longitude': 143.4777, 'confidence': 52, 'surface_temperature_celcius': 39, 'time': '19:12', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'l

Message published successfully. {'latitude': -36.9299, 'longitude': 142.9671, 'confidence': 71, 'surface_temperature_celcius': 46, 'time': '19:12', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.0765, 'longitude': 146.4896, 'confidence': 69, 'surface_temperature_celcius': 46, 'time': '00:00', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.4324, 'longitude': 141.5582, 'confidence': 77, 'surface_temperature_celcius': 52, 'time': '04:48', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.7379, 'longitude': 143.1706, 'confidence': 58, 'surface_temperature_celcius': 40, 'time': '09:36', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.569, 'longitude': 148.024, 'confidence': 87, 'surface_temperature_celcius': 60, 'time': '14:24', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.8644, 'l

Message published successfully. {'latitude': -37.9184, 'longitude': 142.5436, 'confidence': 50, 'surface_temperature_celcius': 42, 'time': '14:24', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.335, 'longitude': 148.064, 'confidence': 88, 'surface_temperature_celcius': 60, 'time': '19:12', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.5418, 'longitude': 144.666, 'confidence': 70, 'surface_temperature_celcius': 45, 'time': '00:00', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.9827, 'longitude': 141.4064, 'confidence': 62, 'surface_temperature_celcius': 41, 'time': '04:48', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.098, 'longitude': 143.74, 'confidence': 92, 'surface_temperature_celcius': 67, 'time': '09:36', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.9071, 'longi

Message published successfully. {'latitude': -37.3601, 'longitude': 145.8519, 'confidence': 82, 'surface_temperature_celcius': 62, 'time': '09:36', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.7317, 'longitude': 142.0162, 'confidence': 64, 'surface_temperature_celcius': 42, 'time': '14:24', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.7381, 'longitude': 141.4541, 'confidence': 84, 'surface_temperature_celcius': 58, 'time': '19:12', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.0884, 'longitude': 141.0357, 'confidence': 81, 'surface_temperature_celcius': 55, 'time': '00:00', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -37.9309, 'longitude': 143.381, 'confidence': 68, 'surface_temperature_celcius': 44, 'time': '04:48', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. {'latitude': -36.882, 'l

KeyboardInterrupt: 