# FIT3182 Assignment Part B

### Eu Jia Xin (30881676)

## Task 1a:

### Event Producer 1 
Write a python program that loads all the data from `climate_streaming.csv` and randomly (with replacement) feed the data to the stream every 10 seconds. You will need to append additional information such as producer information to identify the **producer** and **created date**.

In [2]:
# data modification - every 10s = 1 day

from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import pandas as pd
import datetime
import json
from pprint import pprint

ENCODING_FORMAT = 'utf-8'
CLIMATE_STREAMING_PATH = 'datasets/climate_streaming.csv'


In [3]:
def get_climate_streaming_data():
    """
    Convert the CSV file into pandas dataframe, pre-process then convert into JSON.
    """
    # get climate data as pandas dataframe
    climate_streaming_df = pd.read_csv(CLIMATE_STREAMING_PATH)
    
    # pre-process dataframe
    
    # strip whitespaces in column names + rename to GHI for easier JSON read after
    climate_streaming_df.rename(columns=lambda x: x.strip(), inplace=True)
    climate_streaming_df.rename(columns={"GHI_w/m2":"GHI"}, inplace=True)

    # # pre-process precipitation into value and flag (2 separate columns)
    result = climate_streaming_df['precipitation'].str.split('(\d.[\d+]*)([A-I])', expand=True)
    result = result.loc[:,[1,2]]
    result.rename(columns={1:'precipitation', 2:'precipitation_flag'}, inplace=True)

    # update climate dataframe with pre-processed precipitation columns
    climate_streaming_df['precipitation'] = pd.to_numeric(result['precipitation'])
    climate_streaming_df.insert(loc=7, column='precipitation_flag', value=result['precipitation_flag'].astype(str))

    # convert climate data to a list of JSON objects
    result = climate_streaming_df.to_json(orient="records")
    climate_streaming_json = json.loads(result)
    
    return climate_streaming_json


In [4]:
def publish_message(producer_instance, topic_name, data):
    try:
        # directly send data as json because we already specify the JSON serializer in KafkaProducer 
        producer_instance.send(topic_name, value=data)
        print('Message published successfully. ' + str(data))
    except Exception as e:
        print('Exception in publishing message.')
        print(str(e))

In [5]:
def connect_kafka_producer():
    """
    KafkaProducer has extra value_serializer to properly handle json message. 
    Reference: https://kafka-python.readthedocs.io/en/master/usage.html?highlight=json#kafkaproducer
    """
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                  value_serializer=lambda m: json.dumps(m).encode('ascii'), # produce json messages 
                                  api_version=(0, 10))
    except Exception as e:
        print('Exception while connecting Kafka.')
        print(str(e))
    finally:
        return _producer

In [8]:
if __name__ == '__main__':
    """
    Declare topic and producer instance.
    Randomly gets a climate record from climate_streaming.csv, and appends the appropriate date and producer id.
    Waits for 10 seconds before publishing next random climate record. 
    To stop the publishing, can interrupt the kernel to trigger a KeyboardInterrupt. 
    """
    topic = 'Climate'
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    climate_streaming_data = get_climate_streaming_data()
    
    # based on requirements, our first created date should be the latest date from climate_historic
    date = datetime.datetime(2021, 12, 31)
    
    while True:
        i = random.randrange(0, len(climate_streaming_data)) # [0, 366)
        # increment date by 1 day for next data
        date += datetime.timedelta(days=1)
        
        random_climate_streaming_data = climate_streaming_data[i]
        
        # append additional information
        random_climate_streaming_data['date'] = date.date().strftime("%Y-%m-%d")
        random_climate_streaming_data['producer_id'] = 'producer_climate'

        publish_message(producer_instance=producer, topic_name=topic, data=random_climate_streaming_data)
        
        # 10 seconds interval of publishing data
        sleep(10)


Publishing records..
Message published successfully. {'latitude': -37.609, 'longitude': 149.32, 'air_temperature_celcius': 16, 'relative_humidity': 48.3, 'windspeed_knots': 9.4, 'max_wind_speed': 14.0, 'precipitation': 0.01, 'precipitation_flag': 'G', 'GHI': 139, 'date': '2022-01-01', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.2111, 'longitude': 141.505, 'air_temperature_celcius': 24, 'relative_humidity': 55.5, 'windspeed_knots': 7.9, 'max_wind_speed': 15.0, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI': 196, 'date': '2022-01-02', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.397, 'longitude': 148.121, 'air_temperature_celcius': 13, 'relative_humidity': 44.0, 'windspeed_knots': 13.6, 'max_wind_speed': 21.0, 'precipitation': 0.04, 'precipitation_flag': 'G', 'GHI': 117, 'date': '2022-01-03', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.331, 'longitude': 143.122, 

Message published successfully. {'latitude': -36.779, 'longitude': 146.108, 'air_temperature_celcius': 15, 'relative_humidity': 51.0, 'windspeed_knots': 9.6, 'max_wind_speed': 15.9, 'precipitation': 0.16, 'precipitation_flag': 'G', 'GHI': 128, 'date': '2022-01-29', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.467, 'longitude': 148.127, 'air_temperature_celcius': 10, 'relative_humidity': 37.9, 'windspeed_knots': 8.5, 'max_wind_speed': 14.0, 'precipitation': 0.02, 'precipitation_flag': 'G', 'GHI': 94, 'date': '2022-01-30', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.59, 'longitude': 149.31, 'air_temperature_celcius': 19, 'relative_humidity': 57.2, 'windspeed_knots': 8.9, 'max_wind_speed': 15.0, 'precipitation': 0.98, 'precipitation_flag': 'G', 'GHI': 153, 'date': '2022-01-31', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.452, 'longitude': 148.115, 'air_temperature_celcius

Message published successfully. {'latitude': -35.6374, 'longitude': 142.3787, 'air_temperature_celcius': 14, 'relative_humidity': 41.6, 'windspeed_knots': 13.1, 'max_wind_speed': 18.1, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI': 128, 'date': '2022-02-26', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.2229, 'longitude': 143.4245, 'air_temperature_celcius': 23, 'relative_humidity': 58.8, 'windspeed_knots': 9.1, 'max_wind_speed': 15.0, 'precipitation': 0.01, 'precipitation_flag': 'G', 'GHI': 183, 'date': '2022-02-27', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -38.038, 'longitude': 142.986, 'air_temperature_celcius': 15, 'relative_humidity': 50.7, 'windspeed_knots': 9.2, 'max_wind_speed': 13.0, 'precipitation': 0.02, 'precipitation_flag': 'G', 'GHI': 128, 'date': '2022-02-28', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.591, 'longitude': 149.33, 'air_temperature_c

Message published successfully. {'latitude': -36.4422, 'longitude': 141.427, 'air_temperature_celcius': 15, 'relative_humidity': 48.1, 'windspeed_knots': 7.9, 'max_wind_speed': 12.0, 'precipitation': 0.0, 'precipitation_flag': 'G', 'GHI': 131, 'date': '2022-03-26', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.478, 'longitude': 148.117, 'air_temperature_celcius': 11, 'relative_humidity': 43.9, 'windspeed_knots': 11.2, 'max_wind_speed': 16.9, 'precipitation': 0.12, 'precipitation_flag': 'G', 'GHI': 99, 'date': '2022-03-27', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.336, 'longitude': 148.073, 'air_temperature_celcius': 7, 'relative_humidity': 40.5, 'windspeed_knots': 8.1, 'max_wind_speed': 15.0, 'precipitation': 0.12, 'precipitation_flag': 'G', 'GHI': 65, 'date': '2022-03-28', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.398, 'longitude': 145.286, 'air_temperature_celciu

Message published successfully. {'latitude': -37.379, 'longitude': 148.132, 'air_temperature_celcius': 12, 'relative_humidity': 44.9, 'windspeed_knots': 7.9, 'max_wind_speed': 11.1, 'precipitation': 0.0, 'precipitation_flag': 'G', 'GHI': 107, 'date': '2022-04-23', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.238, 'longitude': 141.145, 'air_temperature_celcius': 8, 'relative_humidity': 41.6, 'windspeed_knots': 8.3, 'max_wind_speed': 15.9, 'precipitation': 0.24, 'precipitation_flag': 'G', 'GHI': 73, 'date': '2022-04-24', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.467, 'longitude': 143.351, 'air_temperature_celcius': 17, 'relative_humidity': 53.4, 'windspeed_knots': 9.4, 'max_wind_speed': 25.1, 'precipitation': 0.16, 'precipitation_flag': 'G', 'GHI': 142, 'date': '2022-04-25', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.603, 'longitude': 149.324, 'air_temperature_celcius

Message published successfully. {'latitude': -37.1926, 'longitude': 143.8095, 'air_temperature_celcius': 8, 'relative_humidity': 36.3, 'windspeed_knots': 6.3, 'max_wind_speed': 13.0, 'precipitation': 0.01, 'precipitation_flag': 'G', 'GHI': 76, 'date': '2022-05-21', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.1704, 'longitude': 144.0433, 'air_temperature_celcius': 11, 'relative_humidity': 41.7, 'windspeed_knots': 8.7, 'max_wind_speed': 19.0, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI': 101, 'date': '2022-05-22', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.4422, 'longitude': 141.427, 'air_temperature_celcius': 15, 'relative_humidity': 48.1, 'windspeed_knots': 7.9, 'max_wind_speed': 12.0, 'precipitation': 0.0, 'precipitation_flag': 'G', 'GHI': 131, 'date': '2022-05-23', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.446, 'longitude': 148.102, 'air_temperature_cel

Message published successfully. {'latitude': -37.415, 'longitude': 148.105, 'air_temperature_celcius': 12, 'relative_humidity': 47.0, 'windspeed_knots': 7.7, 'max_wind_speed': 15.0, 'precipitation': 0.08, 'precipitation_flag': 'G', 'GHI': 105, 'date': '2022-06-18', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.7084, 'longitude': 142.7354, 'air_temperature_celcius': 13, 'relative_humidity': 44.1, 'windspeed_knots': 12.9, 'max_wind_speed': 19.0, 'precipitation': 0.02, 'precipitation_flag': 'G', 'GHI': 117, 'date': '2022-06-19', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -37.335, 'longitude': 148.064, 'air_temperature_celcius': 8, 'relative_humidity': 41.0, 'windspeed_knots': 11.0, 'max_wind_speed': 16.9, 'precipitation': 0.47, 'precipitation_flag': 'G', 'GHI': 74, 'date': '2022-06-20', 'producer_id': 'producer_climate'}
Message published successfully. {'latitude': -36.3114, 'longitude': 142.7605, 'air_temperature_

KeyboardInterrupt: 