# Simulating real-time data using Apache Kafka Producers.

In this section we implement multiple Apache Kafka producers to simulate real-time streaming of data, which is then processed by an Apache Spark Streaming client and inserted into MongoDB.


Description: A Python program that loads all the data from hotspot_AQUA_streaming.csv and feeds a randomly selected record to the stream every 10-30 seconds. AQUA is a NASA satellite that reports the latitude, longitude, confidence and surface temperature of a location. Two additional fields, sender_id and created_at (the creation time), are appended to each record.

In [None]:
# importing required libraries
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas as pd


# Load the hotspot records from hotspot_AQUA_streaming.csv with pandas and
# cast every column to "object" in the same step, so that individual rows can
# later be converted to JSON.
df = pd.read_csv("hotspot_AQUA_streaming.csv").astype("object")


def publish_message(producer_instance, topic_name, data, timeout=10):
    """Publish one record to a Kafka topic and report the outcome on stdout.

    ``send`` only queues the record and hands back a future, so the future is
    awaited here. The success line is therefore printed only after the send
    has completed, and delivery failures reach the ``except`` branch instead
    of going unnoticed.

    Args:
        producer_instance: Object exposing ``send(topic, value=...)`` that
            returns a future-like object with ``get(timeout=...)``.
        topic_name: Name of the topic to publish to.
        data: Record handed to the producer as the message value.
        timeout: Seconds to wait for the send to complete (default 10).

    Returns:
        None. Failures are printed rather than raised.
    """
    try:
        future = producer_instance.send(topic_name, value=data)
        # Block until the send is acknowledged or fails.
        future.get(timeout=timeout)
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))
    else:
        print('Message published successfully. Data: ' + str(data))
        
def connect_kafka_producer(bootstrap_servers=None):
    """Create a KafkaProducer whose message values are JSON-encoded.

    Values are serialized with ``json.dumps`` and encoded as ASCII bytes.

    Args:
        bootstrap_servers: Broker addresses to connect to. Defaults to
            ``['localhost:9092']`` when omitted.

    Returns:
        The connected ``KafkaProducer``, or ``None`` if construction raised
        an ``Exception`` (the error is printed to stdout).
    """
    if bootstrap_servers is None:
        bootstrap_servers = ['localhost:9092']
    try:
        # Return from the try/except rather than from a ``finally`` clause:
        # a ``return`` inside ``finally`` would also swallow KeyboardInterrupt
        # and SystemExit raised while connecting.
        return KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            value_serializer=lambda x: dumps(x).encode('ascii'),
            api_version=(0, 10),
        )
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        return None
    
if __name__ == '__main__':
    # Topic used to partition the data in the Kafka cluster; all three
    # producers belong to the same topic 'hotspot'.
    topic = 'hotspot'
    print('Publishing records..')
    producer = connect_kafka_producer()
    if producer is None:
        # Without a producer every publish attempt would fail, so stop here
        # instead of looping forever on errors.
        raise SystemExit('Could not connect to Kafka; no records published.')

    while True:
        # Pick one row at random by position, so the choice does not depend
        # on the DataFrame's index labels.
        row = df.iloc[random.randrange(len(df))]
        data = row.to_dict()
        # Append created_at and sender_id. The extra fields are added to the
        # dict directly because Series.append was removed in pandas 2.0.
        data.update({
            "created_at": dt.datetime.now().strftime("%X"),
            "sender_id": "producer_2",
        })
        publish_message(producer, topic, data)
        # Feed the data to the stream every 10 to 30 seconds.
        sleep(random.randint(10, 30))

Publishing records..
Message published successfully. Data: {'confidence': 83, 'longitude': 145.9954, 'created_at': '19:48:22', 'sender_id': 'producer_2', 'latitude': -37.82, 'surface_temperature_celcius': 57}
Message published successfully. Data: {'confidence': 69, 'longitude': 142.9437, 'created_at': '19:48:39', 'sender_id': 'producer_2', 'latitude': -37.7836, 'surface_temperature_celcius': 44}
Message published successfully. Data: {'confidence': 100, 'longitude': 144.6259, 'created_at': '19:49:06', 'sender_id': 'producer_2', 'latitude': -36.6029, 'surface_temperature_celcius': 115}
Message published successfully. Data: {'confidence': 82, 'longitude': 141.4277, 'created_at': '19:49:36', 'sender_id': 'producer_2', 'latitude': -36.3879, 'surface_temperature_celcius': 55}
Message published successfully. Data: {'confidence': 78, 'longitude': 147.1366, 'created_at': '19:49:59', 'sender_id': 'producer_2', 'latitude': -36.7394, 'surface_temperature_celcius': 44}
Message published successfull

Message published successfully. Data: {'confidence': 76, 'longitude': 141.8273, 'created_at': '20:02:36', 'sender_id': 'producer_2', 'latitude': -36.3973, 'surface_temperature_celcius': 49}
Message published successfully. Data: {'confidence': 61, 'longitude': 143.186, 'created_at': '20:03:02', 'sender_id': 'producer_2', 'latitude': -37.6659, 'surface_temperature_celcius': 41}
Message published successfully. Data: {'confidence': 77, 'longitude': 143.3362, 'created_at': '20:03:13', 'sender_id': 'producer_2', 'latitude': -36.1648, 'surface_temperature_celcius': 55}
Message published successfully. Data: {'confidence': 87, 'longitude': 144.1287, 'created_at': '20:03:27', 'sender_id': 'producer_2', 'latitude': -36.3933, 'surface_temperature_celcius': 63}
Message published successfully. Data: {'confidence': 57, 'longitude': 146.0788, 'created_at': '20:03:40', 'sender_id': 'producer_2', 'latitude': -36.9733, 'surface_temperature_celcius': 40}
Message published successfully. Data: {'confidence'