In [1]:
!pip install kafka-python

Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m246.5/246.5 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kafka-python
Successfully installed kafka-python-2.0.2


In [6]:
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas

#host_ip = "192.168.1.5"

In [7]:
# Reading data from CSV
def readCSV():
    climate_streaming_data = pandas.read_csv('climate_streaming.csv') # Get data from CSV
    streaming_data = []
    for _, row in climate_streaming_data.iterrows(): # Iterate through each row in the CSV
        data_point = {} # Create dictionary & create key for each item from CSV data.
        # storing numerical values as float based on the data 
        data_point['latitude'] = float(row['latitude'])
        data_point['longitude'] = float(row['longitude'])
        data_point['air_temperature_celcius'] = float(
            row['air_temperature_celcius'])
        data_point['relative_humidity'] = float(row['relative_humidity'])
        data_point['windspeed_knots'] = float(row['windspeed_knots'])
        data_point['max_wind_speed'] = float(row['max_wind_speed'])

        # Unncessary space at beginning of value is removed.
        # We also split precipation type and amount, to make it easier for sorting/searching later.
        precipitation = str(row['precipitation ']).replace(" ", "")
        data_point['precipitation_type'] = precipitation[-1]
        data_point['precipitation'] = float(precipitation[0:-1])

        data_point['ghi'] = float(row['GHI_w/m2'])

        streaming_data.append(data_point)

    return streaming_data

In [8]:
def publish_message(producer_instance, topic_name, data):
    try:
        producer_instance.send(topic_name, value=data)
        producer_instance.flush()
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))


def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer( # Added serializer on the producer, which will automatically serialize to JSON string format.
            bootstrap_servers=['192.168.1.5:9092'], value_serializer=lambda x: dumps(x).encode('ascii'), api_version=(0, 10)) 
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer

In [10]:
if __name__ == '__main__':

    data = readCSV()
    topic = 'Climate'
    producer = connect_kafka_producer()
    created_date = dt.datetime(2023, 1, 1) 

    while True:
        random_number = random.randrange(0, len(data))
        selected_data = data[random_number] # Pick a random climate data point.
        created_date += dt.timedelta(days=1) # Increase date from previous date.
        selected_data['created_date'] = created_date.isoformat() # Set date to string format (to be stored in JSON)
        selected_data['producer_id'] = 'producer_climate'

        publish_message(producer, topic, selected_data) # Publish message

        sleep(10)

Message published successfully. Data: {'latitude': -37.856, 'longitude': 143.416, 'air_temperature_celcius': 14.0, 'relative_humidity': 51.0, 'windspeed_knots': 9.1, 'max_wind_speed': 12.0, 'precipitation_type': 'G', 'precipitation': 0.0, 'ghi': 119.0, 'created_date': '2023-01-02T00:00:00', 'producer_id': 'producer_climate'}
Message published successfully. Data: {'latitude': -37.623, 'longitude': 149.284, 'air_temperature_celcius': 21.0, 'relative_humidity': 64.5, 'windspeed_knots': 10.6, 'max_wind_speed': 15.9, 'precipitation_type': 'G', 'precipitation': 1.26, 'ghi': 158.0, 'created_date': '2023-01-03T00:00:00', 'producer_id': 'producer_climate'}
Message published successfully. Data: {'latitude': -37.336, 'longitude': 148.073, 'air_temperature_celcius': 7.0, 'relative_humidity': 40.5, 'windspeed_knots': 8.1, 'max_wind_speed': 15.0, 'precipitation_type': 'G', 'precipitation': 0.12, 'ghi': 65.0, 'created_date': '2023-01-04T00:00:00', 'producer_id': 'producer_climate'}
Message published 

KeyboardInterrupt: 