In [1]:
from pprint import pprint
import pandas as pd
import json
from datetime import datetime, timedelta

# IP address of the Kafka broker host. connect_kafka_producer() combines it
# with port 9092 to build the bootstrap server address.
host_ip = "192.168.0.129"

In [2]:
import csv

#Climate_streaming list contains dictionaries, where each dictionary represents a row of data from the CSV file
# newline='' is what the csv module asks for when opening files, so that the
# reader (not the text layer) interprets line endings inside quoted fields.
with open('Data/climate_streaming.csv', encoding='utf-8', newline='') as file1:
    # DictReader yields one dict per CSV row, keyed by the header row.
    climate_streaming = list(csv.DictReader(file1))


In [3]:
from time import sleep
from json import dumps
from kafka3 import KafkaProducer
import random
import datetime as dt

In [4]:
#Used to publish messages to a Kafka topic by passing a Kafka producer instance, topic name, key, and data as parameters.
def publish_message(producer_instance, topic_name, key, data):
    """Publish one keyed message to a Kafka topic and print the outcome.

    Args:
        producer_instance: Producer object exposing
            ``send(topic, key=..., value=...)`` and ``flush()``; ``send`` is
            expected to return a future with a ``get(timeout=...)`` method,
            as kafka producers do.
        topic_name: Name of the topic to publish to.
        key: Message key as ``str``; it is UTF-8 encoded before sending.
        data: Message payload, handed to ``send`` as ``value``.

    Returns:
        None. The result is reported with ``print``. Any ``Exception`` is
        caught and printed so that one failed message does not abort the
        caller's loop.
    """
    try:
        key_bytes = bytes(key, encoding='utf-8')
        future = producer_instance.send(topic_name, key=key_bytes, value=data)
        producer_instance.flush()
        # send() only queues the record, and flush() waits for completion
        # without raising on a failed delivery. Resolving the future is what
        # surfaces a broker-side error, so "success" is only printed for
        # messages that were actually acknowledged.
        future.get(timeout=10)
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))

In [5]:
#Used to establish a connection to a Kafka broker and obtain a KafkaProducer instance by calling connect_kafka_producer(). 
#The returned KafkaProducer instance can then be used to publish messages to Kafka topics.
def connect_kafka_producer():
    """Create a KafkaProducer connected to the broker at ``host_ip:9092``.

    Message values are serialized to JSON and encoded as ASCII bytes before
    being sent.

    Returns:
        The KafkaProducer instance, or ``None`` if creating it raised an
        ``Exception`` (the error is printed). Callers must check for ``None``.
    """
    try:
        return KafkaProducer(
            bootstrap_servers=[f'{host_ip}:9092'],
            value_serializer=lambda x: dumps(x).encode('ascii'),
            api_version=(0, 10),
        )
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        # Returned explicitly here rather than from a ``finally`` clause: a
        # ``return`` inside ``finally`` would also swallow KeyboardInterrupt
        # and SystemExit, making the cell impossible to interrupt cleanly.
        return None

In [None]:
#Publishes each item in the climate_streaming list as a message to the Kafka topic every 10 seconds.
#The data is transformed into a dictionary format and sent to the Kafka broker using the publish_message() function.
#Data sent is appended with producer information for identification purposes
#The simulated date advances by 24 hours (1 day) with each message, i.e. once every 10 seconds
if __name__ == '__main__':

    topic = 'Scenario01'

    print('Publishing records..')
    producer = connect_kafka_producer()

    if producer is None:
        # connect_kafka_producer() returns None (after printing the error)
        # when the broker is unreachable. Without this guard the loop would
        # print a failure for every row, ten seconds apart.
        print('No Kafka producer available; nothing was published.')
    else:
        # Simulated clock: incremented before each send, so the first
        # message is dated 2023-01-02.
        date = datetime(2023, 1, 1)

        try:
            for data in climate_streaming:
                date += timedelta(days=1)
                data_to_send = {
                    "latitude": float(data['latitude']),
                    "longitude": float(data['longitude']),
                    "air_temperature_celcius": int(data['air_temperature_celcius']),
                    "relative_humidity": float(data["relative_humidity"]),
                    "windspeed_knots": float(data["windspeed_knots"]),
                    "max_wind_speed": float(data["max_wind_speed"]),
                    # The source key includes a trailing space; the value is
                    # forwarded as the raw string read from the CSV.
                    "precipitation": data['precipitation '],
                    "GHI_w/m2": int(data["GHI_w/m2"]),
                    # Day/month without zero padding. Built from the date
                    # fields because strftime's "%-d"/"%-m" flags are a
                    # platform extension and are not available on Windows.
                    "date": f"{date.day}/{date.month}/{date.year}",
                    "datetime": date.isoformat(),
                    # Identifies this producer to downstream consumers.
                    "station": 111
                }

                publish_message(producer, topic, 'jsondata', data_to_send)
                sleep(10)
        finally:
            # Release the broker connection even when the cell is interrupted.
            producer.close()

Publishing records..
Message published successfully. Data: {'latitude': -37.623, 'longitude': 149.323, 'air_temperature_celcius': 19, 'relative_humidity': 56.8, 'windspeed_knots': 7.9, 'max_wind_speed': 11.1, 'precipitation': ' 0.00I', 'GHI_w/m2': 154, 'date': '2/1/2023', 'datetime': '2023-01-02T00:00:00', 'station': 111}
Message published successfully. Data: {'latitude': -38.038, 'longitude': 142.986, 'air_temperature_celcius': 15, 'relative_humidity': 50.7, 'windspeed_knots': 9.2, 'max_wind_speed': 13.0, 'precipitation': ' 0.02G', 'GHI_w/m2': 128, 'date': '3/1/2023', 'datetime': '2023-01-03T00:00:00', 'station': 111}
Message published successfully. Data: {'latitude': -37.95, 'longitude': 142.366, 'air_temperature_celcius': 16, 'relative_humidity': 53.6, 'windspeed_knots': 8.1, 'max_wind_speed': 15.0, 'precipitation': ' 0.00G', 'GHI_w/m2': 133, 'date': '4/1/2023', 'datetime': '2023-01-04T00:00:00', 'station': 111}
Message published successfully. Data: {'latitude': -38.231, 'longitude'

Message published successfully. Data: {'latitude': -37.63, 'longitude': 149.232, 'air_temperature_celcius': 18, 'relative_humidity': 57.0, 'windspeed_knots': 7.4, 'max_wind_speed': 15.0, 'precipitation': ' 0.00I', 'GHI_w/m2': 145, 'date': '29/1/2023', 'datetime': '2023-01-29T00:00:00', 'station': 111}
Message published successfully. Data: {'latitude': -37.644, 'longitude': 149.233, 'air_temperature_celcius': 22, 'relative_humidity': 58.0, 'windspeed_knots': 6.9, 'max_wind_speed': 12.0, 'precipitation': ' 0.00I', 'GHI_w/m2': 176, 'date': '30/1/2023', 'datetime': '2023-01-30T00:00:00', 'station': 111}
Message published successfully. Data: {'latitude': -37.642, 'longitude': 149.263, 'air_temperature_celcius': 20, 'relative_humidity': 55.8, 'windspeed_knots': 10.5, 'max_wind_speed': 15.9, 'precipitation': ' 0.01G', 'GHI_w/m2': 163, 'date': '31/1/2023', 'datetime': '2023-01-31T00:00:00', 'station': 111}
Message published successfully. Data: {'latitude': -37.634, 'longitude': 149.237, 'air_t