In [None]:
import json
from kafka import KafkaConsumer
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
from datetime import datetime, timedelta
from pyspark.sql.functions import from_json,col
from pyspark.sql.types import StructType, StructField, StringType, IntegerType,TimestampType
from pyspark.sql.functions import to_timestamp


# Set up the Spark configuration, context and session.
# getOrCreate() is used for both so that re-running this cell in a live kernel
# reuses the existing SparkContext instead of failing because one is already
# running.
conf = SparkConf().setAppName("TemperatureProcessing")
sc = SparkContext.getOrCreate(conf=conf)
spark = SparkSession.builder.config(conf=conf).getOrCreate()

# Kafka connection settings (keyword arguments for KafkaConsumer) and the
# topic to read from.
kafkaParams = {"bootstrap_servers": "ec2-65-0-72-75.ap-south-1.compute.amazonaws.com:9092"}
topic = "IOTTemperatureStream01"

# Create a Kafka consumer subscribed to the topic. No deserializer is
# configured, so record values arrive as raw bytes.
consumer = KafkaConsumer(topic, **kafkaParams)

# define the processing logic for each batch of data
def process(rdd):
    """Parse an RDD of JSON strings into a DataFrame and print it.

    Args:
        rdd: RDD whose elements are JSON documents as text. The polling loop
            in this notebook passes decoded Kafka record values.

    Returns:
        None. The only visible effect is the table printed by ``show()``.
    """
    # No schema is supplied here, so Spark infers one from the records.
    data = spark.read.json(rdd)
    data.show()

    # ------------------------------------------------------------------
    # Disabled sketch of the intended aggregation and persistence step.
    # It is kept for reference only and does not run.
    # NOTE(review): the MongoDB URI used to be written inline here together
    # with a username and password. It has been replaced by a MONGODB_URI
    # placeholder; supply it from the environment or a secrets store, and
    # rotate the credentials that were previously committed.
    # ------------------------------------------------------------------
    # # Filter data to only include records from the last 10 minutes
    # last_10_minutes = datetime.now() - timedelta(minutes=10)
    # filtered_data = data.filter(data.timestamp > last_10_minutes)
    #
    # # Filter data to only include records with temperature > 50
    # filtered_data = filtered_data.filter(filtered_data.temperature > 50)
    #
    # # Group the data by component type and count the number of occurrences
    # component_counts = filtered_data.groupBy("component_info.component_type").count()
    #
    # # Per lane, which component reported the highest temperature
    # max_temp_per_lane = filtered_data.groupBy("lane_number","component_info.component_type").agg({"temperature":"max"}).withColumnRenamed("max(temperature)", "Max Temp")
    #
    # # Write the component counts and max temp per lane to a database or
    # # other storage solution. The connection details and the table name must
    # # be specified; any database type can be used (MySQL, PostgreSQL,
    # # MongoDB, Cassandra).
    # component_counts.write.format("com.mongodb.spark.sql.DefaultSource")\
    #     .option("uri", MONGODB_URI)\
    #     .option("collection", "component_counts")\
    #     .mode("append")\
    #     .save()
    #
    # max_temp_per_lane.write\
    #     .format("com.mongodb.spark.sql.DefaultSource")\
    #     .option("uri", MONGODB_URI)\
    #     .option("collection", "max_temp_per_lane")\
    #     .mode("append")\
    #     .save()
# Schema for one temperature message: four scalar fields plus a nested
# component_info struct. Every field is nullable (the StructType.add default),
# and temperature is declared as a string.
# NOTE(review): nothing in the visible code passes this schema to a reader —
# confirm whether it is meant to be used by process().
json_schema = (
    StructType()
    .add("plant_name", StringType())
    .add("lane_number", StringType())
    .add("timestamp", TimestampType())
    .add("temperature", StringType())
    .add(
        "component_info",
        StructType()
        .add("component_type", StringType())
        .add("component_manufacturer", StringType()),
    )
)
# Poll Kafka until the cell is interrupted. All records returned by a single
# poll are handed to process() as one RDD, so Spark runs one job per poll
# rather than one job per record.
try:
    while True:
        # poll() returns a mapping of partition -> list of records; it is
        # empty when nothing arrived within the timeout (milliseconds).
        messages = consumer.poll(timeout_ms=1000)
        payloads = [
            record.value.decode()
            for records in messages.values()
            for record in records
            # A record may carry no value (None); decoding it would raise.
            if record.value is not None
        ]
        if payloads:
            process(sc.parallelize(payloads))
finally:
    # Runs when the loop is interrupted (e.g. a kernel interrupt) or fails,
    # so the consumer's connections are released. The exception itself still
    # propagates.
    consumer.close()

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html

# Dash application object; the layout and the callbacks in the following cells
# are attached to it.
app = dash.Dash()

In [None]:
# Page skeleton: a heading followed by two graph placeholders. The graph ids
# are the targets of the callback Outputs defined further down.
app.layout = html.Div(
    children=[
        html.H1(children='Report 3: Component Temperature'),
        dcc.Graph(id='component-counts'),
        dcc.Graph(id='max-temp-per-lane'),
    ]
)

In [None]:
from pyspark.sql.functions import window
# Input/Output are required by the @app.callback decorators below and were not
# imported anywhere in the notebook.
from dash.dependencies import Input, Output


def _recent_readings(source, minutes):
    """Load JSON readings and keep those newer than ``minutes`` ago.

    Args:
        source: Anything ``spark.read.json`` accepts.
        minutes: Size of the look-back window, in minutes, measured from the
            current local time.

    Returns:
        A Spark DataFrame restricted to rows whose ``timestamp`` is later than
        the cutoff, with ``temperature`` cast to double.
    """
    readings = spark.read.json(source)
    cutoff = datetime.now() - timedelta(minutes=minutes)
    recent = readings.filter(readings.timestamp > cutoff)
    # The message schema in this notebook declares temperature as a string;
    # casting makes the ``> 50`` filter and ``max`` aggregation numeric rather
    # than dependent on implicit coercion or string ordering.
    return recent.withColumn("temperature", col("temperature").cast("double"))


# NOTE(review): both callbacks listen to a component with id
# 'IOTTemperatureStream01' (the Kafka topic name). The layout visible in this
# notebook only contains an H1 and two Graphs, so a component with that id and
# a `value` property must be added before these callbacks can fire.
@app.callback(
    Output('component-counts', 'figure'),
    [Input('IOTTemperatureStream01', "value")])
def update_component_counts(value):
    """Build the bar chart of hot readings per component type.

    Counts readings from the last 10 minutes whose temperature exceeds 50,
    grouped by ``component_info.component_type``.

    Args:
        value: Value of the triggering component, passed to
            ``spark.read.json``.

    Returns:
        A figure dict with one bar trace and a title.
    """
    hot = _recent_readings(value, 10)
    hot = hot.filter(hot.temperature > 50)
    # Alias the nested field explicitly so the result column name is known,
    # then collect: a Spark Column is an expression, not data, and cannot be
    # placed in a figure.
    rows = (
        hot.groupBy(col("component_info.component_type").alias("component_type"))
        .count()
        .collect()
    )
    return {'data': [{'x': [row['component_type'] for row in rows],
                      'y': [row['count'] for row in rows],
                      'type': 'bar'}],
            'layout': {'title': 'Component Counts'}}


@app.callback(
    Output('max-temp-per-lane', 'figure'),
    [Input('IOTTemperatureStream01', "value")])
def update_max_temp_per_lane(value):
    """Build the chart of maximum temperature per lane and component type.

    Uses readings from the last 30 minutes, grouped by ``lane_number`` and
    ``component_info.component_type``.

    Args:
        value: Value of the triggering component, passed to
            ``spark.read.json``.

    Returns:
        A figure dict with one trace (lane number against max temperature)
        and a title.
    """
    recent = _recent_readings(value, 30)
    rows = (
        recent.groupBy(
            col("lane_number"),
            col("component_info.component_type").alias("component_type"),
        )
        .agg({"temperature": "max"})
        .withColumnRenamed("max(temperature)", "Max Temp")
        .collect()
    )
    return {'data': [{'x': [row['lane_number'] for row in rows],
                      'y': [row['Max Temp'] for row in rows],
                      'type': 'line'}],
            'layout': {'title': 'Max Temp Per Lane'}}