In [1]:
import json
from kafka import KafkaConsumer
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
from datetime import datetime, timedelta
from pyspark.sql.functions import from_json, col, when
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType
from pyspark.sql.functions import to_timestamp, from_unixtime, unix_timestamp
from pymongo import MongoClient
from pyspark.sql.functions import max, min, avg
from pyspark.sql.window import Window

In [2]:
conf = SparkConf().setAppName("TemperatureProcessing")
sc = SparkContext(conf=conf)
spark = SparkSession(sc)


23/01/12 16:29:18 WARN Utils: Your hostname, user-hp-pavilion-gaming-laptop-15-ec2xxx resolves to a loopback address: 127.0.1.1; using 192.168.184.92 instead (on interface wlo1)
23/01/12 16:29:18 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


23/01/12 16:29:19 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
kafkaParams = {
    "bootstrap_servers": "ec2-65-0-72-75.ap-south-1.compute.amazonaws.com:9092"}
topic = "IOTTemperatureStream01"


In [4]:
consumer = KafkaConsumer(topic, **kafkaParams)


In [5]:
df = spark.createDataFrame(spark.sparkContext.emptyRDD(), schema=StructType(
    [StructField("lane_number", StringType(), True),
        StructField("plant_name", StringType(), True),
        StructField("temperature", IntegerType(), True),
        StructField("timestamp", TimestampType(), True),
        StructField("component_type", StringType(), True),
        StructField("component_manufacturer", StringType(), True),
     ])
)


In [6]:
def filterData(dataframe):
    last_10_minutes = datetime.now() - timedelta(minutes=10)
    last_30_minutes= datetime.now() - timedelta(minutes=30)
    filtered_max_temp_data = dataframe.filter(dataframe.timestamp > last_30_minutes)
    filtered_data = dataframe.filter(dataframe.timestamp > last_10_minutes)
    filtered_data = filtered_data.filter(col("temperature") > 50)
    component_counts = filtered_data.groupBy("component_type").count()
    highest_temp_per_lane = filtered_data.groupBy("lane_number", "component_type").agg(max("temperature").alias("Max Temp"))
    # Sort the results by lane and maximum temperature in descending order
    highest_temp_per_lane = highest_temp_per_lane.sort("lane_number", "Max Temp", ascending=[True, False])


    # Create window partitions on lane_number and component_type
    window = Window.partitionBy("lane_number", "component_type")

    # Compute the maximum, minimum, and average temperature for each component type in each lane
    temp_stats = filtered_max_temp_data.select("*", max("temperature").over(window).alias("Max Temp"), min("temperature").over(window).alias("Min Temp"), avg("temperature").over(window).alias("Avg Temp"))

    component_counts.show()
    highest_temp_per_lane.show()
    temp_stats.show()

In [7]:
def process(rdd):
    print(rdd)
    global df
    data = spark.read.json(rdd)
    data = data.filter(data["component_info"].isNotNull())
    data = data.filter(data["timestamp"].isNotNull())
    if(data.count() > 0):
        data = data.withColumn("timestamp", when(col("timestamp").cast("double").isNotNull(
        ), col("timestamp").cast("double").cast("timestamp")).otherwise(col("timestamp")))
        data = data.withColumn("component_manufacturer",
                               data["component_info"]["component_manufacturer"])
        data = data.withColumn(
            "component_type", data["component_info"]["component_type"])
        data = data.drop("component_info")
        df=df.union(data)
    else:
        print("No data")
    filterData(df)


In [8]:
while True:
    messages = consumer.poll(1000)
    for tp, message in messages.items():
        for record in message:
            data = json.loads(record.value)
            print(data)
            if "component_info" in data and data["component_info"] and "component_type" in data["component_info"] and data["component_info"]["component_type"] and data['temperature'] is not None:
                rdd = sc.parallelize([record.value.decode('utf-8')])
                process(rdd)


{'plant_name': 'Plant1', 'lane_number': '4', 'timestamp': '1673521165', 'temperature': '79.57', 'component_info': {'component_type': 'solder_paste_printer', 'component_manufacturer': 'manufact1'}}
ParallelCollectionRDD[5] at readRDDFromFile at PythonRDD.scala:274


                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    1|
+--------------+-----+

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          4|     manufact1|   79.57|
+-----------+--------------+--------+

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+--------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+--------+
|          4|    Plant1|      79.57|2023-01-12 16:29:25|     manufact1|  solder_paste_printer|   79.57|   79.57|   79.57|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+--------+

{'plant_name': 'Plant3', 'lane_number': '3', 'timestamp': '1673521166', 'temperatu

                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   81.43|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+--------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+--------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|   72.98|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   73.70|    73.7|
|        

                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   81.43|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          3|    Plant1|      89.57|2023-01-12 16:29:30|     manufact1|  solder_paste_printer|   89.57|   89.57|             89.57|
|          3|    Plant3|      46.53|2023-01-12 16:29:26|     manufact2|  opt

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    4|
|     manufact2|    3|
|     manufact3|    1|
+--------------+-----+



                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   81.43|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          3|    Plant1|      89.57|2023-01-12 16:29:30|     manufact1|  solder_paste_printer|   89.57|   89.57|             89.57|
|          3|    Plant3|      46.53|2023-01-12 16:29:26|     manufact2|  opt

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    4|
|     manufact2|    3|
|     manufact3|    1|
+--------------+-----+



                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   81.43|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant1|      27.23|2023-01-12 16:29:36|     manufact3|         label_printer|   27.23|   27.23|             27.23|
|          3|    Plant1|      89.57|2023-01-12 16:29:30|     manufact1|  sol

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    5|
|     manufact2|    3|
|     manufact3|    1|
+--------------+-----+



                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   91.52|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant1|      27.23|2023-01-12 16:29:36|     manufact3|         label_printer|   27.23|   27.23|             27.23|
|          3|    Plant1|      89.57|2023-01-12 16:29:30|     manufact1|  sol

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    5|
|     manufact2|    3|
|     manufact3|    1|
+--------------+-----+



                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   91.52|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          1|    Plant3|      44.55|2023-01-12 16:29:38|     manufact2|  solder_paste_printer|   44.55|   44.55|             44.55|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant1|      27.23|2023-01-12 16:29:36|     manufact3|     

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    5|
|     manufact2|    4|
|     manufact3|    1|
+--------------+-----+



                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact2|   85.12|
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   91.52|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          1|    Plant3|      44.55|2023-01-12 16:29:38|     manufact2|  solder_paste_printer|   85.12|   44.55| 64.83500000000001|
|          1|    Plant4|      85.12|2023-01-12 16:29:39|     manufact2|  solder_paste_printer|   85.12|   44.55| 64.83500000000001|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  sol

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    5|
|     manufact2|    4|
|     manufact3|    2|
+--------------+-----+



                                                                                

+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact2|   85.12|
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   91.52|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
|          7|     manufact3|   81.67|
+-----------+--------------+--------+



                                                                                

+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|lane_number|plant_name|temperature|          timestamp|component_type|component_manufacturer|Max Temp|Min Temp|          Avg Temp|
+-----------+----------+-----------+-------------------+--------------+----------------------+--------+--------+------------------+
|          1|    Plant5|      72.98|2023-01-12 16:29:33|     manufact1|         label_printer|   72.98|   72.98|             72.98|
|          1|    Plant3|      44.55|2023-01-12 16:29:38|     manufact2|  solder_paste_printer|   85.12|   44.55| 64.83500000000001|
|          1|    Plant4|      85.12|2023-01-12 16:29:39|     manufact2|  solder_paste_printer|   85.12|   44.55| 64.83500000000001|
|          2|    Plant1|      73.70|2023-01-12 16:29:28|     manufact2|  solder_paste_printer|   73.70|   22.79|48.245000000000005|
|          2|    Plant3|      22.79|2023-01-12 16:29:34|     manufact2|  sol

                                                                                

+--------------+-----+
|component_type|count|
+--------------+-----+
|     manufact1|    5|
|     manufact2|    5|
|     manufact3|    2|
+--------------+-----+



ERROR:root:KeyboardInterrupt while sending command.                             
Traceback (most recent call last):
  File "/home/user/.local/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/home/user/.local/lib/python3.10/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/usr/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


+-----------+--------------+--------+
|lane_number|component_type|Max Temp|
+-----------+--------------+--------+
|          1|     manufact2|   85.12|
|          1|     manufact1|   72.98|
|          2|     manufact2|   73.70|
|          3|     manufact1|   89.57|
|          4|     manufact1|   91.52|
|          4|     manufact2|   89.26|
|          5|     manufact3|   87.32|
|          6|     manufact2|   93.28|
|          7|     manufact3|   81.67|
+-----------+--------------+--------+



KeyboardInterrupt: 

                                                                                