In [0]:
%sql

-- Reset the demo schema so the notebook starts from a clean state:
-- CASCADE also drops the objects contained in the schema.
drop database if exists dev.iot_sensor cascade;
create database if not exists dev.iot_sensor;

-- Bronze layer table that stores the raw Kafka records.
-- Both columns are plain strings; `value` carries the JSON payload unparsed.
create table if not exists dev.iot_sensor.temp_sensor_raw (
  key string,
  value string
) using delta;

-- Sample readings for sensor SET41, one every 5 minutes from 09:54 to 10:29.
-- Each payload has a "CreatedTime" string and a numeric "Reading".
INSERT INTO dev.iot_sensor.temp_sensor_raw VALUES
                  ('SET41', '{"CreatedTime": "2019-02-05 09:54:00","Reading": 36.2}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 09:59:00","Reading": 36.5}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 10:04:00","Reading": 36.8}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 10:09:00","Reading": 36.2}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 10:14:00","Reading": 36.5}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 10:19:00","Reading": 36.3}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 10:24:00","Reading": 37.7}'),
                  ('SET41', '{"CreatedTime": "2019-02-05 10:29:00","Reading": 37.2}');


-- External volume backing /Volumes/dev/iot_sensor/max_temp_sensor_checkpoint,
-- the path the streaming job uses as its checkpointLocation.
-- NOTE(review): dropping the schema presumably leaves the files at this external
-- location in place, so a checkpoint from an earlier run may survive the reset — confirm.
create external volume if not exists dev.iot_sensor.max_temp_sensor_checkpoint
location 'abfss://structured-streaming-course@dbstorageact.dfs.core.windows.net/sensor_analysis_app/max_temp_sensor_checkpoint';



In [0]:
%sql
-- Inspect the raw bronze rows: `key` holds the sensor id, `value` the JSON payload as a string.
select * from dev.iot_sensor.temp_sensor_raw;

key,value
SET41,"{""CreatedTime"": ""2019-02-05 09:54:00"",""Reading"": 36.2}"
SET41,"{""CreatedTime"": ""2019-02-05 09:59:00"",""Reading"": 36.5}"
SET41,"{""CreatedTime"": ""2019-02-05 10:04:00"",""Reading"": 36.8}"
SET41,"{""CreatedTime"": ""2019-02-05 10:09:00"",""Reading"": 36.2}"
SET41,"{""CreatedTime"": ""2019-02-05 10:14:00"",""Reading"": 36.5}"
SET41,"{""CreatedTime"": ""2019-02-05 10:19:00"",""Reading"": 36.3}"
SET41,"{""CreatedTime"": ""2019-02-05 10:24:00"",""Reading"": 37.7}"
SET41,"{""CreatedTime"": ""2019-02-05 10:29:00"",""Reading"": 37.2}"


In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

class MaxTempAnalysis:
    """Structured Streaming job: maximum temperature per sensor over sliding windows.

    Pipeline: stream rows from the bronze Delta table, parse the JSON payload in
    ``value``, compute the highest ``Reading`` per sensor and time window, and
    write the result to a table.

    The class relies on a ``spark`` session and on the names exported by
    ``pyspark.sql.functions`` / ``pyspark.sql.types`` being available in the
    enclosing notebook scope.
    """

    def __init__(
        self,
        bz_layer_raw_tb="dev.iot_sensor.temp_sensor_raw",
        max_temp_tb="dev.iot_sensor.max_temp_readings",
        checkpoint_location="/Volumes/dev/iot_sensor/max_temp_sensor_checkpoint",
        window_duration="15 minutes",
        slide_duration="5 minutes",
    ):
        """Configure the job; every default reproduces the original hard-coded value.

        Args:
            bz_layer_raw_tb: Fully qualified name of the bronze source table.
            max_temp_tb: Fully qualified name of the table receiving the aggregates.
            checkpoint_location: Path used as the stream's ``checkpointLocation``.
            window_duration: Length of each aggregation window.
            slide_duration: Interval between the starts of consecutive windows.
        """
        self.bz_layer_raw_tb = bz_layer_raw_tb
        self.max_temp_tb = max_temp_tb
        self.checkpoint_location = checkpoint_location
        self.window_duration = window_duration
        self.slide_duration = slide_duration

    def get_schema(self):
        """Return the schema of the JSON payload stored in the ``value`` column."""
        return StructType([
            # Kept as a string here; converted to a timestamp in format_raw_data.
            StructField("CreatedTime", StringType(), True),
            StructField("Reading", DoubleType(), True),
        ])

    def load_raw_data(self):
        """Return a streaming DataFrame over the bronze source table."""
        return spark.readStream.format("delta").table(self.bz_layer_raw_tb)

    def format_raw_data(self, raw_df):
        """Parse the raw key/value rows into typed columns.

        Args:
            raw_df: DataFrame with string columns ``key`` and ``value``.

        Returns:
            DataFrame with ``SensorId`` (copied from ``key``), ``CreatedTime``
            (parsed as a timestamp) and ``Reading``.
        """
        return (
            raw_df
            .withColumn("SensorId", raw_df.key)
            .withColumn("value", from_json(raw_df.value, self.get_schema()))
            # Flatten the parsed struct so its fields become top-level columns.
            .select("SensorId", "value.*")
            .withColumn("CreatedTime", to_timestamp("CreatedTime", "yyyy-MM-dd HH:mm:ss"))
        )

    # Backward-compatible alias: the method was originally exposed under this
    # misspelled name, so existing callers keep working.
    formaat_raw_data = format_raw_data

    def find_max_temp_reading(self, formatted_df):
        """Compute the maximum ``Reading`` per sensor and sliding time window.

        Args:
            formatted_df: DataFrame produced by ``format_raw_data``.

        Returns:
            DataFrame with columns ``SensorId``, ``start``, ``end`` and ``max_temp``.
        """
        sliding_window = window("CreatedTime", self.window_duration, self.slide_duration)
        return (
            formatted_df
            .groupBy("SensorId", sliding_window)
            # `max` is the Spark aggregate brought in by the notebook's star import,
            # not the Python builtin.
            .agg(max("Reading").alias("max_temp"))
            .select("SensorId", "window.start", "window.end", "max_temp")
        )

    def write_to_table(self, agg_df):
        """Start the streaming write of the aggregates and return the query handle.

        Uses ``complete`` output mode and the configured checkpoint location.
        ``toTable`` is the documented PySpark call that starts the query and
        writes the stream to a table.
        """
        return (
            agg_df
            .writeStream
            .queryName("max_temp_reading")
            .outputMode("complete")
            .option("checkpointLocation", self.checkpoint_location)
            .toTable(self.max_temp_tb)
        )

    def start_stream(self):
        """Run the full pipeline (load, format, aggregate, write) and return the query handle."""
        raw_data = self.load_raw_data()
        formatted_data = self.format_raw_data(raw_data)
        agg_data = self.find_max_temp_reading(formatted_data)
        return self.write_to_table(agg_data)


In [0]:
# Build the job and start the streaming pipeline. The value returned by
# start_stream() is kept in `streaming_query` so a later cell can stop it.
max_temp = MaxTempAnalysis()
streaming_query= max_temp.start_stream()

In [0]:
# Stop the query started by start_stream(); no trigger is configured on the
# writer, so it keeps running until stopped explicitly.
streaming_query.stop()

In [0]:
%sql
-- Show the per-sensor maximum for every sliding window, ordered by window end time.
select * from dev.iot_sensor.max_temp_readings order by end;

SensorId,start,end,max_temp
SET41,2019-02-05T09:40:00Z,2019-02-05T09:55:00Z,36.2
SET41,2019-02-05T09:45:00Z,2019-02-05T10:00:00Z,36.5
SET41,2019-02-05T09:50:00Z,2019-02-05T10:05:00Z,36.8
SET41,2019-02-05T09:55:00Z,2019-02-05T10:10:00Z,36.8
SET41,2019-02-05T10:00:00Z,2019-02-05T10:15:00Z,36.8
SET41,2019-02-05T10:05:00Z,2019-02-05T10:20:00Z,36.5
SET41,2019-02-05T10:10:00Z,2019-02-05T10:25:00Z,37.7
SET41,2019-02-05T10:15:00Z,2019-02-05T10:30:00Z,37.7
SET41,2019-02-05T10:20:00Z,2019-02-05T10:35:00Z,37.7
SET41,2019-02-05T10:25:00Z,2019-02-05T10:40:00Z,37.2
