# Load Python Packages

In [0]:
import numpy as np 
import pandas as pd 
import os
import pyspark
from pyspark.sql import SparkSession



# Weather Forecast with Simulated IOT Weather Data

In [0]:
# Load lai Delta Table chua du lieu
df_from_delta = spark.read.table("weather_forecast_delta_table")
# Filter theo BBQ_weather = -1 tuc la chua duoc predict, = 0 Thoi tiet xau, = 1 Thoi tiet dep
weather_forecast_df = df_from_delta.filter(df_from_delta["BBQ_weather"] == -1)
display(weather_forecast_df)

key_id,time_stamp,BBQ_weather,cloud_cover,wind_speed,wind_gust,humidity,pressure,global_radiation,precipitation,sunshine,temp_mean,temp_min,temp_max
94603,1714277645,-1,3,7.625449640520935,16.048118964835258,0.5392202578283631,1.0173187701059192,2.3761611017316158,4.514262345091068,12.159595456269557,16.983588302403113,-2.0847366074574234,27.425118819296152
94604,1714277651,-1,4,7.504974422330644,3.295188466982271,0.4215644828801029,1.039074721403685,0.5018122133174047,4.550774991693873,21.4768258181674,16.13140005607749,-0.3590568026075438,34.37575210143853
94602,1714277642,-1,4,6.354334339389647,20.19716624793635,0.2833427120919592,1.0369272214916112,0.7189952564445578,5.357868179702652,1.409730401715974,19.879597030185717,2.6510822383737693,34.04260839559076
94617,1714277778,-1,5,5.664542575857061,22.59583469637844,0.6048060553732011,1.024395910915981,0.7797177723701675,2.179531290678297,15.336589419766057,17.241614539949104,-3.97263427134364,29.628897460759188
94610,1714277715,-1,5,10.806883610596836,5.631170361696288,0.7768859095607864,1.0152524440672075,3.011726944553172,4.089578331696991,19.20603596950078,15.077626694615732,-5.038192316582684,31.744937667182725
94613,1714277735,-1,1,1.4526240083381898,20.376438378054303,0.4779351012411947,0.9669273318542124,2.071171547263914,5.491804566539907,8.458252660067942,15.396418366580525,0.4280369611584369,25.18428736560688
94607,1714277686,-1,2,5.173197916962663,7.08451339627265,0.4820286526041498,0.9677272191765832,0.1065351565697645,4.259862816683537,21.06538830195858,11.178000794800582,-0.5044363442601147,25.682759004522016
94616,1714277761,-1,1,0.5485845068262672,19.6129504983376,0.7616821127740382,0.97404110908032,2.013471655330606,3.714785682401619,1.8972983787898787,12.162552220310532,-4.084874705227862,29.719151261000874
94608,1714277697,-1,1,9.993249628937216,4.235476996463609,0.3538399337929329,0.97143769296632,0.9749466685484556,2.347631333680621,3.49913689110143,13.255591894906525,2.562351606485385,33.32650342148484
94612,1714277728,-1,7,3.901526564695253,5.016677232301955,0.5000007989938993,0.9775027514046604,2.0858728021187125,0.8236502766688113,15.86420310750901,14.69139902350976,3.092293137831863,29.006841521836694


In [0]:
from pyspark.ml.feature import VectorAssembler
# Tao cot features voi VectorAssembler
weather_forecast_assembler = VectorAssembler(inputCols = ['cloud_cover', 'wind_speed', 'wind_gust', 'humidity', 'pressure', 'global_radiation', 'precipitation', 'sunshine', 'temp_mean', 'temp_min', 'temp_max'], outputCol = 'features')

In [0]:
# Transform Dataframe bang VectorAssembler => Tao data theo ML format
weather_forecast_data = weather_forecast_assembler.transform(weather_forecast_df)

In [0]:
# Them 2 cot 'key_id', 'BBQ_weather' vao weather_forecast_data => final_weather_forecast_data se dua vao model de predict
final_weather_forecast_data = weather_forecast_data.select(['key_id','features','BBQ_weather'])
final_weather_forecast_data.show()

+------+--------------------+-----------+
|key_id|            features|BBQ_weather|
+------+--------------------+-----------+
| 94603|[3.0,7.6254496405...|         -1|
| 94604|[4.0,7.5049744223...|         -1|
| 94602|[4.0,6.3543343393...|         -1|
| 94617|[5.0,5.6645425758...|         -1|
| 94610|[5.0,10.806883610...|         -1|
| 94613|[1.0,1.4526240083...|         -1|
| 94607|[2.0,5.1731979169...|         -1|
| 94616|[1.0,0.5485845068...|         -1|
| 94608|[1.0,9.9932496289...|         -1|
| 94612|[7.0,3.9015265646...|         -1|
| 94621|[2.0,10.868018591...|         -1|
| 94619|[7.0,6.1353696479...|         -1|
| 94622|[7.0,9.4374558840...|         -1|
| 94614|[5.0,1.9783273993...|         -1|
| 94605|[4.0,2.4649769826...|         -1|
| 94606|[2.0,7.5938160859...|         -1|
| 94620|[3.0,1.9697660339...|         -1|
| 94618|[5.0,4.2673878471...|         -1|
| 94609|[1.0,1.9143047915...|         -1|
| 94615|[2.0,9.1812797893...|         -1|
+------+--------------------+-----

In [0]:
%py
# Load lai cac Saved model
from pyspark.ml.classification import RandomForestClassificationModel
# from pyspark.ml.classification import DecisionTreeClassificationModel
# from pyspark.ml.classification import GBTClassificationModel

basePath = "/tmp/mllib-persistence-example"
# savedmodel = DecisionTreeClassificationModel.load(basePath + "/dtc_model")
savedmodel = RandomForestClassificationModel.load(basePath + "/rfc_model")
# savedmodel = GBTClassificationModel.load(basePath + "/gbt_model")

In [0]:
# Model predict cac label
model_predictions = savedmodel.transform(final_weather_forecast_data)

In [0]:
model_predictions = model_predictions.withColumnRenamed("key_id", "key_id_1")
model_predictions.show()

+--------+--------------------+-----------+--------------------+--------------------+----------+
|key_id_1|            features|BBQ_weather|       rawPrediction|         probability|prediction|
+--------+--------------------+-----------+--------------------+--------------------+----------+
|   94603|[3.0,7.6254496405...|         -1|[12.4837838103218...|[0.62418919051609...|       0.0|
|   94604|[4.0,7.5049744223...|         -1|[12.2809471171706...|[0.61404735585853...|       0.0|
|   94602|[4.0,6.3543343393...|         -1|[13.1222305576394...|[0.65611152788197...|       0.0|
|   94617|[5.0,5.6645425758...|         -1|[10.7484896926748...|[0.53742448463374...|       0.0|
|   94610|[5.0,10.806883610...|         -1|[14.7431547438195...|[0.73715773719097...|       0.0|
|   94613|[1.0,1.4526240083...|         -1|[16.8333333333333...|[0.84166666666666...|       0.0|
|   94607|[2.0,5.1731979169...|         -1|[12.0255383682135...|[0.60127691841067...|       0.0|
|   94616|[1.0,0.5485845068...

# Save prediction labels back to Delta Live Table

In [0]:
# Joining based on a common column, Inner join on 'key_id'
result_df = weather_forecast_df.join(model_predictions, weather_forecast_df.key_id == model_predictions.key_id_1, "inner").select(weather_forecast_df["key_id"], weather_forecast_df["time_stamp"], model_predictions["prediction"], weather_forecast_df["cloud_cover"], weather_forecast_df["wind_speed"], weather_forecast_df["wind_gust"], weather_forecast_df["humidity"], weather_forecast_df["pressure"], weather_forecast_df["global_radiation"], weather_forecast_df["precipitation"], weather_forecast_df["sunshine"], weather_forecast_df["temp_mean"], weather_forecast_df["temp_min"], weather_forecast_df["temp_max"])
result_df = result_df.withColumnRenamed("prediction", "BBQ_weather")
result_df.show()

+------+----------+-----------+-----------+------------------+------------------+-------------------+------------------+--------------------+-------------------+------------------+------------------+-------------------+------------------+
|key_id|time_stamp|BBQ_weather|cloud_cover|        wind_speed|         wind_gust|           humidity|          pressure|    global_radiation|      precipitation|          sunshine|         temp_mean|           temp_min|          temp_max|
+------+----------+-----------+-----------+------------------+------------------+-------------------+------------------+--------------------+-------------------+------------------+------------------+-------------------+------------------+
| 94603|1714277645|        0.0|          3| 7.625449640520935|16.048118964835258| 0.5392202578283631|1.0173187701059192|  2.3761611017316153|  4.514262345091068|12.159595456269557|16.983588302403113|-2.0847366074574234|27.425118819296152|
| 94604|1714277651|        0.0|          4| 

In [0]:
# Tao ra 1 table tam thoi tu dataframe result_df (chi truy xuat bang SQL voi bang nay)
result_df.createOrReplaceTempView("result_table")

In [0]:
%sql
-- Kiem tra Bang result_table duoc tao tam thoi
SELECT * FROM result_table

key_id,time_stamp,BBQ_weather,cloud_cover,wind_speed,wind_gust,humidity,pressure,global_radiation,precipitation,sunshine,temp_mean,temp_min,temp_max
94603,1714277645,0.0,3,7.625449640520935,16.048118964835258,0.5392202578283631,1.0173187701059192,2.3761611017316158,4.514262345091068,12.159595456269557,16.983588302403113,-2.0847366074574234,27.425118819296152
94604,1714277651,0.0,4,7.504974422330644,3.295188466982271,0.4215644828801029,1.039074721403685,0.5018122133174047,4.550774991693873,21.4768258181674,16.13140005607749,-0.3590568026075438,34.37575210143853
94602,1714277642,0.0,4,6.354334339389647,20.19716624793635,0.2833427120919592,1.0369272214916112,0.7189952564445578,5.357868179702652,1.409730401715974,19.879597030185717,2.6510822383737693,34.04260839559076
94617,1714277778,0.0,5,5.664542575857061,22.59583469637844,0.6048060553732011,1.024395910915981,0.7797177723701675,2.179531290678297,15.336589419766057,17.241614539949104,-3.97263427134364,29.628897460759188
94624,1714277871,0.0,4,7.618595329079818,20.003391519092933,0.7677589688364038,0.9996838328146908,2.6970347009536555,2.0033718411354173,7.875573127053421,13.985524427234427,-4.403486918077126,29.62780799121034
94610,1714277715,0.0,5,10.806883610596836,5.631170361696288,0.7768859095607864,1.0152524440672075,3.011726944553172,4.089578331696991,19.20603596950078,15.077626694615732,-5.038192316582684,31.744937667182725
94613,1714277735,0.0,1,1.4526240083381898,20.376438378054303,0.4779351012411947,0.9669273318542124,2.071171547263914,5.491804566539907,8.458252660067942,15.396418366580525,0.4280369611584369,25.18428736560688
94607,1714277686,0.0,2,5.173197916962663,7.08451339627265,0.4820286526041498,0.9677272191765832,0.1065351565697645,4.259862816683537,21.06538830195858,11.178000794800582,-0.5044363442601147,25.682759004522016
94616,1714277761,0.0,1,0.5485845068262672,19.6129504983376,0.7616821127740382,0.97404110908032,2.013471655330606,3.714785682401619,1.8972983787898787,12.162552220310532,-4.084874705227862,29.719151261000874
94608,1714277697,0.0,1,9.993249628937216,4.235476996463609,0.3538399337929329,0.97143769296632,0.9749466685484556,2.347631333680621,3.49913689110143,13.255591894906525,2.562351606485385,33.32650342148484


In [0]:
%sql
-- Join 2 bang Delta Table va result table, sau do tim dung key_id de thay BBQ_weather
MERGE INTO weather_forecast_delta_table dt
USING result_table rt
ON dt.key_id=rt.key_id
WHEN MATCHED THEN UPDATE SET dt.BBQ_weather=rt.BBQ_weather

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
25,25,0,0


In [0]:
%sql
-- Kiem tra lai su thay doi trong Delta Table
select  * from weather_forecast_delta_table

key_id,time_stamp,BBQ_weather,cloud_cover,wind_speed,wind_gust,humidity,pressure,global_radiation,precipitation,sunshine,temp_mean,temp_min,temp_max
94602,1714277642,0,4,6.354334339389647,20.19716624793635,0.2833427120919592,1.0369272214916112,0.7189952564445578,5.357868179702652,1.409730401715974,19.879597030185717,2.6510822383737693,34.04260839559076
94603,1714277645,0,3,7.625449640520935,16.048118964835258,0.5392202578283631,1.0173187701059192,2.3761611017316158,4.514262345091068,12.159595456269557,16.983588302403113,-2.0847366074574234,27.425118819296152
94604,1714277651,0,4,7.504974422330644,3.295188466982271,0.4215644828801029,1.039074721403685,0.5018122133174047,4.550774991693873,21.4768258181674,16.13140005607749,-0.3590568026075438,34.37575210143853
94605,1714277668,0,4,2.464976982649582,20.006129799990024,0.4151845212069111,0.9981785344486682,0.9075902539876908,0.5038429498653959,16.92400555302241,11.400605959587928,3.3122054897918307,25.86734320686496
94606,1714277673,0,2,7.593816085942606,13.98283288906952,0.8585351702108338,1.0462164254284347,2.152056906719451,3.657391709636155,4.147151867893573,15.14542401503703,2.5678068340081968,27.083225776065007
94607,1714277686,0,2,5.173197916962663,7.08451339627265,0.4820286526041498,0.9677272191765832,0.1065351565697645,4.259862816683537,21.06538830195858,11.178000794800582,-0.5044363442601147,25.682759004522016
94608,1714277697,0,1,9.993249628937216,4.235476996463609,0.3538399337929329,0.97143769296632,0.9749466685484556,2.347631333680621,3.49913689110143,13.255591894906525,2.562351606485385,33.32650342148484
94621,1714277831,0,2,10.868018591875774,7.858677144517153,0.6600053848348739,1.0407735858905696,2.875803661050963,2.3899431317750257,20.914669156277636,16.53849853991693,1.804783621096166,29.00001443654748
94622,1714277838,0,7,9.43745588401335,4.9009303803062245,0.8634141477043948,1.038648237842784,0.0203078735787912,0.2194249214324958,11.761478815947552,10.278241189497406,-4.4027855202616095,26.505369137837164
94623,1714277855,0,7,3.3730346031295,8.995402634238129,0.5985772243741904,0.9792674329234344,3.1973437917246086,3.996432976281697,16.656312744024408,15.010005977483306,-2.0812759526918474,26.346484051038463
