In [0]:
# Create the flooding_data database on first run (no-op if it already exists),
# then make it the session's current database so unqualified table names in
# later cells resolve inside it.
spark.sql("CREATE DATABASE IF NOT EXISTS flooding_data")
spark.sql("USE flooding_data")

DataFrame[]

In [0]:
# Drop every table in flooding_data whose name matches Tbl_Station_*.
# (Plain string: the statement has no placeholders, so no f-prefix is needed.)
tables = spark.sql("SHOW TABLES IN flooding_data LIKE 'Tbl_Station_*'")

for row in tables.collect():
    table_name = row['tableName']
    # Backtick-quote the identifier so a table name containing unusual
    # characters cannot break (or alter) the DROP statement.
    spark.sql(f"DROP TABLE IF EXISTS flooding_data.`{table_name}`")

In [0]:
import requests
import json
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType, MapType, ArrayType, DoubleType

# Station reference used throughout the notebook: it builds the request URL,
# is used to parse the measure string, and names the output table.
station_id = "E24899"

# Readings endpoint for this one station, requested over HTTPS rather than
# plain HTTP.
# Alternative endpoint previously tried here:
#   https://environment.data.gov.uk/flood-monitoring/data/readings
station_link = f'https://environment.data.gov.uk/flood-monitoring/id/stations/{station_id}/readings'
print(station_link)

http://environment.data.gov.uk/flood-monitoring/id/stations/E24899/readings


In [0]:
# Download the readings for the configured station.
# The timeout stops the cell from hanging forever on a stalled connection.
station_readings = requests.get(station_link, timeout=30)
# Fail here, with the HTTP status, instead of later with a confusing JSON or
# KeyError failure when the server returns an error page.
station_readings.raise_for_status()

station_readings_API_data = station_readings.text
station_API_JSON = json.loads(station_readings_API_data)
# The 'items' entry of the payload is what gets loaded into Spark downstream.
station_readings_API_JSON = station_API_JSON['items']

In [0]:
# Schema applied to each reading taken from the API response; all four
# fields are nullable.
schema = StructType([
    StructField("@id", StringType(), True),
    StructField("dateTime", StringType(), True),
    StructField("measure", StringType(), True),
    StructField("value", DoubleType(), True),
])

df = spark.createDataFrame(station_readings_API_JSON, schema)

# NOTE(review): to_date keeps only the calendar date, so several readings from
# the same day end up with an identical dateTime. Left unchanged to preserve
# the stored column type; consider to_timestamp if time of day matters.
df = df.withColumn("dateTime", to_date(col("dateTime")))

# 'After' is whatever follows the station id inside the measure string
# (split() interprets station_id as a regular expression).
df = df.withColumn('After', split(col('measure'), station_id).getItem(1))

# 'After' is "-"-separated. Positions 1, 2 and 5 become Level, Stage and Unit.
# Positions 0, 3 and 4 used to be materialised as 'rest', 'i' and 'Interval'
# but were dropped by the select below, so they are no longer created.
measure_parts = split(col('After'), "-")
df = (
    df.withColumn('Level', measure_parts.getItem(1))
      .withColumn('Stage', measure_parts.getItem(2))
      .withColumn('Unit', measure_parts.getItem(5))
      .withColumn('Station', lit(station_id))
      .select("Station", "dateTime", "Level", "Stage", "Value", "Unit")
      .sort("dateTime", ascending=False)
)

# Expose the readings to SQL and enrich them with station metadata. The LEFT
# JOIN keeps every reading even when no matching station row exists.
df.createOrReplaceTempView("station_measures_view")
filtered_df = spark.sql(
    "SELECT smv.dateTime, smv.Station, ms.catchmentName, ms.riverName, "
    "smv.Level, smv.Value, smv.Unit, ms.lat, ms.long "
    "FROM station_measures_view As smv "
    "LEFT JOIN flooding_data.tbl_monitoring_stations AS ms "
    "ON smv.Station= ms.stationReference"
)
display(filtered_df)

# Overwrite so this cell can be re-run on its own: the default save mode
# raises an error when the table already exists. The database is named
# explicitly so the write does not depend on the session's current database.
(
    filtered_df.write
    .mode("overwrite")
    .saveAsTable(f"flooding_data.Tbl_Station_{station_id}")
)

dateTime,Station,catchmentName,riverName,Level,Value,Unit,lat,long
2024-02-09,E24899,Essex,River Wid,level,1.837,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.826,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.814,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.894,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.864,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.849,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.877,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.94,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.922,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.931,mASD,51.648181,0.346098


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
-- Read back the per-station table written by the previous cell.
-- The station id is hard-coded here; update it if station_id changes.
SELECT * FROM Tbl_Station_E24899

dateTime,Station,catchmentName,riverName,Level,Value,Unit,lat,long
2024-02-09,E24899,Essex,River Wid,level,1.837,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.826,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.814,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.894,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.864,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.849,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.877,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.94,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.922,mASD,51.648181,0.346098
2024-02-09,E24899,Essex,River Wid,level,1.931,mASD,51.648181,0.346098


Databricks visualization. Run in Databricks to view.