In [1]:
from os import path
import urllib
import json

import pandas as pd


API_URL = "http://environment.data.gov.uk/flood-monitoring/"
url = API_URL + "data/readings?latest" # I've changed the function here a little because I prefer to have unitName==mm 
data = urllib.request.urlopen(url)
data = json.load(data)
data

{'@context': 'http://environment.data.gov.uk/flood-monitoring/meta/context.jsonld',
 'meta': {'publisher': 'Environment Agency',
  'licence': 'http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/',
  'documentation': 'http://environment.data.gov.uk/flood-monitoring/doc/reference',
  'version': '0.9',
  'comment': 'Status: Beta service',
  'hasFormat': ['http://environment.data.gov.uk/flood-monitoring/data/readings.csv?latest',
   'http://environment.data.gov.uk/flood-monitoring/data/readings.rdf?latest',
   'http://environment.data.gov.uk/flood-monitoring/data/readings.ttl?latest',
   'http://environment.data.gov.uk/flood-monitoring/data/readings.html?latest']},
 'items': [{'@id': 'http://environment.data.gov.uk/flood-monitoring/data/readings/7056-level-groundwater-i-15_min-mBDAT/2024-10-24T14-30-00Z',
   'dateTime': '2024-10-24T14:30:00Z',
   'measure': 'http://environment.data.gov.uk/flood-monitoring/id/measures/7056-level-groundwater-i-15_min-mBDAT',
   'value':

# Observe Data and Encapsulate Functions

Define the functions `get_latest_rainfall_readings`, `get_latest_tide_readings`, and `get_latest_river_readings` as follows:

- **Data Observation**: 
  - The raw data, after being retrieved, contains the key information within the `items` field. Extract this as the initial dataset.
  - From the `@id` field in `items`, all relevant metadata (such as `stationReference`, `qualifier`, and `unitName`) can be extracted. To achieve this, define the helper functions `split_rainfall_api_id` and `split_level_api_id` that use string splitting to isolate the desired information.

- **Data Extraction**:
  - Extract additional relevant fields, such as `dateTime` and `value`, from the `items` field.
  - Construct and return a DataFrame with the cleaned and structured data.

---

# Implementation Steps

### 1. Write Functions for Data Retrieval:
Develop functions that fetch the latest `rainfall`, `tide`, and `river` data, storing the results in individual DataFrames (`live_data`).

### 2. Process Each Dataset:
Utilize the flood tool's functions to handle `rainfall`, `tide`, and `river` data separately. This modular approach simplifies visualization tasks later.

### 3. Visualize Risk Areas:
Leverage flood tool utilities to identify and visualize potential areas at risk. Generate plots showing the distribution of high-risk regions based on live data.

By modularizing each step, you ensure a clean workflow for data retrieval, processing, and visualization.


# 1. get live data

## 1.1 get live rainfall data

In [None]:
def split_rainfall_api_id(input):
    """
    Split rainfall station API id into component parts
    """
    try:
        # Split Path
        parts = input.split('/')
        # Find the section that contains the key information
        target_part = parts[-2] 
        # Further split into target fields
        station, qualifier, unit = target_part.split('-rainfall-')[0], \
                                          target_part.split('-rainfall-')[1].split('-t-15_min-')[0], \
                                          target_part.split('-t-15_min-')[1]
        return station, qualifier, unit
    except (IndexError, ValueError):
        # Returns a null value to handle an exception
        return "", "", ""
    
def extract_station_reference(x):
    """Extract stationReference"""
    return x[0]

def extract_qualifier(x):
    """Extract qualifier and format"""
    return x[1].replace('_', ' ').title()

def extract_unit_name(x):
    """Extract unitName"""
    return x[2]




def get_latest_rainfall_readings(url_rainfall):
    """
    Fetches the latest rainfall readings from a live API, processes the data, 
    and returns it as a structured DataFrame.

    Parameters:
    url_rainfall (str): URL of the live API providing rainfall data.

    Returns:
    pandas.DataFrame: A DataFrame of rainfall data filtered by unit (mm), indexed by 
                      stationReference and dateTime, with relevant metadata included.
    """
    data = urllib.request.urlopen(url_rainfall)
    data = json.load(data)

    df_item = pd.DataFrame(data["items"])

    id_data = df_item["@id"].apply(split_rainfall_api_id)
    df_item["stationReference"] = id_data.apply(extract_station_reference)
    df_item["qualifier"] = id_data.apply(extract_qualifier)
    df_item["unitName"] = id_data.apply(extract_unit_name)


    df_item.drop(["@id", "measure"], axis=1, inplace=True)

    df_item["dateTime"] = df_item["dateTime"].apply(pd.to_datetime)

    df_item.set_index(["stationReference", "dateTime"], inplace=True)

    df_item["parameter"] = "rainfail"

    df_item["value"] = pd.to_numeric(df_item["value"], errors="coerce")
    df_item = df_item[df_item['unitName'] == 'mm']

    return df_item.sort_index()

url_rainfall= "http://environment.data.gov.uk/flood-monitoring/data/readings?latest&unitName=mm"
live_rainfall = get_latest_rainfall_readings(url_rainfall)#.shape#parameter.value_counts()
live_rainfall.reset_index(inplace=True)
live_rainfall

Unnamed: 0,stationReference,dateTime,value,qualifier,unitName,parameter
0,000008,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
1,000028,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
2,000075TP,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
3,000076TP,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
4,000180TP,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
...,...,...,...,...,...,...
888,Egerton1,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
889,NelsonGC,2024-11-21 23:00:00+00:00,0.0,Tipping Bucket Raingauge,mm,rainfail
890,Not_Specified,2024-11-21 16:00:00+00:00,0.0,1,mm,rainfail
891,Not_Specified,2024-11-21 20:45:00+00:00,0.0,2,mm,rainfail


## 1.2 get live tide data

In [None]:
def split_level_api_id(input):
    """
    Split level station API id into component parts
    """
    try:
        # Split Path
        parts = input.split('/')
        # Find the section that contains the key information
        target_part = parts[-2] 
        # Further split into target fields
        station, qualifier, unit = target_part.split('-level-')[0], \
                                          target_part.split('-level-')[1].split('-i-15_min-')[0], \
                                          target_part.split('-i-15_min-')[1]
        return station, qualifier, unit
    except (IndexError, ValueError):
        # Returns a null value to handle an exception
        return "", "", ""
    
def extract_station_reference(x):
    """extract stationReference"""
    return x[0]

def extract_qualifier(x):
    """extract qualifier and format"""
    return x[1].replace('_', ' ').title()

def extract_unit_name(x):
    """extract unitName"""
    return x[2]


def get_latest_tide_readings(url_tide):
    """
    Fetches the latest tide readings from a live API, processes the data, 
    and returns it as a structured DataFrame.

    Parameters:
    url_tide (str): URL of the live API providing tide data.

    Returns:
    pandas.DataFrame: A DataFrame of tide data filtered by unit (mAOD), indexed by 
                      stationReference and dateTime, with relevant data included.
    """
    data = urllib.request.urlopen(url_tide)
    data = json.load(data)

    df_item = pd.DataFrame(data["items"])

    id_data = df_item["@id"].apply(split_level_api_id)
    df_item["stationReference"] = id_data.apply(extract_station_reference)
    df_item["qualifier"] = id_data.apply(extract_qualifier)
    df_item["unitName"] = id_data.apply(extract_unit_name)


    df_item.drop(["@id", "measure"], axis=1, inplace=True)

    df_item["dateTime"] = df_item["dateTime"].apply(pd.to_datetime)

    df_item.set_index(["stationReference", "dateTime"], inplace=True)

    # df_item["parameter"] = "rainfail"

    df_item["value"] = pd.to_numeric(df_item["value"], errors="coerce")
    # df_item.dropna(['unitName'], inplace=True)
    df_item = df_item[df_item['unitName'] == 'mAOD']

    return df_item.sort_index()

url_tide= "http://environment.data.gov.uk/flood-monitoring/data/readings?latest"

live_tide = get_latest_tide_readings(url_tide)#.shape#parameter.value_counts()
live_tide.reset_index(inplace=True)
live_tide

Unnamed: 0,stationReference,dateTime,value,qualifier,unitName
0,0001,2024-11-21 23:00:00+00:00,-1.908,Tidal Level,mAOD
1,0003,2024-11-21 23:00:00+00:00,-1.950,Tidal Level,mAOD
2,0004,2024-11-21 23:00:00+00:00,-1.144,Tidal Level,mAOD
3,0006,2024-11-21 23:00:00+00:00,-1.453,Tidal Level,mAOD
4,0007,2024-11-21 23:00:00+00:00,-1.858,Tidal Level,mAOD
...,...,...,...,...,...
755,TA06MF00CC,2024-11-21 18:00:00+00:00,26.451,Groundwater,mAOD
756,TA06NPSLCC,2024-11-21 18:00:00+00:00,19.775,Groundwater,mAOD
757,TA07RSNLCC,2024-11-21 18:00:00+00:00,28.170,Groundwater,mAOD
758,TA08IOW1CO,2024-11-21 18:00:00+00:00,29.973,Groundwater,mAOD


## 1.3 get live river data

In [None]:
def split_level_api_id(input):
    """
    Split rainfall station API id into component parts
    without using a regular expression.
    """
    try:
        # Split Path
        parts = input.split('/')
        # Find the section that contains the key information
        target_part = parts[-2] 
        # Further split into target fields
        station, qualifier, unit = target_part.split('-level-')[0], \
                                          target_part.split('-level-')[1].split('-i-15_min-')[0], \
                                          target_part.split('-i-15_min-')[1]
        return station, qualifier, unit
    except (IndexError, ValueError):
        # Returns a null value to handle an exception
        return "", "", ""
    
def extract_station_reference(x):
    """extract stationReference"""
    return x[0]

def extract_qualifier(x):
    """extract qualifier and format"""
    return x[1].replace('_', ' ').title()

def extract_unit_name(x):
    """extract unitName"""
    return x[2]



def get_latest_river_readings(url_river):
    """
    Fetches the latest river readings from a live API, processes the data, 
    and returns it as a structured DataFrame.

    Parameters:
    url_river (str): URL of the live API providing river data.

    Returns:
    pandas.DataFrame: A DataFrame of tide data filtered by unit (mASD), indexed by 
                      stationReference and dateTime, with relevant data included.
    """
    data = urllib.request.urlopen(url_river)
    data = json.load(data)

    df_item = pd.DataFrame(data["items"])

    id_data = df_item["@id"].apply(split_level_api_id)
    df_item["stationReference"] = id_data.apply(extract_station_reference)
    df_item["qualifier"] = id_data.apply(extract_qualifier)
    df_item["unitName"] = id_data.apply(extract_unit_name)


    df_item.drop(["@id", "measure"], axis=1, inplace=True)

    df_item["dateTime"] = df_item["dateTime"].apply(pd.to_datetime)

    df_item.set_index(["stationReference", "dateTime"], inplace=True)

    # df_item["parameter"] = "rainfail"

    df_item["value"] = pd.to_numeric(df_item["value"], errors="coerce")
    # df_item.dropna(['unitName'], inplace=True)
    df_item = df_item[df_item['unitName'] == 'mASD']

    return df_item.sort_index()

url_tide= "http://environment.data.gov.uk/flood-monitoring/data/readings?latest"

live_river = get_latest_river_readings(url_tide)#.shape#parameter.value_counts()
live_river.reset_index(inplace=True)
live_river

Unnamed: 0,stationReference,dateTime,value,qualifier,unitName
0,0130TH,2024-11-21 23:00:00+00:00,0.250,Stage,mASD
1,0130TH,2024-11-21 23:00:00+00:00,0.140,Downstage,mASD
2,0144TH,2024-11-21 23:00:00+00:00,1.177,Stage,mASD
3,0155TH,2024-11-21 23:00:00+00:00,0.888,Downstage,mASD
4,0155TH,2024-11-21 23:00:00+00:00,0.875,Stage,mASD
...,...,...,...,...,...
1315,L3211,2024-11-21 23:00:00+00:00,2.443,Downstage,mASD
1316,L3211,2024-11-21 23:00:00+00:00,1.533,Stage,mASD
1317,PCHURCH_TG_342,2024-11-21 23:00:00+00:00,0.438,Stage,mASD
1318,SE560,2024-11-21 23:00:00+00:00,0.682,Stage,mASD


# 2. Call a function of flood_tool to process the data

## 2.1 live_rainfall processing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
import os
import sys
sys.path.append('..') # Add parent directory to path to always find flood_tool
                      # This is not best practice, but it works for this example
import flood_tool as ft


# Use the groupby function to group stationReference, calculate mean, max, min
live_rainfall_group=live_rainfall.groupby('stationReference')['value'].agg(['mean', 'max', 'min'])
live_rainfall_group.reset_index(inplace=True)
live_rainfall_group['max_min']=live_rainfall_group['max']-live_rainfall_group['min']
live_rainfall_group
    

station_path = "../flood_tool/resources/stations.csv"
station_loc=ft.read_data(station_path)
# # Connecting station and live_rainfall data using the station_rainfall_loc function
live_rainfall_station=ft.station_rainfall_loc(live_rainfall_group,station_loc)
live_rainfall_station.dropna(subset=['latitude'], inplace=True)

# The value data in live_rainfall_station is processed using the classify_rainfall and classify_rainfall_encode functions to classify the data
live_rainfall_station['rainfall_class'] = live_rainfall_station['mean'].apply(ft.classify_rainfall)
live_rainfall_station['rainfall_class_encode'] = live_rainfall_station['rainfall_class'].apply(ft.classify_rainfall_encode)
live_rainfall_station['rainfall_class_max'] = live_rainfall_station['max'].apply(ft.classify_rainfall)
live_rainfall_station['rainfall_class_encode_max'] = live_rainfall_station['rainfall_class_max'].apply(ft.classify_rainfall_encode)
live_rainfall_station.dropna(inplace=True)
live_rainfall_station

Unnamed: 0,stationReference,mean,max,min,max_min,stationName,latitude,longitude,easting,northing,rainfall_class,rainfall_class_encode,rainfall_class_max,rainfall_class_encode_max
0,000008,0.0,0.0,0.0,0.0,Rainfall station,53.480556,-1.441674,437150.512108,398348.710960,Slight,0,Slight,0
1,000028,0.0,0.0,0.0,0.0,Rainfall station,53.500289,-1.673575,421750.534631,400448.642470,Slight,0,Slight,0
2,000075TP,0.0,0.0,0.0,0.0,Rainfall station,51.084022,-0.214597,525150.530744,133149.574480,Slight,0,Slight,0
3,000076TP,0.0,0.0,0.0,0.0,Rainfall station,51.701508,-0.747539,486650.531968,201049.405434,Slight,0,Slight,0
4,000180TP,0.0,0.0,0.0,0.0,Rainfall station,51.618838,0.173236,550550.534797,193349.379816,Slight,0,Slight,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,E9360,0.0,0.0,0.0,0.0,Rainfall station,50.929113,-0.384184,513650.529609,115649.665558,Slight,0,Slight,0
885,E9660,0.0,0.0,0.0,0.0,Rainfall station,50.929364,-0.469557,507650.524814,115549.681959,Slight,0,Slight,0
886,E9720,0.0,0.0,0.0,0.0,Rainfall station,50.860296,-0.483186,506850.555277,107849.746269,Slight,0,Slight,0
887,EdgeH1,0.0,0.0,0.0,0.0,Rainfall station,53.560463,-2.873303,342250.520990,407448.681306,Slight,0,Slight,0


## 2.2 live_tide processing

## 2.3 live_river processing

# 3. Call the draw function to plot the potential areas at risk.