In [1]:
import os
import requests
import pandas

import warnings

warnings.filterwarnings('ignore')

### Docs

See here:

* https://environment.data.gov.uk/flood-monitoring/archive

### Construct request

In [2]:
def pull_date(date):
    """Pull data for the given date"""
    return pandas.read_csv(f"https://environment.data.gov.uk/flood-monitoring/archive/readings-full-{date:%Y-%m-%d}.csv")

### Find dates missing from master file

Examine last date in master file and compare against T-1

In [3]:
df_master = (
    pandas.read_csv("data/merged/Wanstead Rainfall Merged.csv")
    .assign(Timestamp=lambda x: pandas.to_datetime(x["Timestamp"]))
)

In [4]:
df_master.tail()

Unnamed: 0,Timestamp,Rainfall
1129618,2023-08-20 23:15:00+00:00,0.0
1129619,2023-08-20 23:30:00+00:00,0.0
1129620,2023-08-20 23:30:00+00:00,0.0
1129621,2023-08-20 23:45:00+00:00,0.0
1129622,2023-08-20 23:45:00+00:00,0.0


In [5]:
extracted_dates = df_master["Timestamp"].dt.date.unique()

In [6]:
current_date = pandas.Timestamp.today()

In [7]:
missing_dates = pandas.date_range(start=extracted_dates.max(), end=current_date - pandas.tseries.offsets.Day(1))

In [8]:
len(missing_dates)

2

Loop through historical dates, pull data, save to disk

We filter the response to a single station using stationReference - these can be found here: https://environment.data.gov.uk/flood-monitoring/assets/demo/index.html

Can take a while depending on number of dates requesting...

In [9]:
dir_output = "data/raw"

In [10]:
for date in missing_dates:

    if date.day == 1:
        print(date)
    
    try:
        df = pull_date(date)
    except:
        print(f"Failed for {date:%Y-%m-%d}")

    df_filtered = (
        df
        # Filter to Wanstead station
        .loc[lambda x: x["stationReference"] == "239374TP"]
        # Rename and drop some columns
        .rename(columns={"dateTime": "Timestamp", "value": "Rainfall"})
        .filter(items=["Timestamp", "Rainfall"], axis=1)
    )

    # Save to disk
    df_filtered.to_csv(f"{dir_output}/wanstead_rainfall_{date:%Y-%m-%d}.csv", index=False)

Failed for 2023-08-21


### Combine with master

In [11]:
files = os.listdir(dir_output)

In [12]:
df_new = (
    pandas.concat([
        pandas.read_csv(f"{dir_output}/{fname}")
        for fname in files
    ], axis=0)
    .assign(Timestamp=lambda x: pandas.to_datetime(x["Timestamp"]))
)

In [None]:
df_new.to_csv("data/Wanstead Rainfall 2022-23.csv")

In [13]:
df_new.head()

Unnamed: 0,Timestamp,Rainfall
0,2022-08-25 00:00:00+00:00,0.0
1,2022-08-25 00:15:00+00:00,0.0
2,2022-08-25 00:30:00+00:00,0.0
3,2022-08-25 00:45:00+00:00,0.0
4,2022-08-25 01:00:00+00:00,0.0


Concatenate and write to file

In [14]:
df_all = (
    pandas.concat([df_master, df_new], axis=0)
    .sort_values(by=["Timestamp"])
)

In [15]:
df_all.to_csv("data/merged/Wanstead Rainfall Merged.csv", index=False)