In [1]:
%matplotlib inline

import os
import sys
import pickle
import datacube
import numpy as np
import pandas as pd
from pandas import DataFrame
import geopandas as gpd
import geopandas.tools
from shapely.geometry import Point
import xarray as xr
import ipyleaflet as ipyl
from ipyleaflet import Map, Marker
import ipywidgets as ipyw
import json
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display
from datacube.utils import masking
from datacube.utils import geometry 
from datacube.utils.geometry import CRS
from datacube.helpers import write_geotiff

## Scrape gauge data from the BOM website
This step uses a Python script located in the Scripts folder. There are a few versions of this script: some only get the gauge name and location, while others generate a map of the gauges on which you can click a gauge to retrieve its data. The map version is called dea_bom.py; it is in my Flood-Mapping repository and was written in September 2019 by Kirill at GA. This notebook uses the dea_bom_storage version, which only retrieves the name and location of the gauges that measure storage level.

In [2]:
# dea_bom_storage (written by Kirill at GA in 2019) retrieves data from the Water Data Online
# website: the name and location of the gauges that measure Storage Level.
# The module lives in the local 'Scripts' folder, so that folder has to be on sys.path before
# the import. The guard stops re-runs of this cell from appending duplicate entries.
if 'Scripts' not in sys.path:
    sys.path.append('Scripts')
import dea_bom_storage

stations = dea_bom_storage.get_stations()

# Preview the first five stations loaded
print(f'{len(stations)} stations loaded; e.g.:')
stations[:5]

613 stations loaded; e.g.:


[namespace(name='10 Mile Brook Dam Water Level Daily Value, South W',
           url='http://bom.gov.au/waterdata/services/stations/PI_357693.1',
           pos=(-33.963617, 115.1248985)),
 namespace(name='284',
           url='http://bom.gov.au/waterdata/services/stations/D24',
           pos=(-32.74362461, 151.9612514)),
 namespace(name='2km N Warringee Pt',
           url='http://bom.gov.au/waterdata/services/stations/A4261155',
           pos=(-35.67660246, 139.2502685)),
 namespace(name='3km W Pt McLeay',
           url='http://bom.gov.au/waterdata/services/stations/A4261156',
           pos=(-35.51481282, 139.0647519)),
 namespace(name='40A',
           url='http://bom.gov.au/waterdata/services/stations/D21',
           pos=(-32.79766496, 151.743779))]

## Extract position, name and ID of the gauges 
The variable called `stations` is a list of namespace objects (SimpleNamespace). Each namespace object holds the position, URL and name of a gauge (the URL is the part that contains the ID number). We can extract this information with a loop:

In [3]:
# Pull the 'pos' tuple, 'name' string and 'url' string out of every station namespace
# into three parallel lists, in the same order as `stations`.
# Iterating over the stations directly (rather than by index) also avoids leaving
# loop temporaries such as `name` and `url` behind in the kernel namespace.
pos_list = [station.pos for station in stations]
name_list = [station.name for station in stations]
url_list = [station.url for station in stations]
    
#extract the IDs from the url (they are at the end of the url)
def remove_cruft(s):
    """Return the gauge ID found at the end of a BoM station URL.

    Parameters
    ----------
    s : str
        Station URL, e.g.
        'http://bom.gov.au/waterdata/services/stations/PI_357693.1'.

    Returns
    -------
    str
        The part of the URL after the stations prefix, e.g. 'PI_357693.1'.
    """
    prefix = 'http://bom.gov.au/waterdata/services/stations/'
    if s.startswith(prefix):
        # Same result as the previous fixed slice s[46:]: the prefix is 46 characters long
        return s[len(prefix):]
    # Any other scheme or host (e.g. https:// or www.): take the last path segment
    # rather than slicing at a position that no longer lines up with the ID.
    return s.rsplit('/', 1)[-1]
# Run the helper over every station URL to get the matching list of gauge IDs
id_list = [remove_cruft(station_url) for station_url in url_list]

In [None]:
# Sanity check: number of station names extracted
len(name_list)

In [4]:
    
# Assemble the three lists (name_list, pos_list and id_list) into one pandas dataframe.
# NOTE: the column name 'staion_name' is misspelled, but other cells look it up by this exact name.
df = DataFrame({
    'staion_name': name_list,
    'location': pos_list,
    'gauge_ID': id_list,
})

# Unpack every 'location' tuple into two separate columns: first element -> lat, second -> lon
lat_lon = pd.DataFrame(df['location'].tolist(), index=df.index)
df[['lat', 'lon']] = lat_lon

# Make the data spatial with a GeoDataFrame. points_from_xy takes x (lon) first, then y (lat),
# so in the output lon should come first (eg: 115.124 -33.963).
gauge_points = gpd.points_from_xy(df['lon'], df['lat'])
st_gdf = gpd.GeoDataFrame(df, geometry=gauge_points)

st_gdf

Unnamed: 0,staion_name,location,gauge_ID,lat,lon,geometry
0,"10 Mile Brook Dam Water Level Daily Value, Sou...","(-33.963617, 115.1248985)",PI_357693.1,-33.963617,115.124899,POINT (115.12490 -33.96362)
1,284,"(-32.74362461, 151.9612514)",D24,-32.743625,151.961251,POINT (151.96125 -32.74362)
2,2km N Warringee Pt,"(-35.67660246, 139.2502685)",A4261155,-35.676602,139.250269,POINT (139.25027 -35.67660)
3,3km W Pt McLeay,"(-35.51481282, 139.0647519)",A4261156,-35.514813,139.064752,POINT (139.06475 -35.51481)
4,40A,"(-32.79766496, 151.743779)",D21,-32.797665,151.743779,POINT (151.74378 -32.79766)
...,...,...,...,...,...,...
608,YARRADDA LAGOON,"(-34.5846999999996, 145.8236)",41010302,-34.584700,145.823600,POINT (145.82360 -34.58470)
609,Yabba Ck at Borumba Dam HW,"(-26.5096751869416, 152.58186478)",138112A,-26.509675,152.581865,POINT (152.58186 -26.50968)
610,Yarrawonga Weir,"(-36.0103075, 146.091535)",sp-o10814,-36.010308,146.091535,POINT (146.09153 -36.01031)
611,Yealering,"(-32.59697157, 117.62690338)",6150002,-32.596972,117.626903,POINT (117.62690 -32.59697)


In [5]:
# Look up a gauge ID by station name: index the gauges by name, then read a single cell
name = 'Lake Argyle Dam'
st_gdf_names = st_gdf.set_index(['staion_name'])
name_ID = st_gdf_names.at[name, 'gauge_ID']
name_ID

'ODSS_22241_GLEVN.1'

In [7]:
# Load the shapefile of reservoirs that have a gauge ID attached and display it.
# NOTE(review): presumably this is the result of the ArcGIS spatial join described
# further down in this notebook — confirm.
points_polys_join = gpd.read_file('reservoirs_with_gauge_ID/reservoirs_with_gauge_ID.shp')
points_polys_join

Unnamed: 0,staion_nam,gauge_ID,NAME,SHAPE_Area,Area_calc,lat,lon,geometry
0,"10 Mile Brook Dam Water Level Daily Value, Sou...",PI_357693.1,TEN MILE BROOK DAM,0.000029,2.941558e+05,-33.963056,115.125976,"POLYGON ((115.12337 -33.95985, 115.12344 -33.9..."
1,2km N Warringee Pt,A4261155,LAKE ALBERT,0.018549,1.864490e+08,-35.618794,139.283804,"POLYGON ((139.25587 -35.63729, 139.25642 -35.6..."
2,3km W Pt McLeay,A4261156,LAKE ALEXANDRINA,0.061854,6.230347e+08,-35.449258,139.149024,"POLYGON ((139.36075 -35.36765, 139.36370 -35.3..."
3,ARTHURS LAKE - AT PUMP STATION,418.1,ARTHURS LAKE,0.006971,6.417353e+07,-41.980583,146.923891,"POLYGON ((146.90975 -41.93936, 146.90994 -41.9..."
4,AUGUSTA LAKE - AT INTAKE,155.1,LAKE AUGUSTA,0.001265,1.166882e+07,-41.853706,146.530548,"POLYGON ((146.55101 -41.83153, 146.55113 -41.8..."
...,...,...,...,...,...,...,...,...
345,YAN YEAN RES HG,229406A,YAN YEAN RESERVOIR,0.000502,4.923370e+06,-37.555279,145.137892,"POLYGON ((145.14773 -37.55130, 145.14775 -37.5..."
346,YARRA RIV-UY RES HG,229102A,UPPER YARRA RESERVOIR,0.000747,7.311508e+06,-37.687427,145.926259,"POLYGON ((145.89486 -37.67682, 145.89512 -37.6..."
347,Yabba Ck at Borumba Dam HW,138112A,LAKE BORUMBA,0.000388,4.283537e+06,-26.520656,152.559265,"POLYGON ((152.53380 -26.51082, 152.53482 -26.5..."
348,Yarrawonga Weir,sp-o10814,LAKE MULWALA,0.003668,3.669286e+07,-36.000460,146.071580,"POLYGON ((146.04320 -35.96721, 146.04335 -35.9..."


## Open the named reservoirs dataset
See the notebook called 'Making the Reservoirs dataset' to see how I got to this point. Basically, it's made from the national surface water layer, which you can download for free from the Digital Earth Australia website. In ArcGIS I extracted the polygons that were labelled 'Reservoir' and made a shapefile out of them. I then put it on this sandbox account, deleted a few columns, deleted any reservoirs that didn't have a name attached to them, and obtained the centroid of each polygon to get the lat and lon columns.

In [None]:
# Read the named reservoir polygons
gdf = gpd.read_file('Reservoirs/Named_Reservoirs/Named_Reservoirs.shp')

# Pair up lat and lon as a (lat, lon) tuple per reservoir. We need this later to match the gauge to its reservoir
gdf['location'] = list(zip(gdf['lat'], gdf['lon']))

# Keep only these columns, in this order
keep_columns = ['NAME', 'SHAPE_Area', 'Area_calc', 'lat', 'lon', 'location', 'geometry']
gdf = gdf[keep_columns]
gdf

## See if the gauges are on the reservoirs
This is a visual check to see how many of the gauges match up to the reservoirs, i.e. are in the same place. I have only taken gauges that were labelled 'storage level' by BOM. There is another class of gauges, called 'depth' or something similar; if we included those, we might cover the rest of the reservoirs that don't have a gauge in the 'storage level' category but do have a 'depth' gauge.

In [None]:
# Convert both GeoDataFrames to GeoJSON-style dicts
data = json.loads(gdf.to_json())
st_data = json.loads(st_gdf.to_json())

# NOTE(review): the name `map` shadows Python's builtin map() for the rest of the session.
# It is kept so that anything else referring to `map` keeps working.
map = ipyl.Map(center=[-28, 148], zoom=7)

# Text label that shows the name of the reservoir currently hovered over
label = ipyw.Label(layout=ipyw.Layout(width='100%'))

# This part draws the polygons for the reservoirs: grey outline and fill, red fill on hover
reservoir_style = {
    'color': 'grey',
    'weight': 1,
    'fillColor': 'grey',
    'fillOpacity': 0.5
}
for feature in data['features']:
    # each feature gets its own copy of the style dict
    feature['properties']['style'] = dict(reservoir_style)
layer = ipyl.GeoJSON(data=data, hover_style={'fillColor': 'red'})


def click_handler(event=None, feature=None, id=None, properties=None):
    """Write the NAME property of a reservoir feature into `label`.

    Despite its name, this is registered below as a hover callback on the
    polygon layer. All parameters default to None; only `properties` is read.
    """
    if properties is None:
        # Nothing to display; indexing None here used to raise a TypeError
        return
    label.value = str(properties['NAME'])


layer.on_hover(click_handler)
map.add_layer(layer)  # add the polygons to the map

# This part is the markers for the gauges.
# The GeoJSON coordinates are stored lon first, while the marker location is given as (lat, lon).
features = st_data['features']
for gauge in features:
    coords = gauge['geometry']['coordinates']
    location = (coords[1], coords[0])
    marker = Marker(location=location)
    map.add_layer(marker)

# Display the map with the hover label underneath it
ipyw.VBox([map, label])

## Merge the gauge data with the reservoir data by location
I want to add the gauge name and gauge location for each reservoir. We will probably do this with a spatial join. I don't know how to code that so I'm just going to save these 2 files out and do it in ArcGIS.  

## Save the files out so you can do a spatial join in ArcGIS

In [None]:
# Drop the tuple-valued 'location' column from both layers before saving
points = st_gdf.drop(columns='location')
polys = gdf.drop(columns='location')

# The points should carry the same crs as the reservoirs (GDA94).
# Whether the positions really are GDA94 when they come straight off the BoM website
# is not known; it is simply assumed here.
points.crs = polys.crs

# Only the points are written out. Saving the polygons as well would be:
#   polys.to_file('polys')
output = 'points_with_ID'
points.to_file(output)