# HL01 - export groundwater data

My suggestion would be to extract all the data from water level measurements in your area and then to do a series of filters and smoothing to come up with a reasonable set of boundary conditions for your geological timescale. I assume this includes a boundary for the GAB, which is adjacent to your area.
The filters and smoothers I would suggest would be:
Discard any data post 2000; I would be worried that extractions would be influencing your water levels too much, but we can do some statistics on this
Calculate long term averages from any timeseries data, and extract single observations
Create a spatial map and run some sort of smoothing algorithm (Spatial block averaging?) to create a reasonably consistent map.

1. Get a data dump from the groundwater explorer (http://www.bom.gov.au/water/groundwater/explorer/map.shtml) for NSW and Qld,
2. Then subset to your geographical area using a mask,
3. Discard any bores without water level data
4. Get all the bore IDs and feed these into “real time data” and the Qld equivalent to get all the water level data (I haven’t yet figured out how to data dump that out of the BOM system).
5. Then do the filtering and smoothing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import h5py
import netCDF4
import pycurious
import csv
import shapefile
%matplotlib inline

# Map extents in degrees, presumably ordered [lon_min, lon_max, lat_min, lat_max]
# -- TODO confirm the ordering against the plotting calls that consume them.
extent_globe = [-180,180,-90,90]
extent_australia = [112, 155, -44, -10]

## Elevation

In [4]:
def read_bore_shapefile(shape_filename):
    """
    Read borehole information from NGIS shapefile

    Only bores with a non-zero 'WaterCount' attribute and a positive
    'DrilledDep' attribute are returned, so the output arrays have
    m <= n rows, where n is the number of records in the shapefile.

    Arguments
    ---------
    shape_filename : str
        file path of the NGIS shapefile

    Returns
    -------
    boreID : array shape(m,)
        unique borehole identifier (the 'HydroID' attribute)
    lonlat : array shape(m,2)
        longitudinal / latitudinal coordinates
    coords : array shape(m,2)
        eastings / northings in local projected coordinates
    proj : array shape(m,)
        local projection number
    elevation : array shape(m,)
        elevation above sea level to well casing
    """
    shp = shpreader.Reader(shape_filename)

    try:
        n_entries = len(shp)
        coords_lonlat = np.empty((n_entries, 2))
        coords_proj   = np.empty((n_entries, 2))
        # builtin int: the np.int alias was removed in NumPy 1.24
        proj          = np.empty(n_entries, dtype=int)
        elevation     = np.empty(n_entries)
        hydroID       = np.empty(n_entries, dtype=int)
        is_hydro      = np.empty(n_entries, dtype=bool)
        drilled_depth = np.empty(n_entries)

        for i, record in enumerate(shp.records()):
            attrs = record.attributes
            hydroID[i]       = attrs['HydroID']
            proj[i]          = attrs['Projecti_1']
            coords_lonlat[i] = attrs['Longitude'], attrs['Latitude']
            coords_proj[i]   = attrs['Easting'], attrs['Northing']
            elevation[i]     = attrs['RefElev']
            drilled_depth[i] = attrs['DrilledDep']
            # stored as bool: True for any non-zero water count
            is_hydro[i]      = attrs['WaterCount']
    finally:
        # release the reader even if a record cannot be parsed
        shp.close()

    # keep bores that have water records and a positive drilled depth
    mask = np.logical_and(is_hydro, drilled_depth > 0)
    return hydroID[mask], coords_lonlat[mask], coords_proj[mask], proj[mask], elevation[mask]

def read_water_levels(levels_filename, boreID, min_date=None, max_date=None):
    """
    Read water level information from levels.csv for given boreID

    The file is read as comma-separated text with one header row; the
    bore identifier is taken from column 0, the measurement date from
    column 3 and the water level from column 5.

    Arguments
    ---------
    levels_filename : str
        path to levels.csv file
    boreID : array shape(n,)
        unique borehole identifier
    min_date : datetime
        include entries greater than or equal to this datetime
        (no lower bound if None)
    max_date : datetime
        include entries less than this datetime
        (no upper bound if None)

    Returns
    -------
    levels : array shape(n,)
        mean water level in each borehole corresponding to their boreID
        (NaN where a borehole has no entries in the date range)
    levels_std : array shape(n,)
        standard deviation of water level in each borehole
        (NaN where a borehole has no entries in the date range)
    """
    bID, level = np.loadtxt(levels_filename, delimiter=',', usecols=(0,5), skiprows=1, unpack=True)
    date = np.loadtxt(levels_filename, delimiter=',', usecols=(3,), skiprows=1, unpack=True, dtype=np.datetime64)

    # loadtxt squeezes a single data row to scalars; the grouping below
    # needs 1-D arrays. Builtin int replaces np.int (removed in NumPy 1.24).
    bID = np.atleast_1d(bID).astype(int)
    level = np.atleast_1d(level)
    date = np.atleast_1d(date)

    if min_date is not None or max_date is not None:
        # apply only the bounds that were given, rather than substituting
        # sentinel dates for the missing one
        mask_date = np.ones(date.shape, dtype=bool)
        if min_date is not None:
            mask_date &= date >= min_date
        if max_date is not None:
            mask_date &= date < max_date
        bID = bID[mask_date]
        level = level[mask_date]

    # Group the measurements by bore with one stable sort, so each bore's
    # entries form a contiguous slice (in their original file order)
    # instead of building a full-length mask for every bore.
    order = np.argsort(bID, kind='stable')
    sorted_ID = bID[order]
    sorted_level = level[order]

    boreID = np.asarray(boreID)
    starts = np.searchsorted(sorted_ID, boreID, side='left')
    stops = np.searchsorted(sorted_ID, boreID, side='right')

    # bores without any entries keep NaN for both mean and std
    mean_std_levels = np.full((len(boreID), 2), np.nan)

    for i, (lo, hi) in enumerate(zip(starts, stops)):
        if hi > lo:
            level_ID = sorted_level[lo:hi]
            mean_std_levels[i] = level_ID.mean(), np.std(level_ID)

    return tuple(mean_std_levels.T)


In [5]:
# State / territory NGIS extracts to merge, and the per-state path templates
# ({0} is replaced by the state abbreviation).
states = ["NSW", "VIC", "ACT", "SA", "QLD", "TAS", "NT", "WA"]
shapefilename = "../Data/gw_shp_{0}/shp_{0}/NGIS_Bore.shp"
levelfilename = "../Data/gw_shp_{0}/shp_{0}/level_{0}.csv"

gw_data = []
for state in states:
    # bore metadata for this state: (ID, lonlat, coords, projection, elevation)
    sf_state = read_bore_shapefile(shapefilename.format(state))

    # (mean, std) water level per bore, using only entries dated before 2000
    gw_level = read_water_levels(
        levelfilename.format(state),
        sf_state[0],
        min_date=None,
        max_date=np.datetime64('2000'),
    )

    # one row per bore: the metadata columns followed by mean and std level
    gw_state = np.column_stack(sf_state + gw_level)

    # drop bores that have no water level entry (mean is NaN)
    has_level = ~np.isnan(gw_level[0])
    gw_state = gw_state[has_level]

    gw_data.append(gw_state)

# merge the per-state tables and expose each column under its own name
gw_data = np.concatenate(gw_data, axis=0)
gw_ID        = gw_data[:, 0]
gw_lonlat    = gw_data[:, 1:3]
gw_coords    = gw_data[:, 3:5]
gw_proj      = gw_data[:, 5]
gw_elevation = gw_data[:, 6]
gw_level     = gw_data[:, 7]
gw_level_std = gw_data[:, 8]

In [6]:
# save data

import pandas as pd

# Column labels, listed in the same order as the columns of gw_data.
columns = [
    'ID',
    'lon',
    'lat',
    'easting',
    'northing',
    'projection',
    'elevation',
    'gw_level',
    'gw_level_std',
]

# Wrap the merged array in a labelled table and write it out as CSV.
df = pd.DataFrame(data=gw_data, columns=columns)
df.to_csv('../Data/NGIS_groundwater_levels_AUS.csv')

In [8]:
df.shape

(170333, 9)