In [4]:

# WSE import and manipulation modules
from dataclasses import dataclass
from datetime import date, datetime, timezone, timedelta
from influxdb_client import InfluxDBClient, Point, WritePrecision, WriteOptions
from influxdb_client.client.write_api import SYNCHRONOUS
import pandas as pd
from cmath import nan
import ipywidgets as widgets
from ipywidgets import interactive, TwoByTwoLayout
import geopandas as gpd
# Avoid unnecessary warning about pivot function (not applicable to format)
import warnings
from influxdb_client.client.warnings import MissingPivotFunction
warnings.simplefilter("ignore", MissingPivotFunction)

# Geocube/xarray modules
import os
from pathlib import Path
import json
from functools import partial
from shapely.geometry import Point, box, mapping
from scipy.interpolate import griddata, interp1d
from scipy import stats
import numpy as np
import xarray as xr
from geocube.api.core import make_geocube
from geocube.rasterize import rasterize_points_griddata, rasterize_points_radial
import h5netcdf

%matplotlib inline



In [11]:
##### Get WSE source data using InfluxDB client, return as pandas dataframe, slice to applicable time period, add missing river mile locations, interpolate, then make geocube/xarray from geodataframe.
# Do not modify these settings
token = "ZmsU7WoVdvs45GVB2jNeBzDwJFjza4ZfkQple4FaRqHq8sjxDPnP5kVWOFnVygjbQPY1H_SUcXCU4xX-rn692Q=="
org = "TWI"
bucket = "riverGagesSubset"
client=InfluxDBClient(url="http://10.3.10.19:8086", token=token, org=org,debug=False,enable_gzip=True)
query_api = client.query_api()
start = '2022-03-01'
today = date.today()
yesterday = str((today - timedelta(days = 1)))

# User modified variables
bounding_box = json.dumps(mapping(box(-92,28.5,-89,31.5))) # Not implemented 
projection = "EPSG:26915"
resolution = r = 160

# Function to interpolate missing WSE at predetermined RM
def wse_interp(time_query):
    # WSE values aggregated over 3h period beginning 00:00z, output references the end of each period  
    # e.g. '2022-09-01 03:00:00+00:00' represents mean values for selected date between 00:00z and 03:00z
    query = f'''from(bucket: "riverGagesSubset")
    |> range(start: {start}T00:00:00Z, stop: {yesterday}T23:59:00Z)
    |> filter(fn: (r) => r["_measurement"] == "Stage_Ft")
    |> aggregateWindow(every: 3h, fn: mean, createEmpty: false)
    |> yield(name: "mean")'''
    
    # Import into df, drop and rename columns. 
    wse_df = query_api.query_data_frame(org=org, query=query)
    wse_df = wse_df.drop(columns=["_start","result","table","_field","_stop","_measurement"])
    wse_df = wse_df.rename(columns={"_value": "z", "_time": "time", "Longitude": "lon_g", "Latitude": "lat_g", "Station": "sid", "river_mile": "mile"})
    
    # Modify selected field dtypes to simplify downstream operations
    wse_df[['mile', 'lat_g', 'lon_g']] = wse_df[['mile', 'lat_g', 'lon_g']].apply(pd.to_numeric)
    wse_df['time'] = pd.to_datetime(wse_df['time'],utc=True)
    
    # Pass period selection to create subset dataframe 
    period_select = str(time_query)
    wse_slice_df = wse_df.loc[wse_df['time'] == period_select]
    
    # Remove gage at Cape Giradeau to fix conflict with river mile join
    bad_gage = "rg_CE401278"
    wse_slice_df = wse_slice_df.loc[wse_slice_df['sid'] != bad_gage]
    
    # Import river mile .geojson into gdf and drop matching columns to simplify following join
    url = "https://raw.githubusercontent.com/hbienn/smartport_wse/main/"
    rm_formatted = f"{url}/mr_rm.geojson"
    rm_gdf = gpd.read_file(rm_formatted, crs="epsg:4326")
    rm_gdf = rm_gdf.drop(columns=['OBJECTID', 'ord', 'sid', 'wse', 'time', 'lat_g', 'lon_g'])
    
    # Round river mile to 1 decimal place to account for any floating precision errors
    rm_gdf = rm_gdf.round({'mile':1})
    rm_gdf = rm_gdf.sort_values('mile')
    rm_gdf = rm_gdf.reindex()
    
    # Merge WSE df with river mile gdf using mile as key
    wse_gdf = rm_gdf.merge(wse_slice_df, how='outer', on='mile')
    
    # Reorder columns for obsessive compulsive reasons
    cols = wse_gdf.columns.tolist()
    cols = ['mile', 'sid', 'z', 'time','lon', 'lat', 'lat_g', 'lon_g', 'geometry']
    wse_gdf = wse_gdf[cols]
    wse_gdf = wse_gdf.sort_values('mile')
    
    # Add column for period and calculate it as max of ['time'] and convert to integer. 
    # Establishes a time dimension for each point that is persistent through the downstream interpolation.
    start_time = wse_gdf['time'].min()
    end_time = wse_gdf['time'].max()
    period = pd.date_range(start=start_time, end=end_time, freq='3H', name='periodIndex')
    period = pd.to_datetime(period.values).view('int64')
    period = period.item()
    wse_gdf.insert(4,'period', period)
    
    # Subset gdf to limit spatial domain to south of RM 1000 in the vicinity of Cape Giradeau, MO
    wse_gdf = wse_gdf.loc[wse_gdf['mile'] <= 1000]
    
    # Interpolates missing WSE values based on a linear relationship between river mile and known WSE values.
    wse_gdf = wse_gdf.dissolve(by='mile', aggfunc='mean')
    wse_gdf = wse_gdf.sort_values('mile')
    wse_gdf['z'] = wse_gdf['z'].interpolate(method='linear', limit_direction = 'both')
    
    
    # Still issues here with getting make_geocube to recognize time field and assign it correct dtype (datetime64[ns]). 
    # Potentially results from use of a timezone-aware dtype, workaround implemented.
    wse_xr = make_geocube(
                        vector_data = wse_gdf,
                        measurements = ['z',],
                        #datetime_measurements=['period'],
                        output_crs = projection,
                        resolution = (r, r),
                        #geom = bounding_box,
                        #interpolate_na_method='linear'
                        rasterize_function=partial(rasterize_points_griddata, method="linear", filter_nan = True)
                        )
    # Expand dimensions and populate with the POSIX time value variable previously assigned 
    period = int(wse_gdf['period'].mean())
    wse_xr = wse_xr.expand_dims('time')
    arr = wse_xr['time'].to_numpy()
    arr[0,] = period
    wse_xr['time'] = arr
    wse_xr['time'] = pd.to_datetime(wse_xr['time'],utc=True)

    # Clip surface to extent of Mississippi River
    url = "https://raw.githubusercontent.com/hbienn/smartport_wse/main/"
    mr_formatted = f"{url}/generalized_nhdarea_stlouistogulf_utm.geojson"
    mr = gpd.read_file(mr_formatted, crs=projection)
    wse_xr = wse_xr.rio.clip(mr.geometry, mr.crs, drop=True, invert=False)
    out_path=str(os.path.join(Path.home(), "Documents") + "/nc_combine/smartport_wse_utm15n_160m_{}.nc".format(period))
    wse_xr.to_netcdf(out_path)
    return


In [12]:
wse_interp('2022-09-01 09:00:00')

AttributeError: 'list' object has no attribute 'drop'