In [27]:
import pandas as pd
import geopandas as gpd
from datetime import date, datetime, timedelta
from influxdb_client import InfluxDBClient, Point, WritePrecision, WriteOptions
from influxdb_client.client.write_api import SYNCHRONOUS
import json
from shapely.geometry import box, mapping
from geocube.api.core import make_geocube
from geocube.rasterize import rasterize_points_griddata
from functools import partial
from pathlib import Path
import boto3
from pprint import pprint
from pathlib import Path
from dotenv import load_dotenv
import os

# Avoid unnecessary warning about pivot function (not applicable to format)
# The filter is installed process-wide, so it stays active for every later cell.
import warnings
from influxdb_client.client.warnings import MissingPivotFunction
warnings.simplefilter("ignore", MissingPivotFunction)

# Load key/value pairs from a local .env file into os.environ (python-dotenv).
# The call's return value is what the cell displays.
load_dotenv()

True

In [28]:
##### Get WSE source data using InfluxDB client, return as pandas dataframe
# Set some default time variables for reference.
# Only the calendar day of start_time / end_time is used to build the query range.
start_time = '2022-06-01 00:00:00' # Modifiable
start_day = pd.Timestamp(start_time).strftime('%Y-%m-%d')
today = date.today()
yesterday = today - timedelta(days = 1)
now = datetime.now()
#end_time = end_day = yesterday.strftime('%Y-%m-%d')
end_time = end_day = '2022-09-01'

# Connection settings.
# The API token is a secret and must not live in the notebook: it is read from the
# INFLUXDB_TOKEN environment variable (populated by load_dotenv() from a local .env file).
# NOTE(review): the token that used to be committed here should be rotated.
token = os.environ.get("INFLUXDB_TOKEN")
if not token:
    raise RuntimeError(
        "INFLUXDB_TOKEN is not set. Add it to your environment or to the .env file "
        "read by load_dotenv() before running this cell."
    )
org = "TWI"
bucket = "riverGagesSubset"
client = InfluxDBClient(url="http://10.3.10.19:8086", token=token, org=org, debug=False, enable_gzip=True)
query_api = client.query_api()

# WSE values aggregated over 3h period beginning 00:00z, output references the end of each period
# e.g. '2022-09-01 03:00:00+00:00' represents mean values for selected date between 00:00z and 03:00z
# Because the range stops at 23:59z, the last window of the final day is cut short and is
# stamped 23:59:00 rather than 00:00:00 of the following day.
query = f'''from(bucket: "{bucket}")
|> range(start: {start_day}T00:00:00Z, stop: {end_day}T23:59:00Z)
|> filter(fn: (r) => r["_measurement"] == "Stage_Ft")
|> aggregateWindow(every: 3h, fn: mean, createEmpty: false)
|> group()'''

# Import into df, drop and rename columns.
wse_df = query_api.query_data_frame(org=org, query=query)
wse_df = wse_df.drop(columns=["_start","result","table","_field","_stop","_measurement"])
wse_df = wse_df.rename(columns={"_value": "z", "_time": "time", "Longitude": "lon_g", "Latitude": "lat_g", "Station": "sid", "river_mile": "mile"})

# Modify selected field dtypes to simplify downstream operations
wse_df[['mile', 'lat_g', 'lon_g']] = wse_df[['mile', 'lat_g', 'lon_g']].apply(pd.to_numeric)
wse_df['time'] = pd.to_datetime(wse_df['time'],utc=True)
wse_df = wse_df.sort_values('time')


In [29]:
# Display the queried frame; the notebook shows a truncated view (first and last rows).
wse_df

Unnamed: 0,time,z,lat_g,lon_g,sid,mile
0,2022-06-01 03:00:00+00:00,0.086360,28.93231,-89.4071,rg_01670,17.9
14496,2022-06-01 03:00:00+00:00,18.608853,31.54402,-91.4334,rg_CE4103F4,363.3
10119,2022-06-01 03:00:00+00:00,4.560824,30.05442,-90.5687,rg_01260,138.7
6608,2022-06-01 03:00:00+00:00,3.028696,29.91004,-90.0841,rg_01320,98.3
5882,2022-06-01 03:00:00+00:00,1.799336,29.68454,-89.9699,rg_01390,62.5
...,...,...,...,...,...,...
15065,2022-09-01 23:59:00+00:00,11.763045,31.54402,-91.4334,rg_CE4103F4,363.3
14495,2022-09-01 23:59:00+00:00,8.645144,31.07361,-91.5819,rg_01080,313.7
13756,2022-09-01 23:59:00+00:00,7.990840,30.96083,-91.6644,rg_01120,302.4
2204,2022-09-01 23:59:00+00:00,0.335280,29.05638,-89.3086,rg_01575,7.5


In [30]:
# Use the time field of the queried data to build a list to iterate the wse interpolation from
# One entry per distinct timestamp, in the order they appear in wse_df (already sorted by time).
# NOTE(review): `.values.tolist()` on a datetime column presumably yields integer epoch
# nanoseconds rather than Timestamp objects, which is why format_time is applied next -- confirm.
wse_period_range = wse_df['time'].drop_duplicates().values.tolist()

# Convert POSIX time to datetime then string
def format_time(time):
    """Render a single time value as a 'YYYY-MM-DD HH:MM:SS' string.

    Args:
        time: Any scalar accepted by ``pandas.to_datetime`` (an integer is read
            as nanoseconds since the Unix epoch, the pandas default unit).

    Returns:
        str: The parsed timestamp formatted with '%Y-%m-%d %H:%M:%S'.
    """
    timestamp = pd.to_datetime(time)
    return timestamp.strftime('%Y-%m-%d %H:%M:%S')


# String label for every period, in the same order as wse_period_range
wse_period_list = [format_time(period_value) for period_value in wse_period_range]


In [31]:
# Session-level cache for the static reference layers, keyed by (path, crs), so they are
# downloaded once instead of on every wse_interp call (one call per 3 h period).
_GEOJSON_CACHE = {}


def _read_geojson_cached(path, crs):
    """Read a vector layer with geopandas, reusing the result on repeat calls."""
    key = (path, crs)
    if key not in _GEOJSON_CACHE:
        _GEOJSON_CACHE[key] = gpd.read_file(path, crs=crs)
    return _GEOJSON_CACHE[key]


# Function to interpolate missing WSE at predetermined RM
def wse_interp(time_query, out_path='Z:/wse/'):
    """Build a WSE raster for one time period and write it to a NetCDF file.

    Steps, in order:
      1. Select the rows of the module-level ``wse_df`` whose ``time`` equals ``time_query``.
      2. Outer-join them to the river-mile points on ``mile`` and linearly interpolate
         ``z`` along river mile to fill points without a gage value.
      3. Rasterize ``z`` with geocube (linear griddata) at 500 units in EPSG:26915.
      4. Add a single-entry ``time`` dimension, clip to the river polygon, and export.

    Args:
        time_query: Period to process; anything ``pandas.to_datetime`` can parse
            (the notebook passes 'YYYY-MM-DD HH:MM:SS' strings).
        out_path: Directory the NetCDF file is written to. Defaults to the
            original hard-coded location.

    Returns:
        None. The result is the file
        ``wse_{resolution}m_{YYYYmmddTHHMM}_y{year}_d{day_of_year}.nc`` in ``out_path``.
    """
    # Pass period selection to create subset dataframe
    period_select = str(time_query)
    wse_slice_df = wse_df.loc[wse_df['time'] == period_select]

    # Remove gage at Cape Girardeau to fix conflict with river mile join
    bad_gage = 'rg_CE401278'
    wse_slice_df = wse_slice_df.loc[wse_slice_df['sid'] != bad_gage]

    # Import river mile .geojson into gdf and drop matching columns to simplify following join.
    # The base URL has no trailing slash so the joined path contains a single '/'.
    url = 'https://raw.githubusercontent.com/hbienn/smartport_wse/main'
    rm_gdf = _read_geojson_cached(f'{url}/mr_rm.geojson', 'epsg:4326')
    # drop() returns a new frame, so the cached layer itself is never modified.
    rm_gdf = rm_gdf.drop(columns=['OBJECTID', 'ord', 'sid', 'wse', 'time', 'lat_g', 'lon_g'])

    # Round river mile to 1 decimal place to account for any floating precision errors
    rm_gdf = rm_gdf.round({'mile':1})
    rm_gdf = rm_gdf.sort_values('mile')

    # Merge WSE df with river mile gdf using mile as key
    wse_gdf = rm_gdf.merge(wse_slice_df, how='outer', on='mile')

    # Keep a fixed column selection/order
    cols = ['mile', 'sid', 'z', 'time','lon', 'lat', 'lat_g', 'lon_g', 'geometry']
    wse_gdf = wse_gdf[cols]
    wse_gdf = wse_gdf.sort_values('mile')

    # Derive the labels and the integer epoch-nanosecond stamp for this period once.
    # The integer is used directly for the time coordinate below; carrying it through a
    # float column and averaging it back is lossy at this magnitude (~1.6e18).
    period = pd.to_datetime(time_query)
    year = period.strftime("%Y")
    day_of_year = period.strftime("%j")
    period_label = period.strftime('%Y%m%dT%H%M')
    period_ns = int(round(period.timestamp())*1000000000)

    # Subset gdf to limit spatial domain to south of RM 1000 in the vicinity of Cape Girardeau, MO
    wse_gdf = wse_gdf.loc[wse_gdf['mile'] <= 1000]

    # Interpolates missing WSE values based on a linear relationship between river mile and known WSE values.
    # dissolve() leaves one row per river mile; limit_direction='both' also fills the ends.
    wse_gdf = wse_gdf.dissolve(by='mile', aggfunc='mean')
    wse_gdf = wse_gdf.sort_values('mile')
    wse_gdf['z'] = wse_gdf['z'].interpolate(method='linear', limit_direction = 'both')

    # User modified variables
    projection = 'EPSG:26915'
    resolution = 500

    # Still issues here with getting make_geocube to recognize time field and assign it correct dtype (datetime64[ns]).
    # Potentially results from use of a timezone-aware dtype, workaround implemented.
    # (A bounding-box `geom` argument is not implemented.)
    wse_xr = make_geocube(
        vector_data = wse_gdf,
        measurements = ['z',],
        output_crs = projection,
        resolution = (resolution, resolution),
        rasterize_function = partial(rasterize_points_griddata, method='linear', filter_nan = True)
    )

    # Expand dimensions and populate with the POSIX time value computed above
    wse_xr = wse_xr.expand_dims('time')
    arr = wse_xr['time'].to_numpy()
    arr[0,] = period_ns
    wse_xr['time'] = arr
    wse_xr['time'] = pd.to_datetime(wse_xr['time'],utc=True)

    # Clip surface to extent of Mississippi River
    mr = _read_geojson_cached(f'{url}/generalized_nhdarea_stlouistogulf_utm.geojson', projection)
    wse_xr = wse_xr.rio.clip(mr.geometry, mr.crs, drop=True, invert=False)

    # Export as .netcdf
    object_name = f'wse_{resolution}m_{period_label}_y{year}_d{day_of_year}.nc'
    file_for_upload = str(Path(out_path) / object_name)
    wse_xr.to_netcdf(file_for_upload)

    # Disabled: stage for upload to S3, then delete the local copy.
    # bucket_name = 'smartport-storage'
    # bucket_directory = 'wse/pipeline_test/'
    # object_key = bucket_directory + object_name
    # s3 = boto3.resource('s3')
    # s3.meta.client.upload_file(file_for_upload, bucket_name, object_key)
    # os.remove(file_for_upload)
    return


In [32]:
# Export one surface per period, stopping when the final list entry is reached.
# NOTE(review): the final period is skipped, presumably because it is the window
# truncated at the query's 23:59z stop time -- confirm.
for period_str in wse_period_list:
    if period_str == wse_period_list[-1]:
        break
    wse_interp(period_str)

In [None]:
import xarray as xr
import netCDF4
import h5netcdf
import numpy as np
%matplotlib inline

In [None]:
# Open the first surface written by the export loop. The name follows wse_interp's pattern
# wse_{resolution}m_{YYYYmmddTHHMM}_y{year}_d{day_of_year}.nc for the first queried period
# (2022-06-01 03:00z, day-of-year 152). The previous path used an older naming scheme and a
# 2021 date outside the queried range, so a fresh Run All never produced that file.
ds = xr.open_dataset('Z:/wse/wse_500m_20220601T0300_y2022_d152.nc')
ds

In [None]:
# Hide nodata cells so only the interpolated surface is drawn
wse_valid = ds.z.where(ds.z != ds.z.rio.nodata)
plot = wse_valid.plot()
ax = plot.axes
fig = plot.figure

# Axis labelling and tick formatting (scientific notation on both axes)
ax.set_xlabel('Easting (m)')
ax.set_ylabel('Northing (m)')
#ax.set_title(str(pd.to_datetime(period)))
ax.grid(True, linestyle='dotted')
ax.tick_params()
ax.ticklabel_format(axis = 'both', style='sci', scilimits=(0,0))

# NOTE(review): the source measurement is "Stage_Ft" but this label states metres -- confirm units.
plot.colorbar.set_label('WSE (NAVD88 m)')
ax.annotate('PCS: NAD83/UTM Zone 15N', xy=(5,5), xycoords='figure pixels',fontsize=7 )
fig.set_dpi(150) # 300 is best for export
#fig.savefig("Z:/Documents/ArcGIS/Projects/Smartport/smartport_wse_utm15n_160m_br2gom.jpg", transparent=False,)