# Objective

Provide a map of current and historic gaging stations and plot data for a selected station. The individual sites are shown as blue markers once you zoom into the area of interest. Hovering over a marker shows the station name and USGS site number. Clicking on a marker plots the available discharge and sediment concentration data. Note that, for now, the data are pulled from the sediment portal, which does not have all of the station data. Thus, some stations may not plot any information even though the USGS site information shows the site as having data available.

In [1]:
import matplotlib
%matplotlib widget
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from ipyleaflet import Map, Marker, MarkerCluster, WMSLayer, Unicode, basemaps
from ipysheet import from_dataframe
import numpy as np
import ipywidgets as widgets
# USGS imagery/topo tiles, drawn on top of the base map.
wms = WMSLayer(
    attribution='US Geological Survey',
    url='https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}',
)

# The interactive map; station markers are attached to it in a later cell.
m = Map(
    center=(30.661, -88.645),
    zoom=5,
    basemap=basemaps.CartoDB.Positron,
)
m.add_layer(wms)

In [2]:
# Time-series / scatter figure: daily flow, discrete flow, sediment samples.
fig, (daily_flow_plot, inst_flow_plot, ssc_plot) = plt.subplots(
    nrows=3, ncols=1, figsize=(12, 8), constrained_layout=True)

# Flow-frequency histograms in linear and log space, side by side.
fig_hist, (flow_hist, flow_hist_log) = plt.subplots(
    nrows=1, ncols=2, figsize=(12, 8), constrained_layout=True)

# Single axes for the log-log sediment rating curve.
fig_rating_curve, rating_curve = plt.subplots(
    nrows=1, ncols=1, figsize=(12, 8), constrained_layout=True)

# Effective-discharge curves built from the linear and the log histogram.
fig_eff_discharge, (eff_discharge, eff_discharge_log) = plt.subplots(
    nrows=2, ncols=1, figsize=(12, 8), constrained_layout=True)

# Secondary y-axes (sharing x) that carry the cumulative-load curves.
cum_discharge = eff_discharge.twinx()
cum_discharge_log = eff_discharge_log.twinx()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [3]:
# Positions of the two table panels; plot_station_daily fills them by index.
DISCRETE_TAB = 5
DAILY_TAB = 6

# One accordion panel per view: the map, four figure canvases, and two
# initially empty boxes that later hold the data tables.
children = [
    m,
    fig.canvas,
    fig_hist.canvas,
    fig_rating_curve.canvas,
    fig_eff_discharge.canvas,
    widgets.VBox(),
    widgets.VBox(),
]
tab = widgets.Accordion(children=children)

for panel_index, panel_title in (
    (0, 'Map'),
    (1, 'Data Plots'),
    (2, 'Flow Frequency'),
    (3, 'Rating Curve'),
    (4, 'Effective Discharge'),
    (DISCRETE_TAB, 'Discrete Data'),
    (DAILY_TAB, 'Daily Data'),
):
    tab.set_title(panel_index, panel_title)

# Last expression of the cell: renders the accordion.
tab

Accordion(children=(Map(center=[30.661, -88.645], controls=(ZoomControl(options=['position', 'zoom_in_text', '…

In [4]:
import requests,pandas,io
# Download the active stream sites that report instantaneous values for each
# state of interest and stack them into one table, `f`, used to build markers.
state_data = []
for state in ['la', 'ms', 'ar', 'al', 'tn', 'mo']:
    # The site service is a query-string (GET) service; fail loudly on an HTTP
    # error instead of trying to parse an error page as a table.
    resp = requests.get(
        'https://waterservices.usgs.gov/nwis/site/?format=rdb&stateCd={0}&siteType=ST&siteStatus=active&hasDataTypeCd=iv'.format(state),
        timeout=60,
    )
    resp.raise_for_status()

    # Count the leading '#' comment lines so the row that follows the column
    # header (located at raw line index ncomments + 1) can be skipped.
    ncomments = 0
    for line in resp.text.split('\n'):
        if line.startswith('#'):
            ncomments += 1
        else:
            break

    state_data.append(
        pandas.read_table(
            io.BytesIO(resp.content),
            comment='#',
            skiprows=[ncomments + 1],
        )
    )

f = pandas.concat(state_data, axis=0)

In [5]:
def plot_station_daily(site, timeout=120):
    """Download sediment-portal data for one station and redraw all plots.

    The function clears every shared axes object, requests the daily and
    discrete data files for the station from the sediment portal, shows the
    tables in the accordion, and redraws the flow, sediment, flow-frequency,
    rating-curve and effective-discharge plots from whatever columns exist.
    Stages whose inputs are missing are skipped instead of raising.

    Parameters
    ----------
    site : sequence
        ``(site_no, station_nm)``; ``site_no`` is zero-padded to at least
        eight digits for the portal query and ``station_nm`` is used as the
        figure title.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Raises
    ------
    requests.HTTPError
        If the portal answers either request with an error status.

    Notes
    -----
    The downloaded archive is also stored in the module-level name
    ``station_zipfile``.
    """
    import math
    import re
    import xml.etree.ElementTree as ET
    from io import BytesIO
    from zipfile import ZipFile

    import numpy as np
    import requests

    global station_zipfile

    site_no = site[0]
    figures = (fig, fig_hist, fig_rating_curve, fig_eff_discharge)
    for figure in figures:
        figure.suptitle(site[1])
    for axes in (daily_flow_plot, inst_flow_plot, ssc_plot,
                 flow_hist, flow_hist_log, rating_curve,
                 eff_discharge, eff_discharge_log,
                 cum_discharge_log, cum_discharge):
        axes.clear()

    # --- request the station archive from the sediment portal -------------
    URL = "https://cida.usgs.gov/"
    API_ENDPOINT = "https://cida.usgs.gov/sediment/data"
    ogcfilter = ET.Element("ogc:Filter", attrib={"xmlns:ogc": "http://www.opengis.net/ogc"})
    property_is = ET.SubElement(ogcfilter, "ogc:PropertyIsEqualTo", attrib={"matchCase": "true"})
    ET.SubElement(property_is, "ogc:PropertyName").text = "SITE_NO"
    # int() lets the site number arrive as an int or as a digit string.
    ET.SubElement(property_is, "ogc:Literal").text = '{0:08d}'.format(int(site_no))
    ogcfilter_string = ET.tostring(ogcfilter).decode()
    data = {'email': 'cekees@gmail.com',
            'format': 'tsv',
            'directDownload': 'true',
            'dataTypes': 'sites_data_daily_discrete_',
            'dailyFilter': ogcfilter_string,
            'discreteFilter': ogcfilter_string
            }
    # The POST answers with a path (relative to URL) of the zip to download.
    search = requests.post(url=API_ENDPOINT, data=data, timeout=timeout)
    search.raise_for_status()
    download = requests.get(url=URL + search.text, timeout=timeout)
    download.raise_for_status()
    station_zipfile = ZipFile(BytesIO(download.content))

    def get_comments(filename):
        """Return (count, text) of the leading '#' comment lines of a member."""
        ncomments = 0
        comments = ''
        with station_zipfile.open(filename) as member:
            for raw_line in member:
                commentline = raw_line.decode()
                if not commentline.startswith(('#', '"#')):
                    break
                ncomments += 1
                comments += commentline.strip(r'"').strip('#')
        return (ncomments, comments)

    def read_member(filename):
        """Read one tab-separated member, using the first non-comment line as header."""
        ncomments, _ = get_comments(filename)
        with station_zipfile.open(filename) as member:
            return pandas.read_table(member, header=ncomments)

    # Same character class as before, now a raw string: everything except
    # digits, '.', '*' and '?' is removed from string-valued cells.
    non_numeric = re.compile(r'[^\d*\.?\d]+')

    def to_float(value, clean=False):
        """Convert a table cell to float; unparseable cells become NaN."""
        if clean and isinstance(value, str):
            value = non_numeric.sub('', value)
        try:
            return float(value)
        except (TypeError, ValueError):
            return math.nan

    def plot_effective_discharge(load_axes, cumulative_axes, rep_q, frequency, slope, intercept):
        """Draw the per-bin load curve and its normalised running total."""
        loads = frequency * np.exp(np.log(rep_q) * slope + intercept)
        cumulative = np.concatenate(([0.0], np.cumsum(loads)))
        load_axes.set_xlabel('Discharge[cfs]')
        load_axes.set_ylabel('Sand Load [tons/year]')
        load_axes.plot(rep_q, loads, color='tab:red')
        load_axes.tick_params(axis='y', labelcolor='tab:red')
        cumulative_axes.set_ylabel('Cummulative Percent Sand Load', color='tab:blue')
        # The cumulative curve starts at the origin.
        cumulative_axes.plot(np.concatenate(([0.0], rep_q)), cumulative / cumulative[-1])
        cumulative_axes.tick_params(axis='y', labelcolor='tab:blue')
        load_axes.set_title("Effective Discharge")

    # --- nothing came back: flag it on every figure and empty the tables ---
    if not station_zipfile.filelist:
        print(station_zipfile.filelist, station_zipfile)
        for figure in figures:
            figure.suptitle("INSUFFICIENT DATA AT {0}".format(site[1]))
        tab.children[DISCRETE_TAB].children = []
        tab.children[DAILY_TAB].children = []
        return

    members = station_zipfile.namelist()

    # --- daily file ---------------------------------------------------------
    has_daily_file = 'daily_data.tsv' in members
    if has_daily_file:
        daily = read_member('daily_data.tsv')
        daily['datetime'] = pandas.to_datetime(daily['datetime'])
        tab.children[DAILY_TAB].children = [from_dataframe(daily)]
        if 'DAILY_FLOW' in daily.columns:
            daily_flow_plot.plot(daily['datetime'], daily['DAILY_FLOW'])
            daily_flow_plot.set_ylabel('Discharge [cfs]')
            daily_flow_plot.set_xlabel('Date')
            daily_flow_plot.set_title('Daily Flow')
    else:
        # Do not leave the previous station's table on display.
        tab.children[DAILY_TAB].children = []

    # --- discrete file ------------------------------------------------------
    if 'discrete_data.tsv' not in members:
        tab.children[DISCRETE_TAB].children = []
        return
    discrete = read_member('discrete_data.tsv')
    tab.children[DISCRETE_TAB].children = [from_dataframe(discrete)]
    columns = set(discrete.columns)
    if 'DATETIME' in columns:
        discrete['DATETIME'] = pandas.to_datetime(discrete['DATETIME'])
    if not has_daily_file and {'DATETIME', 'DAILYFLOW'} <= columns:
        daily_flow_plot.plot(discrete['DATETIME'], discrete['DAILYFLOW'])
        daily_flow_plot.set_ylabel('Discharge [cfs]')
        daily_flow_plot.set_xlabel('Date')
        daily_flow_plot.set_title('Daily Flow (from discrete file)')
    if {'DATETIME', 'INSTFLOW'} <= columns:
        inst_flow_plot.plot(discrete['DATETIME'], discrete['INSTFLOW'])
        inst_flow_plot.set_title("Discrete Discharge")
        inst_flow_plot.set_xlabel('Date')
        inst_flow_plot.set_ylabel('Discharge [cfs]')
    if not {'SSC', 'P63', 'INSTFLOW', 'DAILYFLOW'} <= columns:
        return

    # --- sediment samples ---------------------------------------------------
    # Keep samples with finite SSC and P63 and at least one finite flow; the
    # daily flow is preferred over the instantaneous one.
    flow_sand = []
    p63_conc = []
    ssc = []
    sand = []
    for insti, dailyi, p63i, ssci in zip(discrete['INSTFLOW'], discrete['DAILYFLOW'],
                                         discrete['P63'], discrete['SSC']):
        insti = to_float(insti)
        dailyi = to_float(dailyi)
        p63i = to_float(p63i, clean=True)
        ssci = to_float(ssci, clean=True)
        if not (math.isfinite(ssci) and math.isfinite(p63i)):
            continue
        if math.isfinite(dailyi):
            flowi = dailyi
        elif math.isfinite(insti):
            flowi = insti
        else:
            continue
        ssc.append(ssci)
        # P63 is used as a percentage: that share of SSC is plotted as 'Silt',
        # the remainder as 'Sand'.
        p63_conc.append(ssci * (p63i / 100.0))
        sand.append(ssci * (1.0 - p63i / 100.0))
        flow_sand.append(flowi)
    flow_sand = np.array(flow_sand)
    sand = np.array(sand)
    p63_conc = np.array(p63_conc)
    ssc = np.array(ssc)
    ssc_plot.scatter(flow_sand, ssc, label='SSC')
    ssc_plot.scatter(flow_sand, p63_conc, label='Silt')
    ssc_plot.scatter(flow_sand, sand, label='Sand')
    ssc_plot.set_xlabel('Discharge [cfs]')
    ssc_plot.set_ylabel('Sediment [mg/l]')
    ssc_plot.set_title("Discrete Sediment Data")
    ssc_plot.legend()
    if flow_sand.size == 0:
        # No usable sample: the frequency and rating analyses have no input.
        return

    # --- flow frequency -----------------------------------------------------
    # NOTE(review): as before, the histograms are built from the flows paired
    # with the sediment samples, not from the daily record - confirm intent.
    (n, bins, _) = flow_hist.hist(flow_sand, bins=25)
    flow_hist.set_ylabel('Incidence')
    flow_hist.set_xlabel('Discharge [cfs]')
    flow_hist.set_title("Flow Frequency")
    annual_frequency = 365.0 * n / n.sum()
    rep_discharge = 0.5 * (bins[0:-1] + bins[1:])

    # Only positive flows have a finite logarithm.
    positive_flow = flow_sand[flow_sand > 0.0]
    has_log_histogram = positive_flow.size > 0
    if has_log_histogram:
        (n_log, bins_log, _) = flow_hist_log.hist(np.log(positive_flow), bins=25)
        flow_hist_log.set_ylabel('Incidence')
        flow_hist_log.set_xlabel(r'Log Discharge [log(cfs)]')
        flow_hist_log.set_title("Flow Frequency")
        annual_frequency_log = 365.0 * n_log / n_log.sum()
        discharge_bins = np.exp(bins_log)
        rep_discharge_log = 0.5 * (discharge_bins[0:-1] + discharge_bins[1:])

    # --- rating curve: straight-line fit of ln(sand) against ln(flow) ------
    usable = (sand > 0.0) & (flow_sand > 0.0)
    log_flow_sand = np.log(flow_sand[usable])
    log_sand = np.log(sand[usable])
    if log_flow_sand.size < 2 or log_flow_sand.max() == log_flow_sand.min():
        # A line cannot be fitted to fewer than two distinct discharges.
        return
    from scipy import stats
    sand_pow = stats.linregress(x=log_flow_sand, y=log_sand)
    slope, intercept = sand_pow[0], sand_pow[1]
    rating_curve.scatter(log_flow_sand, log_sand, c='b')
    rating_curve.plot(log_flow_sand, intercept + slope * log_flow_sand, 'b')
    rating_curve.set_title('Rating Curve')
    rating_curve.set_xlabel(r'Discharge [ln(cfs)]')
    rating_curve.set_ylabel(r'Sediment Concentration [ln(tons/day)]')

    # --- effective discharge ------------------------------------------------
    # NOTE(review): the fit above relates sand concentration to discharge, yet
    # the axes are labelled as loads in tons. The previous code computed
    # q_sand = flow * sand * 0.0027 but never used it - confirm whether the
    # regression was meant to be on that load instead.
    plot_effective_discharge(eff_discharge, cum_discharge,
                             rep_discharge, annual_frequency, slope, intercept)
    if has_log_histogram:
        plot_effective_discharge(eff_discharge_log, cum_discharge_log,
                                 rep_discharge_log, annual_frequency_log, slope, intercept)

In [6]:
# Markers created for the map, and a lookup from a marker's (lat, long)
# location to its (site number, station name) pair.
stations = list()
site_dict = dict()


def station_on_click_handler(event=None, id=None, properties=None, **args):
    """Redraw the plots for the station whose marker was clicked.

    The clicked location is read from ``args['coordinates']`` and used as the
    key into ``site_dict``.
    """
    clicked_location = tuple(args['coordinates'])
    plot_station_daily(site_dict[clicked_location])

# Create one fixed marker per site, register its click handler, and remember
# which site sits at that location.
for site_no, station_nm, latitude, longitude in zip(f.site_no, f.station_nm,
                                                    f.dec_lat_va, f.dec_long_va):
    marker = Marker(
        location=(float(latitude), float(longitude)),
        draggable=False,
        title="Site: {0}\nStation: {1}".format(site_no, station_nm),
    )
    marker.on_click(station_on_click_handler)
    site_dict[tuple(marker.location)] = (site_no, station_nm)
    stations.append(marker)

In [7]:
# Group all station markers into one cluster layer and add it to the map.
marker_cluster = MarkerCluster(markers=tuple(stations))
m.add_layer(marker_cluster)