# Explore ERDDAP timeseries data using Jupyter Widgets
Inspired by [Jason Grout's excellent ESIP Tech Dive talk on "Jupyter Widgets"](https://youtu.be/CVcrTRQkTxo?t=2596), this notebook uses the `ipyleaflet` and `bqplot` widgets
to interactively explore the last two weeks of time series data from an ERDDAP Server. Select a `standard_name` from the list, then click a station to see the time series.  

In [1]:
import numpy as np
import pandas as pd

In [2]:
import pendulum

`ipyleaflet` and `bqplot` are both Jupyter widgets, so they can interact with Python like any other widget. Since we want to click on a map in a notebook and get an interactive time series plot, they are the perfect tools to use here.

In [3]:
import bqplot as bq
import ipyleaflet as ipyl
import ipywidgets as ipyw

To make working with ERDDAP simpler, we use `erddapy`, a high-level Python interface to ERDDAP's RESTful API.

In [4]:
from erddapy import ERDDAP
from erddapy.url_handling import urlopen

`quote` percent-encodes the special characters in the request URL while keeping it readable for the user.

In [5]:
import re
from urllib.parse import quote

from requests import HTTPError
from erddap_app.config import servers

This code should work with minor modifications on any ERDDAP (v1.64+) endpoint that has `cdm_data_type=timeseries` or `cdm_data_type=point` datasets. Change the server settings (see `servers`) for other ERDDAP endpoints or regions of interest.

This function puts lon,lat and datasetID into a GeoJSON feature

In [6]:
def point(dataset, lon, lat, nchar):
    """Build a GeoJSON point feature describing one dataset.

    The feature properties hold the full dataset ID, its first ``nchar``
    characters as a short name, and a grey marker style; the geometry is a
    point at ``[lon, lat]``.
    """
    properties = {
        "datasetID": dataset,
        "short_dataset_name": dataset[:nchar],
        "style": {"color": "Grey"},
    }
    geometry = {"type": "Point", "coordinates": [lon, lat]}
    return {"type": "Feature", "properties": properties, "geometry": geometry}

This function finds all the datasets with a given `standard_name` in the specified time period and returns the search results as a Pandas dataframe

In [7]:
def search_datasets(e, standard_name, cdm_data_type, min_time, max_time, skip_datasets):
    """Search the server for datasets that provide ``standard_name``.

    Parameters
    ----------
    e
        Client object providing ``get_search_url``.
    standard_name : str
        Standard name to search for.
    cdm_data_type : str
        Data type filter; it is lower-cased before being sent.
    min_time, max_time
        Time range passed through to ``get_search_url``.
    skip_datasets
        Dataset IDs to remove from the results; ``None`` or an empty
        sequence removes nothing.

    Returns
    -------
    pandas.DataFrame
        The search results without the skipped datasets. When the search
        request fails with ``HTTPError`` an empty DataFrame is returned, so
        callers can rely on ``.empty`` in every case.
    """
    search_url = e.get_search_url(
        response="csv",
        cdm_data_type=cdm_data_type.lower(),
        items_per_page=100000,
        standard_name=standard_name,
        min_time=min_time,
        max_time=max_time,
    )

    try:
        df = pd.read_csv(urlopen(search_url))
    except HTTPError:
        # Shorten long names so the message fits in the plot title.
        if len(standard_name) > 14:
            v = f"{standard_name[:15]}..."
        else:
            v = standard_name
        # The plot figure is a module-level object that does not exist yet
        # while the standard names are first validated, so only touch it
        # when it is available.
        plot = globals().get("figure")
        if plot is not None:
            plot.title = f"No {v} found in this time range. Pick another variable."
            # Flatten the current line so stale data is not shown.
            plot.marks[0].y = 0.0 * plot.marks[0].y
        return pd.DataFrame()

    if skip_datasets is not None:
        df = df[~df["Dataset ID"].isin(skip_datasets)]
    return df

This function returns the lon,lat values from all datasets

In [8]:
def all_datasets_locations(e, cdm_data_type, min_time, max_time):
    """Query the server's ``allDatasets`` table for dataset locations.

    Requests ``datasetID``, ``minLongitude`` and ``minLatitude`` for every
    dataset of the given ``cdm_data_type`` whose time coverage overlaps
    ``[min_time, max_time]`` and returns the answer as a DataFrame.
    """
    query = "".join(
        [
            f"{e.server}",
            "/tabledap/allDatasets.csv?",
            "datasetID,minLongitude,minLatitude&",
            f'cdm_data_type="{cdm_data_type}"',
            f"&minTime<={max_time.to_datetime_string()}",
            f"&maxTime>={min_time.to_datetime_string()}",
        ]
    )
    # Percent-encode everything except the characters that structure the URL.
    encoded_url = quote(query, safe=":/?&= ")
    # The second CSV line is skipped (presumably the units row - confirm).
    return pd.read_csv(urlopen(encoded_url), skiprows=[1])

This function returns GeoJSON containing lon, lat and dataset ID for all matching stations

In [9]:
def stdname2geojson(
    e, standard_name, cdm_data_type, min_time, max_time, skip_datasets, nchar=3
):
    """Return map features and dataset IDs for one ``standard_name``.

    Parameters
    ----------
    e
        ERDDAP client forwarded to the search and location helpers.
    standard_name, cdm_data_type, min_time, max_time, skip_datasets
        Forwarded unchanged to ``search_datasets``.
    nchar : int, optional
        Number of leading characters of the dataset ID used as the short
        dataset name of each feature (default 3, the previous fixed value).

    Returns
    -------
    tuple
        ``(geojson, datasets)`` where ``geojson`` is ``{"features": [...]}``
        and ``datasets`` holds the dataset IDs found by the search. Both are
        empty when the search finds nothing.
    """
    found = search_datasets(
        e, standard_name, cdm_data_type, min_time, max_time, skip_datasets
    )
    # len() is valid for a DataFrame as well as for a plain empty list, so an
    # unsuccessful search is handled whichever of the two comes back.
    if len(found) == 0:
        return {"features": []}, []

    datasets = found["Dataset ID"].values

    locations = all_datasets_locations(e, cdm_data_type, min_time, max_time)
    matching = locations[locations["datasetID"].isin(datasets)]
    # The first three columns are taken positionally: ID, longitude, latitude.
    features = [
        point(dataset_id, lon, lat, nchar)
        for dataset_id, lon, lat, *_ in matching.itertuples(index=False)
    ]
    return {"features": features}, datasets

The `map_click_handler` function updates the time series plot when a station marker is clicked

In [10]:
def map_click_handler(event=None, id=None, properties=None, feature=None):
    """Plot the time series of the station that was clicked on the map.

    Only ``properties`` is used: its ``"datasetID"`` entry selects the
    dataset. The time range comes from the search time widgets and the
    variable from the standard-name dropdown. The other parameters are
    accepted but ignored.
    """
    # Nothing to plot when the click carries no feature properties.
    if not properties:
        return

    dataset_id = properties["datasetID"]

    min_time = pendulum.parse(widget_search_min_time.value)
    max_time = pendulum.parse(widget_search_max_time.value)
    constraints = {"time>=": min_time, "time<=": max_time}

    standard_name = widget_std_names.value
    widget_dsnames.value = dataset_id

    try:
        update_timeseries_plot(
            e,
            dataset=dataset_id,
            standard_name=standard_name,
            constraints=constraints,
        )
    except (HTTPError, ValueError):
        # HTTPError: the data request failed. ValueError: get_timeseries
        # found no variable with this standard name in the dataset.
        print(
            "No", standard_name, "data for this station. Please choose another station."
        )

The `widget_replot_button_handler` function updates the time series plot when the `Update TimeSeries` button is selected 

In [11]:
def widget_replot_button_handler(change):
    """Redraw the time series for the plot time range entered by the user.

    Reads the plot start/stop widgets, the selected dataset and the selected
    standard name, then refreshes the plot. ``change`` is not used.
    """
    time_window = {
        "time>=": pendulum.parse(widget_plot_start_time.value),
        "time<=": pendulum.parse(widget_plot_stop_time.value),
    }
    selected_dataset = widget_dsnames.value
    update_timeseries_plot(
        e,
        dataset=selected_dataset,
        standard_name=widget_std_names.value,
        constraints=time_window,
    )

The `widget_search_button_handler` function updates the map when the `Update Search` button is selected 

In [12]:
def widget_search_button_handler(change):
    """Repeat the dataset search and refresh the map and the plot.

    Reads the search time range and the selected standard name from the
    widgets, replaces the station layer on the map with the new search
    results, and plots the first dataset found. When the search finds no
    datasets the station layer is emptied, the plot title reports it, and
    the dataset dropdown is left unchanged. ``change`` is not used.
    """
    min_time = pendulum.parse(widget_search_min_time.value)
    max_time = pendulum.parse(widget_search_max_time.value)

    standard_name = widget_std_names.value

    features, datasets = stdname2geojson(
        e,
        standard_name,
        server.get("cdm_data_type"),
        min_time,
        max_time,
        server.get("skip_datasets"),
    )

    feature_layer = ipyl.GeoJSON(data=features)
    feature_layer.on_click(map_click_handler)
    # Keep the base layer and swap in the new station layer.
    map.layers = [map.layers[0], feature_layer]

    # `datasets` may be a NumPy array, so test its length rather than its
    # truth value.
    if len(datasets) == 0:
        figure.title = (
            f"No {standard_name} found in this time range. Pick another variable."
        )
        return

    dataset_id = datasets[0]
    widget_dsnames.options = datasets
    widget_dsnames.value = dataset_id

    constraints = {"time>=": min_time, "time<=": max_time}
    try:
        update_timeseries_plot(
            e,
            dataset=dataset_id,
            standard_name=standard_name,
            constraints=constraints,
        )
    except (HTTPError, ValueError):
        # Same reporting as map_click_handler: the first dataset may have no
        # data for this variable in the requested range.
        print(
            "No", standard_name, "data for this station. Please choose another station."
        )

This function updates the time series plot when the `Update Search` or the `Update TimeSeries` button is selected.

In [13]:
def update_timeseries_plot(
    e=None, dataset=None, standard_name=None, constraints=None, title_len=18
):
    """Load one dataset's time series and show it in the existing figure.

    The first mark of the module-level ``figure`` receives the new x/y data,
    and the title becomes the first ``title_len`` characters of the dataset
    ID followed by the variable name.
    """
    frame, variable = get_timeseries(
        e,
        dataset=dataset,
        standard_name=standard_name,
        constraints=constraints,
    )
    line = figure.marks[0]
    line.x = frame.index
    line.y = frame[variable]
    figure.title = f"{dataset[:title_len]} - {variable}"

This function returns the specified dataset time series values as a Pandas dataframe

In [14]:
def get_timeseries(e, dataset=None, standard_name=None, constraints=None):
    """Download the time series of one variable as a DataFrame.

    The variable is the first one in ``dataset`` whose ``standard_name``
    attribute equals ``standard_name`` (case-insensitive). Returns the tuple
    ``(df, var)``: the data indexed by time and the variable name.

    Raises ``ValueError`` when the dataset has no such variable.
    """

    def _same_standard_name(value):
        return str(value).lower() == standard_name.lower()

    matches = e.get_var_by_attr(
        dataset_id=dataset,
        standard_name=_same_standard_name,
    )
    if not matches:
        # We should filter out only valid standard_names for each dataset!
        # df = pd.read_csv(e.get_info_url(response="csv"))
        # df.loc[df["Attribute Name"] == "standard_name"]["Value"].values
        raise ValueError(f"Cannot get data for {standard_name}.")
    variable = matches[0]

    download_url = e.get_download_url(
        dataset_id=dataset,
        constraints=constraints,
        variables=["time", variable],
        response="csv",
    )

    # The second CSV line is skipped (presumably the units row - confirm).
    frame = pd.read_csv(
        urlopen(download_url),
        index_col="time",
        parse_dates=True,
        skiprows=[1],
    )
    return frame, variable

In [15]:
# now = pendulum.now(tz="utc")

# servers = {
#     "ioos": {
#         "url": "http://erddap.sensors.ioos.us/erddap",
#         "standard_name": "sea_surface_wave_significant_height",
#         "nchar": 9,
#         "cdm_data_type": "TimeSeries",
#         "center": [35, -100],
#         "zoom": 3,
#         "max_time": pendulum.parse("2017-11-11T00:00:00Z"),
#         "min_time": pendulum.parse("2017-11-01T00:00:00Z"),
#         "skip_datasets": [],
#     },
#     "whoi": {
#         "url": "https://gamone.whoi.edu/erddap",
#         "standard_name": "sea_water_temperature",
#         "nchar": 9,
#         "cdm_data_type": "TimeSeries",
#         "center": [35, -100],
#         "zoom": 3,
#         "max_time": pendulum.parse("2011-05-15T00:00:00Z"),
#         "min_time": pendulum.parse("2011-05-05T00:00:00Z"),
#         "skip_datasets": [],
#     },
#     "ooi": {
#         "url": "https://erddap-uncabled.oceanobservatories.org/uncabled/erddap",
#         "standard_name": "sea_water_temperature",
#         "nchar": 8,
#         "cdm_data_type": "Point",
#         "center": [35, -100],
#         "zoom": 1,
#         "max_time": pendulum.parse("2017-08-03T00:00:00Z"),
#         "min_time": pendulum.parse("2017-08-01T00:00:00Z"),
#         "skip_datasets": [],
#     },
#     "neracoos": {
#         "url": "http://www.neracoos.org/erddap",
#         "standard_name": "significant_height_of_wind_and_swell_waves",
#         "nchar": 3,
#         "cdm_data_type": "TimeSeries",
#         "center": [42.5, -68],
#         "zoom": 6,
#         "max_time": now,
#         "min_time": now.subtract(weeks=2),
#         "skip_datasets": ["cwwcNDBCMet"],
#     },
# }

In [16]:
# Key of the `servers` entry to explore; it is passed to get_valid_stdnames.
server_name = "neracoos"

This cell specifies the standard names to be skipped, such as quality control-related and time-invariant variables

In [17]:
def remove_qcstdnames(standard_names):
    """Strip quality-control and bookkeeping names from ``standard_names``.

    Names ending in ``qc``, ``data_quality`` or ``flag`` are removed, along
    with a fixed set of coordinate/metadata names. The list is modified in
    place and the same list object is returned.
    """
    qc_pattern = re.compile("^.*(qc)$|^.*(data_quality)$|^.*(flag)$")

    unwanted = [
        "depth",
        "latitude",
        "longitude",
        "platform",
        "station_name",
        "time",
        "offset_time",
    ]
    unwanted += [name for name in standard_names if qc_pattern.search(name)]

    for name in unwanted:
        if name in standard_names:
            standard_names.remove(name)
    return standard_names

This cell identifies the valid standard names for the specified server

In [18]:
def get_valid_stdnames(server_name):
    """Find the standard names that can actually be plotted for a server.

    The server's list of standard names is downloaded, quality-control and
    bookkeeping names are removed, and each remaining name is kept only if a
    dataset search for it (using the server's configured time range)
    succeeds and the first dataset found has a variable with that standard
    name.

    Parameters
    ----------
    server_name : str
        Key of the server entry in ``servers``.

    Returns
    -------
    tuple
        ``(valid_standard_names, server, e)``: the usable standard names,
        the server's configuration entry, and the ERDDAP client built for it.
    """
    server = servers[server_name]
    server_url = server.get("url")

    e = ERDDAP(server=server_url, protocol="tabledap")

    url_standard_names = f"{server_url}/categorize/standard_name/index.csv"
    df = pd.read_csv(urlopen(url_standard_names), skiprows=[1, 2])
    standard_names = remove_qcstdnames(list(df["Category"].values))

    print(
        "Checking the variables available for this server. This might take up to a couple of minutes...\n"
    )

    total = len(standard_names)
    valid_standard_names = []

    for count, standard_name in enumerate(standard_names, start=1):
        # Coarse progress feedback; each name needs its own server requests.
        if count == total // 2:
            print("Halfway there...\n")
        elif count == (3 * total) // 4:
            print("Almost done...\n")

        try:
            _, datasets = stdname2geojson(
                e,
                standard_name,
                server.get("cdm_data_type"),
                server.get("min_time"),
                server.get("max_time"),
                server.get("skip_datasets"),
            )
        except NameError:
            # The search's error path may touch module-level objects (such as
            # the plot figure) that do not exist yet at this point; treat
            # that as "no data for this name".
            continue

        # No dataset offers this standard name in the configured time range.
        if len(datasets) == 0:
            continue

        var = e.get_var_by_attr(
            dataset_id=datasets[0],
            standard_name=lambda v: str(v).lower() == standard_name.lower(),
        )
        if var:
            valid_standard_names.append(standard_name)

    print("Done!")
    return valid_standard_names, server, e

In [19]:
# Validate the standard names once for the selected server. `server` is its
# entry from `servers` and `e` the ERDDAP client used by the handlers above.
valid_standard_names, server, e = get_valid_stdnames(server_name)

Checking the variables available for this server. This might take up to a couple of minutes...

Halfway there...

Almost done...

Done!


Create a dropdown menu widget with all the valid `standard_name` values found

In [20]:
def f_widget_std_names(server, valid_standard_names):
    """Return a dropdown listing ``valid_standard_names``.

    The server's configured ``standard_name`` is the initial selection.
    """
    initial_name = server.get("standard_name")
    return ipyw.Dropdown(options=valid_standard_names, value=initial_name)

Create text widgets to enter the minimum and maximum time for the dataset search

In [21]:
def f_widget_search_min_time(server):
    """Return the "Search Min" text box, prefilled with the server's ``min_time``."""
    initial_text = server.get("min_time").to_datetime_string()
    return ipyw.Text(value=initial_text, description="Search Min", disabled=False)

In [22]:
def f_widget_search_max_time(server):
    """Return the "Search Max" text box, prefilled with the server's ``max_time``."""
    initial_text = server.get("max_time").to_datetime_string()
    return ipyw.Text(value=initial_text, description="Search Max", disabled=False)

Create the `Update Search` button

In [23]:
# Button that triggers a new dataset search. `value` is not a Button trait,
# so it is no longer passed.
widget_search_button = ipyw.Button(
    description="Update Search", disabled=False, button_style=""
)

Create text widgets to enter the minimum and maximum time for the time series plot

In [24]:
def f_widget_plot_start_time(server):
    """Return the "Plot Min" text box, prefilled with the server's ``min_time``."""
    initial_text = server.get("min_time").to_datetime_string()
    return ipyw.Text(value=initial_text, description="Plot Min", disabled=False)

In [25]:
def f_widget_plot_stop_time(server):
    """Return the "Plot Max" text box, prefilled with the server's ``max_time``."""
    initial_text = server.get("max_time").to_datetime_string()
    return ipyw.Text(value=initial_text, description="Plot Max", disabled=False)

Create the `Update TimeSeries` button 

In [26]:
# Button that redraws the time series. `value` is not a Button trait, so it
# is no longer passed.
widget_replot_button = ipyw.Button(
    description="Update TimeSeries", disabled=False, button_style=""
)

In [27]:
# Clicking "Update TimeSeries" redraws the plot for the plot time range.
widget_replot_button.on_click(widget_replot_button_handler)

In [28]:
# Clicking "Update Search" repeats the dataset search and refreshes the map.
widget_search_button.on_click(widget_search_button_handler)

This defines the initial `ipyleaflet` map 

In [29]:
def plot_datasets(server, e):
    """Create the initial map with one clickable marker per dataset.

    The map is centred and zoomed as configured in ``server``. The markers
    come from a search for the server's default ``standard_name`` over its
    configured time range.

    Returns
    -------
    tuple
        ``(map, feature_layer, datasets)``: the map widget, the GeoJSON
        layer holding the station markers, and the dataset IDs found.
    """
    leaflet_map = ipyl.Map(
        center=server.get("center"),
        zoom=server.get("zoom"),
        layout=dict(width="750px", height="350px"),
    )
    features, datasets = stdname2geojson(
        e,
        server.get("standard_name"),
        server.get("cdm_data_type"),
        server.get("min_time"),
        server.get("max_time"),
        server.get("skip_datasets"),
    )
    feature_layer = ipyl.GeoJSON(data=features)
    feature_layer.on_click(map_click_handler)
    # Keep the base layer and add the station layer on top of it.
    leaflet_map.layers = [leaflet_map.layers[0], feature_layer]
    return leaflet_map, feature_layer, datasets

In [30]:
def f_widget_dsnames(datasets):
    """Return a dropdown of dataset IDs with the first one selected."""
    return ipyw.Dropdown(options=datasets, value=datasets[0])

This defines the initial `bqplot` time series plot

In [31]:
def plot_timeseries(server, e, dataset_id):
    """Build the initial time series figure for ``dataset_id``.

    Plots the server's default ``standard_name`` over its configured time
    range as a single line with a value tooltip, and returns the figure.
    """
    x_scale = bq.DateScale()
    y_scale = bq.LinearScale()

    time_window = {
        "time>=": server.get("min_time"),
        "time<=": server.get("max_time"),
    }
    frame, variable = get_timeseries(
        e,
        dataset=dataset_id,
        standard_name=server.get("standard_name"),
        constraints=time_window,
    )

    value_tooltip = bq.Tooltip(fields=["y"], formats=[".2f"], labels=["value"])
    line = bq.Lines(
        x=frame.index,
        y=frame[variable],
        scales={"x": x_scale, "y": y_scale},
        tooltip=value_tooltip,
    )
    time_axis = bq.Axis(scale=x_scale, label="Time")
    value_axis = bq.Axis(scale=y_scale, orientation="vertical")

    fig = bq.Figure(marks=[line], axes=[time_axis, value_axis])
    fig.title = f"{dataset_id[:18]} - {variable}"
    fig.layout.height = "300px"
    fig.layout.width = "800px"
    return fig

In [32]:
def space():
    """Return an HTML widget holding a paragraph with a 6.5cm bottom margin."""
    spacer_markup = (
        '<style>  .space {margin-bottom: 6.5cm;}</style><p class="space"> </p>'
    )
    return ipyw.HTML(value=spacer_markup, placeholder="", description="")

In [33]:
# Build the initial map, then plot the first dataset found on it.
# These module-level names are the ones the handlers above read.
map, feature_layer, datasets = plot_datasets(server, e)
figure = plot_timeseries(server, e, datasets[0])

In [34]:
# Create the input widgets; the handlers above read them by these names.
widget_std_names = f_widget_std_names(server, valid_standard_names)
widget_search_min_time = f_widget_search_min_time(server)
widget_search_max_time = f_widget_search_max_time(server)
widget_plot_start_time = f_widget_plot_start_time(server)
widget_plot_stop_time = f_widget_plot_stop_time(server)
widget_dsnames = f_widget_dsnames(datasets)

In [35]:
# Spacer widget placed between the search controls and the plot controls.
ispace = space()

This specifies the widget layout

In [36]:
# Both columns stack their children vertically.
form_item_layout = ipyw.Layout(
    display="flex", flex_flow="column", justify_content="space-between"
)

# First column: the map above the time series figure.
col1 = ipyw.Box([map, figure], layout=form_item_layout)
# Second column: search controls, a spacer, then the plot controls.
col2 = ipyw.Box(
    [
        widget_std_names,
        widget_search_min_time,
        widget_search_max_time,
        widget_search_button,
        ispace,
        widget_dsnames,
        widget_plot_start_time,
        widget_plot_stop_time,
        widget_replot_button,
    ],
    layout=form_item_layout,
)

form_items = [col1, col2]

# Outer container: the two columns side by side inside a border.
form = ipyw.Box(
    form_items,
    layout=ipyw.Layout(
        display="flex",
        flex_flow="row",
        border="solid 2px",
        align_items="flex-start",
        width="100%",
    ),
)

# Last expression of the cell, so the notebook displays the form.
form

Box(children=(Box(children=(Map(center=[42.5, -68], controls=(ZoomControl(options=['position', 'zoom_in_text',…

In [None]:
# NOTE(review): presumably the IPython `whos` magic (lists the variables in
# the interactive namespace); it only works with automagic on - confirm.
whos