# Explore ERDDAP timeseries data using Jupyter Widgets
Inspired by [Jason Grout's excellent ESIP Tech Dive talk on "Jupyter Widgets"](https://youtu.be/CVcrTRQkTxo?t=2596), this notebook uses the `ipyleaflet` and `bqplot` widgets
to interactively explore the last two weeks of time series data from an ERDDAP Server. Select a `standard_name` from the list, then click a station to see the time series.  

NOTE: To access a protected ERDDAP endpoint, you can add a `~/.netrc` file like this:
```
machine cgoms.coas.oregonstate.edu
login <username>
password <password>
```

In [1]:
import numpy as np
import pandas as pd

In [2]:
import pendulum

`ipyleaflet` and `bqplot` are both Jupyter widgets, so can interact with Python like any other widget.  Since we want to click on a map in a notebook and get an interactive time series plot, they are perfect tools to use here. 

In [3]:
import bqplot as bq
import ipyleaflet as ipyl
import ipywidgets as ipyw

To make working with ERDDAP simpler, we use `erddapy`, a high-level python interface to ERDDAP's RESTful API

In [4]:
from erddapy import ERDDAP
from erddapy.url_handling import urlopen

This code should work with minor modifications on any ERDDAP (v1.64+) endpoint that has `cdm_data_type=timeseries` or `cdm_data_type=point` datasets.  Change the values for other ERDDAP endpoints or regions of interest

This function puts lon,lat and datasetID into a GeoJSON feature

In [5]:
def point(dataset, lon, lat, nchar):
    """Build a GeoJSON ``Feature`` describing one station.

    Parameters
    ----------
    dataset : str
        Dataset identifier, stored under ``properties["datasetID"]``.
    lon, lat : number
        Point coordinates, stored in GeoJSON order ``[lon, lat]``.
    nchar : int
        Number of leading characters of ``dataset`` kept as
        ``properties["short_dataset_name"]``.

    Returns
    -------
    dict
        A ``Feature`` with a ``Point`` geometry and a grey marker style.
    """
    properties = {
        "datasetID": dataset,
        "short_dataset_name": dataset[:nchar],
        "style": {"color": "Grey"},
    }
    geometry = {"type": "Point", "coordinates": [lon, lat]}
    return {"type": "Feature", "properties": properties, "geometry": geometry}

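This function finds all the datasets with a given `standard_name` in the specified time period and returns the search results as a pandas DataFrame (or an empty list, after updating the plot title, if the search fails)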

In [6]:
def adv_search(e, standard_name, cdm_data_type, min_time, max_time):
    """Search the ERDDAP server for datasets carrying ``standard_name``.

    Parameters
    ----------
    e : object
        Client exposing ``get_search_url(...)`` (an ``erddapy.ERDDAP`` instance
        in this notebook).
    standard_name : str
        Standard name to search for.
    cdm_data_type : str
        CDM data type; lower-cased before it is sent to the server.
    min_time, max_time
        Time window forwarded unchanged to ``get_search_url``.

    Returns
    -------
    pandas.DataFrame or list
        The parsed CSV search result, or an empty list when the search or the
        download fails.

    Notes
    -----
    On failure the module-level ``figure`` is updated: its title reports the
    problem and the first mark's ``y`` values are zeroed. If ``figure`` has not
    been created yet, the resulting ``NameError`` propagates to the caller.
    """
    try:
        search_url = e.get_search_url(
            response="csv",
            cdm_data_type=cdm_data_type.lower(),
            items_per_page=100000,
            standard_name=standard_name,
            min_time=min_time,
            max_time=max_time,
        )
        df = pd.read_csv(urlopen(search_url))
    except Exception:
        df = []
        # Shorten long names so the message fits in the plot title. Only names
        # that are actually truncated receive the ellipsis.
        if len(standard_name) > 15:
            v = "{}...".format(standard_name[:15])
        else:
            v = standard_name
        figure.title = "No {} found in this time range. Pick another variable.".format(
            v
        )
        # Flatten the existing curve so stale data is not mistaken for a result.
        figure.marks[0].y = 0.0 * figure.marks[0].y
    return df

This function returns the lon,lat values from allDatasets

In [7]:
def alllonlat(e, cdm_data_type, min_time, max_time):
    """Fetch dataset IDs and minimum lon/lat from the server's ``allDatasets`` table.

    The query keeps datasets of the given ``cdm_data_type`` whose ``minTime``
    is not after ``max_time`` and whose ``maxTime`` is not before ``min_time``.

    Parameters
    ----------
    e : object
        Client whose ``server`` attribute is the ERDDAP base URL.
    cdm_data_type : str
        Value inserted, quoted, into the ``cdm_data_type`` constraint.
    min_time, max_time
        Objects providing ``to_datetime_string()``.

    Returns
    -------
    pandas.DataFrame
        Parsed CSV response with its second line skipped (presumably the
        units row; confirm against the server output).
    """
    template = "{}/tabledap/allDatasets.csv?datasetID%2CminLongitude%2CminLatitude&cdm_data_type=%22{}%22&minTime%3C={}&maxTime%3E={}"
    server_root = e.server
    upper_bound = max_time.to_datetime_string()
    lower_bound = min_time.to_datetime_string()
    # Argument order matters: minTime is compared with the window end and
    # maxTime with the window start.
    query = template.format(server_root, cdm_data_type, upper_bound, lower_bound)
    return pd.read_csv(urlopen(query), skiprows=[1])

In [8]:
def stdname2geojson(e, standard_name, cdm_data_type, min_time, max_time, nchar=3):
    """Return GeoJSON features and dataset IDs for all matching stations.

    Parameters
    ----------
    e : object
        ERDDAP client passed through to ``adv_search`` and ``alllonlat``.
    standard_name : str
        Standard name to search for.
    cdm_data_type : str
        CDM data type of the datasets to consider.
    min_time, max_time
        Time window of the search.
    nchar : int, optional
        Number of leading characters of the dataset ID kept as the feature's
        short name. Defaults to 3, the value that was previously hard-coded.

    Returns
    -------
    tuple
        ``(geojson, datasets)``. ``geojson`` is a dict with a ``"features"``
        list containing one point per dataset found by both the search and the
        ``allDatasets`` query. ``datasets`` holds every dataset ID returned by
        the search, or an empty list if the search produced no DataFrame.
    """
    dfa = adv_search(e, standard_name, cdm_data_type, min_time, max_time)
    if not isinstance(dfa, pd.DataFrame):
        # adv_search signals "nothing found" by returning a plain list.
        return {"features": []}, []

    datasets = dfa["Dataset ID"].values

    dfll = alllonlat(e, cdm_data_type, min_time, max_time)
    dfr = dfll[dfll["datasetID"].isin(dfa["Dataset ID"])]
    # Address columns by name so the result does not depend on the column
    # order of the CSV response.
    features = [
        point(dataset, lon, lat, nchar)
        for dataset, lon, lat in zip(
            dfr["datasetID"], dfr["minLongitude"], dfr["minLatitude"]
        )
    ]
    return {"features": features}, datasets

The `map_click_handler` function updates the time series plot when a station marker is clicked

In [9]:
def map_click_handler(event=None, id=None, properties=None, feature=None):
    """Plot the time series of the station marker that was clicked.

    Registered as the ``on_click`` callback of the GeoJSON layer. Only
    ``properties`` is used: its ``"datasetID"`` entry selects the dataset.

    Side effects: updates the globals ``dataset_id`` and ``standard_name``,
    sets the dataset dropdown to the clicked station and refreshes the plot.
    If the plot cannot be refreshed, a message is printed and no exception is
    raised.
    """
    global dataset_id, standard_name
    dataset_id = properties["datasetID"]

    standard_name = widget_std_names.value
    widget_dsnames.value = dataset_id
    try:
        update_timeseries_plot(
            dataset=dataset_id, standard_name=standard_name, constraints=constraints
        )
    except Exception:
        # Best effort: a failed download must not break the widget callback,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        print(
            "No", standard_name, "data for this station. Please choose another station."
        )

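The `widget_replot_button_handler` function redraws the time series when the `Update TimeSeries` button is clicked, and the `widget_search_button_handler` function updates the map when the `Update search` button is clicked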

In [10]:
def widget_replot_button_handler(change):
    """Redraw the time series for the dataset and plot window chosen in the widgets.

    Parameters
    ----------
    change
        Argument supplied by the button's ``on_click`` machinery; unused.

    Side effects: rebinds the globals ``dataset_id`` and ``constraints`` and
    refreshes the plot through ``update_timeseries_plot``.
    """
    global dataset_id, constraints
    plot_start_time = pendulum.parse(widget_plot_start_time.value)
    plot_stop_time = pendulum.parse(widget_plot_stop_time.value)

    constraints = {"time>=": plot_start_time, "time<=": plot_stop_time}
    dataset_id = widget_dsnames.value
    # The global `standard_name` only exists after a map click or a search
    # (the validation cell deletes it), so fall back to the dropdown selection
    # rather than failing with NameError on the first click.
    selected_name = globals().get("standard_name", widget_std_names.value)
    update_timeseries_plot(
        dataset=dataset_id, standard_name=selected_name, constraints=constraints
    )

In [11]:
def widget_search_button_handler(change):
    """Re-run the dataset search and refresh the map, dropdown and plot.

    Parameters
    ----------
    change
        Argument supplied by the button's ``on_click`` machinery; unused.

    Side effects: rebinds the globals ``features``, ``datasets``,
    ``standard_name``, ``dataset_id`` and ``constraints``; replaces the map's
    station layer; resets the dataset dropdown; and plots the first dataset
    found. When the search finds nothing, the map and dropdown are cleared and
    the plot is left as it is.
    """
    global features, datasets, standard_name, dataset_id, constraints
    min_time = pendulum.parse(widget_search_min_time.value)
    max_time = pendulum.parse(widget_search_max_time.value)

    standard_name = widget_std_names.value

    features, datasets = stdname2geojson(
        e,
        standard_name,
        server.get("cdm_data_type"),
        min_time,
        max_time,
    )

    feature_layer = ipyl.GeoJSON(data=features)
    feature_layer.on_click(map_click_handler)
    # Keep the first (base) layer and swap in the new station layer.
    map.layers = [map.layers[0], feature_layer]

    # `datasets` may be a NumPy array, so test its length, not its truthiness.
    if len(datasets) == 0:
        # Nothing matched: avoid IndexError on datasets[0] and do not leave
        # stale dataset names in the dropdown.
        widget_dsnames.options = []
        return

    dataset_id = datasets[0]
    widget_dsnames.options = datasets
    widget_dsnames.value = dataset_id

    constraints = {"time>=": min_time, "time<=": max_time}
    update_timeseries_plot(
        dataset=dataset_id, standard_name=standard_name, constraints=constraints
    )

In [12]:
def update_timeseries_plot(
    dataset=None, standard_name=None, constraints=None, title_len=18
):
    """Load a dataset's time series and show it in the module-level ``figure``.

    Parameters
    ----------
    dataset : str
        Dataset ID to download; its first ``title_len`` characters appear in
        the plot title.
    standard_name : str
        Standard name of the variable to plot.
    constraints : dict
        Constraints forwarded to ``get_data``.
    title_len : int, optional
        Maximum number of dataset-ID characters shown in the title.
    """
    frame, column = get_data(
        dataset=dataset, standard_name=standard_name, constraints=constraints
    )
    # The figure holds a single line mark; update its data in place.
    line = figure.marks[0]
    line.x = frame.index
    line.y = frame[column]
    figure.title = "{} - {}".format(dataset[:title_len], column)

This function returns the specified dataset time series values as a Pandas dataframe

In [13]:
def get_data(dataset=None, standard_name=None, constraints=None):
    """Download one variable of a dataset as a time-indexed DataFrame.

    The variable is the first one in ``dataset`` whose ``standard_name``
    attribute equals ``standard_name``, ignoring case.

    Parameters
    ----------
    dataset : str
        Dataset ID on the module-level ERDDAP client ``e``.
    standard_name : str
        Standard name to look up.
    constraints : dict
        Constraints passed to ``e.get_download_url``.

    Returns
    -------
    tuple
        ``(df, var)``: the DataFrame indexed by ``time``, and the name of the
        variable that was downloaded.

    Raises
    ------
    ValueError
        If no variable of the dataset carries ``standard_name``.
    """

    def _same_standard_name(value):
        return str(value).lower() == standard_name.lower()

    matches = e.get_var_by_attr(
        dataset_id=dataset,
        standard_name=_same_standard_name,
    )
    if not matches:
        # TODO: offer only the standard_names valid for each dataset, e.g. by
        # reading the "standard_name" rows of the dataset's info table.
        raise ValueError(f"Cannot get data for {standard_name}.")
    var = matches[0]

    download_url = e.get_download_url(
        dataset_id=dataset,
        constraints=constraints,
        variables=["time", var],
        response="csv",
    )

    # The second CSV line is skipped (presumably the units row; confirm
    # against the server output).
    df = pd.read_csv(
        urlopen(download_url), index_col="time", parse_dates=True, skiprows=[1]
    )
    return df, var

In [14]:
# Reference "now" in UTC, used for the rolling time window of the "neracoos" entry.
now = pendulum.now(tz="utc")

# Per-server configuration, keyed by a short server name.
#   url            base URL of the ERDDAP endpoint
#   standard_name  variable shown when the notebook first loads
#   nchar          intended short-name length (NOTE(review): not read by any
#                  code visible in this notebook -- confirm whether it should be)
#   cdm_data_type  CDM data type used for searches
#   center, zoom   initial map view (presumably [lat, lon] -- confirm)
#   min_time,
#   max_time       default search and plot window
servers = {
    "ioos": {
        "url": "http://erddap.sensors.ioos.us/erddap",
        "standard_name": "sea_surface_wave_significant_height",
        "nchar": 9,
        "cdm_data_type": "TimeSeries",
        "center": [35, -100],
        "zoom": 3,
        "max_time": pendulum.parse("2017-11-11T00:00:00Z"),
        "min_time": pendulum.parse("2017-11-01T00:00:00Z"),
    },
    "whoi": {
        "url": "https://gamone.whoi.edu/erddap",
        "standard_name": "sea_water_temperature",
        "nchar": 9,
        "cdm_data_type": "TimeSeries",
        "center": [35, -100],
        "zoom": 3,
        "max_time": pendulum.parse("2011-05-15T00:00:00Z"),
        "min_time": pendulum.parse("2011-05-05T00:00:00Z"),
    },
    "ooi": {
        "url": "https://erddap-uncabled.oceanobservatories.org/uncabled/erddap",
        "standard_name": "sea_water_temperature",
        "nchar": 8,
        "cdm_data_type": "Point",
        "center": [35, -100],
        "zoom": 1,
        "max_time": pendulum.parse("2017-08-03T00:00:00Z"),
        "min_time": pendulum.parse("2017-08-01T00:00:00Z"),
    },
    "neracoos": {
        "url": "http://www.neracoos.org/erddap",
        "standard_name": "significant_height_of_wind_and_swell_waves",
        "nchar": 3,
        "cdm_data_type": "TimeSeries",
        "center": [42.5, -68],
        "zoom": 6,
        # Rolling window: the two weeks ending when this cell is run.
        "max_time": now,
        "min_time": now.subtract(weeks=2),
    },
}

In [15]:
# Key into `servers` selecting the ERDDAP endpoint used by the rest of the notebook.
server_name = "ooi"

In [16]:
# Configuration of the selected server; raises KeyError for an unknown name.
server = servers[server_name]
# Base URL of the selected ERDDAP endpoint.
server_url = server.get("url")

In [17]:
# erddapy client for the selected server; read as a global by the functions above.
e = ERDDAP(server=server_url, protocol="tabledap")

Find all the `standard_name` attributes that exist on this ERDDAP endpoint, using [ERDDAP's "categorize" service](http://www.neracoos.org/erddap/categorize/index.html)

In [18]:
# CSV index of every standard_name category known to the server.
url_standard_names = f"{server_url}/categorize/standard_name/index.csv"
# The second and third CSV lines are skipped (presumably non-category rows --
# TODO confirm against the server output).
df = pd.read_csv(urlopen(url_standard_names), skiprows=[1, 2])
# Candidate standard names, validated in the next cell.
standard_names = df["Category"].values

This cell identifies the valid standard names for the specified server

In [19]:
# Standard names for which the first dataset found in the server's default time
# window really exposes a variable with that standard_name.
valid_standard_names = []

print(
    "Checking the variables available for this server. This might take up to a couple of minutes...\n"
)

for count, standard_name in enumerate(standard_names, start=1):

    # Coarse progress feedback; integer arithmetic avoids comparing an int
    # counter against floating-point milestones.
    if count == len(standard_names) // 2:
        print("Halfway there...\n")
    elif count == (3 * len(standard_names)) // 4:
        print("Almost done...\n")
    elif count == len(standard_names):
        print("Done!")

    try:

        features, datasets = stdname2geojson(
            e,
            standard_name,
            server.get("cdm_data_type"),
            server.get("min_time"),
            server.get("max_time"),
        )

        var = e.get_var_by_attr(
            dataset_id=datasets[0],
            standard_name=lambda v: str(v).lower() == standard_name.lower(),
        )

        if var:
            valid_standard_names.append(standard_name)

        del var, features, datasets

    except (NameError, IndexError):
        # NameError: the failed-search path of adv_search touches `figure`,
        # which does not exist yet when this cell first runs.
        # IndexError: the search returned no datasets, so datasets[0] fails
        # (this happens when the cell is re-run after `figure` exists).
        # Either way this standard_name has no usable data: skip it.
        continue
# Remove the loop temporaries from the notebook namespace.
del count, standard_names, standard_name

Checking the variables available for this server. This might take up to a couple of minutes...

Halfway there...

Almost done...

Done!


Create a dropdown menu widget with all the `standard_name` values found

In [20]:
# Dropdown of the validated standard names, preselecting the server's default.
# NOTE(review): the default is assumed to be in `valid_standard_names`;
# presumably the widget rejects a value that is not among its options -- confirm.
widget_std_names = ipyw.Dropdown(
    options=valid_standard_names, value=server.get("standard_name")
)

Create a text widget to enter the search minimum time

In [21]:
# Start of the search window as editable text; parsed with pendulum.parse
# when "Update search" is clicked.
widget_search_min_time = ipyw.Text(
    value=server.get("min_time").to_datetime_string(),
    description="Search Min",
    disabled=False,
)

In [22]:
# End of the search window as editable text; parsed with pendulum.parse
# when "Update search" is clicked.
widget_search_max_time = ipyw.Text(
    value=server.get("max_time").to_datetime_string(),
    description="Search Max",
    disabled=False,
)

In [23]:
# Button that re-runs the dataset search; its click handler is attached in a
# later cell. `value` is not a Button trait, so it is no longer passed.
widget_search_button = ipyw.Button(
    description="Update search", disabled=False, button_style=""
)

In [24]:
# Start of the plotted time range as editable text; parsed with pendulum.parse
# when "Update TimeSeries" is clicked.
widget_plot_start_time = ipyw.Text(
    value=server.get("min_time").to_datetime_string(),
    description="Plot Min",
    disabled=False,
)

In [25]:
# End of the plotted time range as editable text; parsed with pendulum.parse
# when "Update TimeSeries" is clicked.
widget_plot_stop_time = ipyw.Text(
    value=server.get("max_time").to_datetime_string(),
    description="Plot Max",
    disabled=False,
)

In [26]:
# Button that redraws the time series for the chosen dataset and plot window;
# its click handler is attached in a later cell. `value` is not a Button trait,
# so it is no longer passed.
widget_replot_button = ipyw.Button(
    description="Update TimeSeries", disabled=False, button_style=""
)

In [27]:
# Redraw the time series whenever "Update TimeSeries" is clicked.
widget_replot_button.on_click(widget_replot_button_handler)

In [28]:
# Re-run the search and refresh the map whenever "Update search" is clicked.
widget_search_button.on_click(widget_search_button_handler)

This defines the initial `ipyleaflet` map 

In [29]:
# Initial map view taken from the server configuration.
# NOTE: the name `map` shadows the Python builtin; it is kept because the
# handlers above refer to it.
map = ipyl.Map(
    center=server.get("center"),
    zoom=server.get("zoom"),
    layout=dict(width="750px", height="350px"),
)
# Stations offering the server's default standard_name in its default window.
features, datasets = stdname2geojson(
    e,
    server.get("standard_name"),
    server.get("cdm_data_type"),
    server.get("min_time"),
    server.get("max_time"),
)
# Start with the first dataset found; raises IndexError if the search is empty.
dataset_id = datasets[0]
feature_layer = ipyl.GeoJSON(data=features)
feature_layer.on_click(map_click_handler)
# Keep the map's first (base) layer and add the station layer on top.
map.layers = [map.layers[0], feature_layer]

In [30]:
# Dropdown of the dataset IDs returned by the search, preselecting the first one.
widget_dsnames = ipyw.Dropdown(options=datasets, value=dataset_id)

This defines the initial `bqplot` time series plot

In [31]:
# Scales for the plot: dates on x, linear values on y.
dt_x = bq.DateScale()
sc_y = bq.LinearScale()

# Initial download window; rebound by the button handlers.
constraints = {"time>=": server.get("min_time"), "time<=": server.get("max_time")}

# Initial data: the default variable of the first dataset found above.
df, var = get_data(
    dataset=dataset_id,
    standard_name=server.get("standard_name"),
    constraints=constraints,
)
# Tooltip showing the y value with two decimals.
def_tt = bq.Tooltip(fields=["y"], formats=[".2f"], labels=["value"])
time_series = bq.Lines(
    x=df.index, y=df[var], scales={"x": dt_x, "y": sc_y}, tooltip=def_tt
)
ax_x = bq.Axis(scale=dt_x, label="Time")
ax_y = bq.Axis(scale=sc_y, orientation="vertical")
# `figure` is read as a global by adv_search and update_timeseries_plot, which
# update its title and its first (only) mark.
figure = bq.Figure(marks=[time_series], axes=[ax_x, ax_y])
# 18 matches the default `title_len` of update_timeseries_plot.
figure.title = "{} - {}".format(dataset_id[:18], var)
figure.layout.height = "300px"
figure.layout.width = "800px"

In [32]:
# Invisible spacer: an empty paragraph with a 6.5 cm bottom margin, placed
# between the search controls and the plot controls in the right-hand column.
ispace = ipyw.HTML(
    value='<style>  .space {margin-bottom: 6.5cm;}</style><p class="space"> </p>',
    placeholder="",
    description="",
)

This specifies the widget layout

In [33]:
# Shared layout for both columns: children stacked vertically.
form_item_layout = ipyw.Layout(
    display="flex", flex_flow="column", justify_content="space-between"
)

# Left column: the map above the time series plot.
col1 = ipyw.Box([map, figure], layout=form_item_layout)
# Right column: search controls, a spacer, then plot controls.
col2 = ipyw.Box(
    [
        widget_std_names,
        widget_search_min_time,
        widget_search_max_time,
        widget_search_button,
        ispace,
        widget_dsnames,
        widget_plot_start_time,
        widget_plot_stop_time,
        widget_replot_button,
    ],
    layout=form_item_layout,
)

form_items = [col1, col2]

# Outer container: the two columns side by side inside a border.
form = ipyw.Box(
    form_items,
    layout=ipyw.Layout(
        display="flex",
        flex_flow="row",
        border="solid 2px",
        align_items="flex-start",
        width="100%",
    ),
)

# Last expression of the cell, so the notebook displays the assembled form.
form

Box(children=(Box(children=(Map(center=[35, -100], controls=(ZoomControl(options=['position', 'zoom_in_text', …

HTTPError: <!DOCTYPE html><html><head><title>Apache Tomcat/8.0.33 - Error report</title><style type="text/css">H1 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:22px;} H2 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:16px;} H3 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:14px;} BODY {font-family:Tahoma,Arial,sans-serif;color:black;background-color:white;} B {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;} P {font-family:Tahoma,Arial,sans-serif;background:white;color:black;font-size:12px;}A {color : black;}A.name {color : black;}.line {height: 1px; background-color: #525D76; border: none;}</style> </head><body><h1>HTTP Status 500 - FileNotFoundException: https://erddap-be-uncabled.oceanobservatories.org/uncabled/erddap/tabledap/CP01CNSM-SBD12-08-FDCHPA000-fdchp_a_dcl_instrument-telemetered-deployment0007-tabledap.nccsv?time,vw_momentum_flux&amp;time%3E=1501545600.0&amp;time%3C=1501718400.0</h1><div class="line"></div><p><b>type</b> Status report</p><p><b>message</b> <u>FileNotFoundException: https://erddap-be-uncabled.oceanobservatories.org/uncabled/erddap/tabledap/CP01CNSM-SBD12-08-FDCHPA000-fdchp_a_dcl_instrument-telemetered-deployment0007-tabledap.nccsv?time,vw_momentum_flux&amp;time%3E=1501545600.0&amp;time%3C=1501718400.0</u></p><p><b>description</b> <u>The server encountered an internal error that prevented it from fulfilling this request.</u></p><hr class="line"><h3>Apache Tomcat/8.0.33</h3></body></html>