# Execute Multiple Tests on Water Level Data

This Notebook loads a water level dataset and then uses the Configuration and Stream concepts from QARTOD v2 to run a series of tests.

The steps:

* Create a Bokeh-based function that can plot the results from any test
* Create a configuration string and convert that into a Config object
* Read data from a CSV file into a Pandas dataframe
* Make a Stream from that dataframe
* Run tests on the Stream using the Config and put the results in a list
* Plot all the results with the original data


In [None]:
from __future__ import annotations

import numpy as np
import pandas as pd
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, output_notebook, show

from ioos_qc.config import Config
from ioos_qc.results import collect_results
from ioos_qc.streams import PandasStream

# Direct Bokeh output to the notebook so that later show() calls render inline
output_notebook()

In [None]:
def plot_results(data, var_name, collected_results, title, test_name):
    """Plot the original observations together with the QC flags of one test.

    The observations are drawn as a line and every flagged point is overlaid
    as a scatter marker whose color encodes the flag.

    Parameters
    ----------
    data
        Table of observations (a pandas DataFrame in this notebook). It must
        contain a ``"time"`` column and a column named ``var_name``.
    var_name
        Name of the observed variable; used both as the column to plot and as
        the ``stream_id`` to look up in ``collected_results``.
    collected_results
        Iterable of result objects exposing ``stream_id``, ``test`` and
        ``results`` attributes.
    title
        Free text appended to the test name in the figure title.
    test_name
        Name of the test whose flags are plotted, e.g. ``"gross_range_test"``.

    Raises
    ------
    ValueError
        If ``collected_results`` holds no entry for ``var_name``/``test_name``.
    """
    # Look the result up before building anything, and fail with a clear message:
    # a bare next() would surface as an unexplained StopIteration.
    context_result = next(
        (r for r in collected_results if r.stream_id == var_name and r.test == test_name),
        None,
    )
    if context_result is None:
        msg = f"No results found for test {test_name!r} on stream {var_name!r}"
        raise ValueError(msg)
    qc_test = context_result.results

    # (source column, flag value, marker size, color, alpha, legend label),
    # listed in drawing order so that failures end up on top.
    layers = (
        ("qc_notrun", 2, 2, "gray", 0.2, "qc not run"),
        ("qc_pass", 1, 4, "green", 0.5, "qc pass"),
        ("qc_suspect", 3, 4, "orange", 0.7, "qc suspect"),
        ("qc_fail", 4, 6, "red", 1.0, "qc fail"),
    )

    # create a ColumnDataSource by passing the dataframe of original data
    source = ColumnDataSource(data=data)

    # add one column per flag: observations are masked wherever the flag differs,
    # so each scatter layer only shows the points carrying its own flag
    observations = data[var_name]
    for column, flag, _size, _color, _alpha, _label in layers:
        source.data[column] = np.ma.masked_where(qc_test != flag, observations)

    # set-up the figure
    p1 = figure(x_axis_type="datetime", title=f"{test_name} : {title}")
    p1.grid.grid_line_alpha = 0.3
    p1.xaxis.axis_label = "Time"
    p1.yaxis.axis_label = "Observation Value"

    p1.line(x="time", y=var_name, source=source, legend_label="obs", color="#A6CEE3")
    for column, _flag, size, color, alpha, label in layers:
        p1.scatter(
            x="time",
            y=column,
            source=source,
            size=size,
            color=color,
            alpha=alpha,
            legend_label=label,
        )

    show(gridplot([[p1]], width=800, height=400))

# Specify data and QC configuration

In [None]:
# QC configuration
# This configuration is used to connect the variable with the corresponding test
# method(s) in the ioos_qc library. See documentation for description of each test and its inputs:
#   https://ioos.github.io/ioos_qc/api/ioos_qc.html#module-ioos_qc.qartod
#
# A single context is defined with one stream, "sea_surface_height_above_sea_level",
# and four qartod tests: gross_range_test, flat_line_test, rate_of_change_test and spike_test.

# This string is in YAML but it could have been JSON too.
config_string = """
contexts:
    - streams:
        sea_surface_height_above_sea_level:
            qartod:
                gross_range_test:
                    fail_span: [-10, 10]
                    suspect_span: [-2, 3]
                flat_line_test:
                    tolerance: 0.001
                    suspect_threshold: 10800
                    fail_threshold: 21600                        
                rate_of_change_test:
                    threshold: 0.001
                spike_test:
                    suspect_threshold: 0.8
                    fail_threshold: 3
"""

# Create a high level and flexible configuration object
context_configs = Config(config_string)

# Load data, run tests and plot results

In [None]:
# NOTE: pandas is also imported in the first code cell; this is a repeat import
import pandas as pd

# Location of the example CSV file in the ioos_qc GitHub repository
url = "https://github.com/ioos/ioos_qc/raw/master/docs/source/examples"
fname = f"{url}/water_level_example.csv"
# Column holding the observations; same name as the stream key in config_string
variable_name = "sea_surface_height_above_sea_level"

# Read the CSV into a dataframe, parsing the "time" column as dates
data = pd.read_csv(fname, parse_dates=["time"])
# Preview the first rows of the dataframe
data.head()

In [None]:
# Convert the data to a Stream (Pandas dataframe to a PandasStream)
pandas_stream = PandasStream(data)

# Pass the run method the config to use
results = pandas_stream.run(context_configs)

# Then collect all the results into a single list
# (this list is what plot_results searches by stream_id and test name)
results_list = collect_results(results, how="list")

In [None]:
# Text shared by all figure titles; plot_results prefixes it with the test name
title = "Water Level [MHHW] [m] : Kotzebue, AK"

# Gross range test (configured with fail_span [-10, 10] and suspect_span [-2, 3])
plot_results(data, variable_name, results_list, title, "gross_range_test")

In [None]:
# Flat line test (configured with tolerance 0.001, suspect_threshold 10800, fail_threshold 21600)
plot_results(data, variable_name, results_list, title, "flat_line_test")

In [None]:
# Rate of change test (configured with threshold 0.001)
plot_results(data, variable_name, results_list, title, "rate_of_change_test")

In [None]:
# Spike test (configured with suspect_threshold 0.8 and fail_threshold 3)
plot_results(data, variable_name, results_list, title, "spike_test")