# Goal

The purpose of this notebook is to provide an example use case of running the QARTOD tests from the ioos/ioos_qc library on a netCDF file. The example netCDF dataset comes from a pCO2 sensor on the Ocean Observatories Initiative (OOI) Coastal Endurance Inshore Surface Mooring instrument frame, at 7 meters depth, located on the Oregon Shelf break.

In [None]:
# Install QC library
#!pip install git+https://github.com/ioos/ioos_qc.git #(only run if the ioos_qc library is not installed)
from ioos_qc.config import NcQcConfig
from ioos_qc import qartod

In [None]:
# Other imports
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime

from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file, output_notebook
# Send Bokeh output to the notebook so figures render inline below the plotting cells
output_notebook()

## Check the netCDF dataset

In [None]:
# Open the example file with xarray so its dimensions and variables can be inspected
pco2 = xr.open_dataset("pco2_netcdf_example.nc")

In [None]:
# Print the name of each dimension in the dataset, one per line
for dim in pco2.dims:
    print(dim)

In [None]:
# Print the name of each variable in the dataset, one per line;
# these names are the keys used in the QC config further down
for var in pco2.variables:
    print(var)

## Setup & Run a single QC test

In [None]:
# Build the QC configuration as nested mappings:
#   variable name -> test module -> test name -> arguments for that test
config = dict(
    pco2_seawater=dict(
        qartod=dict(
            # Value ranges handed to the QARTOD gross range test
            gross_range_test=dict(
                suspect_span=[200, 600],
                fail_span=[0, 1200],
            ),
        ),
    ),
)

# Wrap the plain mapping in the netCDF-aware QC config object
qc = NcQcConfig(config)

In [None]:
# To run the QC on a netCDF file, pass the path to the file, not the already-loaded netCDF dataset
qc_gross_range = qc.run("pco2_netcdf_example.nc")

In [None]:
# Check that the output is an OrderedDict and that the test ran correctly
print(qc_gross_range)

In [None]:
# Display the time variable to inspect it before it is used as the plot's x-axis
pco2.variables['time']

In [None]:
# Method to plot QC results using Bokeh
def plot_ncresults(ncdata, var_name, results, title, test_name):
    """Plot one variable's observations with the flags of a single QC test overlaid.

    Parameters
    ----------
    ncdata
        Object exposing a ``variables`` mapping that contains ``'time'`` and
        ``var_name`` (this notebook passes the opened xarray dataset).
    var_name
        Name of the observed variable; also the top-level key into ``results``.
    results
        Nested QC results, indexed as ``results[var_name]['qartod'][test_name]``.
    title
        Text appended to the test name in the figure title.
    test_name
        Name of the test whose flags are drawn.

    Returns
    -------
    None
        The figure is displayed through ``show``; nothing is returned.
    """
    timestamps = np.array(ncdata.variables['time'])
    observations = np.array(ncdata.variables[var_name])
    flags = results[var_name]['qartod'][test_name]

    # (flag value, legend text, marker size, colour, opacity), listed in draw order.
    marker_styles = (
        (2, 'qc not run', 2, 'gray', 0.2),
        (1, 'qc pass', 4, 'green', 0.5),
        (3, 'qc suspect', 4, 'orange', 0.7),
        (4, 'qc fail', 6, 'red', 1.0),
    )

    # For each category, mask every observation whose flag differs so that only
    # the points belonging to that category are left to draw.
    points_by_flag = {
        style[0]: np.ma.masked_where(flags != style[0], observations)
        for style in marker_styles
    }

    fig = figure(x_axis_type="datetime", title=test_name + ' : ' + title)
    fig.grid.grid_line_alpha = 0.3
    fig.xaxis.axis_label = 'Time'
    fig.yaxis.axis_label = 'Observation Value'

    # Draw the raw series first so the flag markers sit on top of it.
    fig.line(timestamps, observations, legend='obs', color='#A6CEE3')
    for flag, label, size, color, alpha in marker_styles:
        fig.circle(timestamps, points_by_flag[flag],
                   size=size, legend=label, color=color, alpha=alpha)

    #output_file("qc.html", title="qc example")

    # NOTE(review): `legend=` and `plot_width`/`plot_height` are replaced by
    # `legend_label=` and `width`/`height` in newer Bokeh releases — confirm the
    # Bokeh version this notebook targets before upgrading.
    show(gridplot([[fig]], plot_width=800, plot_height=400))

In [None]:
# Just a quick note: OOI time data is in seconds since 1900-01-01, so that requires some further
# conversion before it can be shown on a datetime axis.
# NOTE(review): the original comment was cut off mid-sentence — confirm the intended wording.
plot_ncresults(pco2, 'pco2_seawater', qc_gross_range, 'pCO2 seawater', 'gross_range_test')

## Multiple tests 

When using the NcQcConfig object with tests that require an ancillary variable, such as lat/lon for the location test or time for the rate_of_change_test, the ancillary variables must be pulled out of the netCDF file and passed into the qc.run method as keyword arguments (kwargs), keyed by the name of the variable being tested.

In [None]:
# Copy the coordinate and observation variables out of the dataset as NumPy arrays
nclat, nclon, nctime, ncobs = (
    np.array(pco2.variables[name])
    for name in ('lat', 'lon', 'time', 'pco2_seawater')
)

In [None]:
# Display the time array that is passed to the QC run as `tinp`
nctime

In [None]:
# Build a configuration that runs several QARTOD tests on pco2_seawater.
# Layout: variable name -> test module -> test name -> arguments for that test
config = dict(
    pco2_seawater=dict(
        qartod=dict(
            # Value ranges handed to the gross range test
            gross_range_test=dict(
                suspect_span=[200, 600],
                fail_span=[0, 1200],
            ),
            # Bounding box for the location test
            # (presumably min lon, min lat, max lon, max lat — verify against the ioos_qc docs)
            location_test=dict(
                bbox=[-124.5, 44, -123.5, 45],
            ),
            spike_test=dict(
                suspect_threshold=10,
                fail_threshold=100,
            ),
            # NOTE(review): the thresholds are one hour / one day in seconds scaled by
            # 1e3*1e9; the reason for the scaling is not evident here — confirm the
            # time units flat_line_test expects for the `tinp` array being passed.
            flat_line_test=dict(
                tolerance=1,
                suspect_threshold=3600*1e3*1e9,
                fail_threshold=86400*1e3*1e9,
            ),
        ),
    ),
)

# Wrap the plain mapping in the netCDF-aware QC config object
qc = NcQcConfig(config)

In [None]:
# Run every configured test; the keyword named after the variable carries the
# ancillary arrays (lat, lon and time as `tinp`) supplied for its tests.
qc_results = qc.run(
    "pco2_netcdf_example.nc",
    pco2_seawater=dict(lat=nclat, lon=nclon, tinp=nctime),
)

In [None]:
# Display the combined results returned by the multi-test run
qc_results

In [None]:
# Plot the flat_line_test flags over the pCO2 observations
plot_ncresults(pco2, 'pco2_seawater', qc_results, 'pCO2 seawater', 'flat_line_test')

In [None]:
# Plot the spike_test flags over the pCO2 observations
plot_ncresults(pco2, 'pco2_seawater', qc_results, 'pCO2 seawater', 'spike_test')

In [None]:
# Plot the gross_range_test flags from the multi-test run over the pCO2 observations
plot_ncresults(pco2, 'pco2_seawater', qc_results, 'pCO2 seawater', 'gross_range_test')

In [None]:
# Plot the location_test flags over the pCO2 observations
plot_ncresults(pco2, 'pco2_seawater', qc_results, 'pCO2 seawater', 'location_test')

Currently, the aggregate (roll-up) flag is not implemented for netCDF files.<br>
After the tests have been run, results of the qc tests may be saved to the 