# Automated Quality Coding To NEMS

Press the **Restart and Run all Cells** button below to start the worksheet.

In [15]:
##buttons to reset the notebook
from IPython.display import HTML, Javascript, display, Markdown


def initialize():
    """Inject the notebook's styling and control buttons into the cell output.

    Displays a single HTML fragment containing:

    * a ``<style>`` block that removes the fixed height / scrolling from
      widget output areas;
    * a ``<script>`` block defining ``code_toggle`` (show or hide every
      ``div.input`` element) and ``restart_run_all`` (restart the kernel,
      then execute all cells), and which calls ``code_toggle()`` once on load
      so the code starts hidden;
    * the "Code" and "Restart and Run all Cells" buttons wired to those two
      functions.

    The script relies on the front-end globals ``IPython`` and jQuery's ``$``,
    so the buttons only work in a front end that provides them.
    """
    display(HTML(
        '''
            <!-- Style part stops output window scrolling -->
            <style>
                .jupyter-widgets-output-area .output_scroll {
                height: unset !important;
                border-radius: unset !important;
                -webkit-box-shadow: unset !important;
                box-shadow: unset !important;
                }
                .jupyter-widgets-output-area  {
                height: auto !important;
                }
            </style>

            <!-- Script to load buttons to restart and toggle code -->
            <script>
                code_show = true;
                function restart_run_all(){
                    // Run the cells only once the restarted kernel reports
                    // that it is ready, rather than guessing with a fixed
                    // 10 second delay (too short on a slow start, wasted
                    // time on a fast one).
                    IPython.notebook.kernel.restart(function(){
                        IPython.notebook.events.one('kernel_ready.Kernel', function(){
                            IPython.notebook.execute_all_cells();
                        });
                    });
                }
                function code_toggle() {
                    if (code_show) {
                        $('div.input').hide(200);
                    } else {
                        $('div.input').show(200);
                    }
                    code_show = !code_show
                }
                code_toggle()
            </script>
            <button onclick="code_toggle()">Code</button>
            <button onclick="restart_run_all()">Restart and Run all Cells</button>
        '''
    ))
initialize()


from IPython.lib.deepreload import reload
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
#plots
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap
output_notebook(hide_banner=True)

#interactive sessions
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import ipywidgets as widgets

import pandas as pd
import numpy as np

## Background

Automated processing of continuous data.

## Options

Select the dataset and options. [More Info](Information.ipynb)

The [Analysis Notebook](TSDataExplorer.ipynb) can be used to help identify appropriate values for some of the parameters.

The NEMS parameters are set from the selected standard: select the relevant standard and its parameters are loaded in the background.

A summary of the results of the tests done on the data will be shown in the graphs below.


In [17]:

print('re-run this cell to reload')

# nemsQc is the module where all the heavy lifting is done; prefillINotebook
# provides the selector callbacks and widget options used in this notebook.
import nemsQc as nq
import prefillINotebook as pf

print("Please note that data is pulled live from the server, so please be patient.")

# --- Data to be assessed -----------------------------------------------------
display(Markdown('### Datasource'))
display(Markdown('Enter the Server, File, Site and Measurement that you want to assess.'))
display(Markdown('Set the timerange to be assessed, usually the start date will be the date of the last result in the archive, and the end date will be the date of the latest check reading, or sensor change over.'))

# Basic selector at site level; interact() calls pf.siteSelector whenever one
# of these widgets changes.
interact(
    pf.siteSelector,
    Server=pf.serverOptions,
    File=pf.fileOptions,
    StartDate=pf.sDate,
    EndDate=pf.eDate,
    Site=pf.siteOptions,
    Measurement=pf.measurementOptions,
)

# --- Check data used to verify the data above --------------------------------
display(Markdown('### Check Data'))
display(Markdown('Enter the Server, File, Site and Measurement that provides the check data for the data to be assessed.'))

# Same kind of selector for the check data. StartDate/EndDate are not passed
# here. The trailing semicolon suppresses the cell's return-value output.
interact(
    pf.checkSelector,
    Server=pf.checkServerOptions,
    File=pf.checkFileOptions,
    Site=pf.checkSiteOptions,
    Measurement=pf.checkMeasurementOptions,
);


re-run this cell to reload
Please note that data is pulled pulled out live from the server and be patient.


### Datasource

Enter the Server, File, Site and Measurement that you want to assess.

Set the timerange to be assessed, usually the start date will be the date of the last result in the archive, and the end date will be the date of the latest check reading, or sensor change over.

interactive(children=(Text(value='https://data.hbrc.govt.nz/EnviroData/', description='Server'), Text(value='T…

### Check Data

Enter the Server, File, Site and Measurement that provides the check data for the data to be assessed.

interactive(children=(Text(value='https://data.hbrc.govt.nz/EnviroData/', description='Server'), Text(value='E…

In [18]:
display(Markdown('### Autocoder Options'))
# Show the options-status widget.
display(pf.optStatus)

# --- NEMS --------------------------------------------------------------------
# Only the standard is chosen here; nq.configParams_N is called with the
# selected value.
display(Markdown('*NEMS*'))
display(Markdown('Select the relevant NEMS standard (and site type where relevant).'))

nems_p = interactive(nq.configParams_N, nemsStd=pf.nemsStd)
display(nems_p)

# --- General -----------------------------------------------------------------
# One help line per keyword passed to nq.configParams_Q below, in the same
# order as the widgets.
display(Markdown('*General*'))
for _help_text in (
    'grossRangeFailBelow - The minimum value that the sensor can record.',
    'grossRangeFailAbove - The maximum value that the sensor can record.',
    'grossRangeSuspectBelow - The lowest value expected for the site and measurement.',
    'grossRangeSuspectAbove - The highest value expected for the site and measurement.',
    'flatLineTolerance - The tolerance value that should be exceeded by a reading within the time threshold window.',
    'flatLineSuspectThreshold - The number of seconds to assess the flatline test over before flagging as suspect.',
    'flatLineFailThreshold - The number of seconds to assess the flatline test over before flagging as fail.',
    'rateOfChangeThreshold - The maximum allowed rate of change of the observational unit per second.',
    'spikeSuspectThreshold - The suspect threshold for spike detection (in observational units).',
    'spikeFailThreshold - The fail threshold for spike detection (in observational units).',
):
    display(Markdown(_help_text))

qc_config = interactive(
    nq.configParams_Q,
    grossRangeFailBelow=pf.grfbSlot,
    grossRangeFailAbove=pf.grfaSlot,
    grossRangeSuspectBelow=pf.grsbSlot,
    grossRangeSuspectAbove=pf.grsaSlot,
    flatLineTolerance=pf.fltSlot,
    flatLineSuspectThreshold=pf.flatLineSuspectThreshold,
    flatLineFailThreshold=pf.flatLineFailThreshold,
    rateOfChangeThreshold=pf.rocSlot,
    spikeSuspectThreshold=pf.spikeSuspectThreshold,
    spikeFailThreshold=pf.spikeFailThreshold,
)
display(qc_config)

# --- Processing --------------------------------------------------------------
display(Markdown('*Processing*'))
for _help_text in (
    'Interpolate - Whether gaps should be interpolated if there was data gathered originally.',
    'GapThreshold - The minimum number of seconds of missing data that will cause a gap marker to be inserted.',
    'InterpolationAllowance - The percentage leeway that will be used to determine whether to use an interpolated value, or measured.',
):
    display(Markdown(_help_text))

# The Interpolate checkbox is displayed on its own rather than being passed to
# interact(); only the remaining processing options go through
# pf.processingSelector.
display(pf.interpolationFlag)
interact(
    pf.processingSelector,
    GapThreshold=pf.gapThreshold,
    InterpolationAllowance=pf.interpolationAllowance,
    MaxQCCode=pf.maxCodeOptions,
)

display(Markdown('Press Save to save the options for future use for this site and measurement.'))
display(pf.saveBtn)

### Autocoder Options

HTML(value='<b style="color:green;">Latest options loaded<b>')

*NEMS*

Select the relevant NEMS standard (and site type where relevant).

interactive(children=(Dropdown(description='nemsStd', index=17, options=('Water Temperature - Estuarine', 'Wat…

*General*

grossRangeFailBelow - The minimum value that the sensor can record.

grossRangeFailAbove - Tha maximum value that the sensor can record.

grossRangeSuspectBelow - The lowest value expected for the site and measurement.

grossRangeSuspectAbove - The highest value expected for the site and measurement.

flatLineTolerance - The tolerance value that should be exceeded by a reading within the time threshold window.

flatLineSuspectThreshold - The number of seconds to assess the flatline test over before flagging as suspect.

flatLineFailThreshold - The number of seconds to assess the flatline test over before flagging as fail.

rateOfChangeThreshold - The maximum allowed rate of change of the observational unit per second.

spikeSuspectThreshold - The suspect threshold for spike detection (in observational units).

spikeFailThreshold - The fail threshold for spike detection (in observational units).

interactive(children=(FloatText(value=0.0, description='grossRangeFailBelow', style=DescriptionStyle(descripti…

*Processing*

Interpolate - Whether gaps should be interpolated if there was data gathered originally.

GapThreshold - The minimum number of seconds of missing data that will cause a gap marker to be inserted.

InterpolationAllowance - The percentage leeway that will be used to determine whether to use an interpolated value, or measured..

Checkbox(value=False, description='Interpolate')

interactive(children=(IntText(value=10800, description='GapThreshold', style=DescriptionStyle(description_widt…

Press Save to save the options for future use for this site and measurement.

Button(description='Save', style=ButtonStyle())

In [19]:
# Show the prompt, then pf.runBtn and pf.output, in that order.
display(
    Markdown('Press Run to run the tests.'),
    pf.runBtn,
    pf.output,
)

Press Run to run the tests.

Button(description='Run', style=ButtonStyle())

Output()

In [20]:

#pf.qc_df.head()
#pf.qc_results['qartod'].keys()

In [21]:

#display(Markdown('### General Tests.'))
#display(Markdown('Select the test to see the results as a graph.'))
##interact(qb.doThePlots,x=["gross range", "flat line", "rate of change","spike","aggregate"])
#qplot = interactive(pf.doThePlots, x=["gross range", "flat line", "rate of change","spike","aggregate"])
#display(qplot)

#display(Markdown('### NEMS Tests.'))
#display(Markdown('Select the test to see the results as a graph.'))
##interact(qb.doThePlots,x=["gap data","resolution", "verification frequency", "accuracy"])
#nplot = interactive(pf.doThePlots, x=["gap data","resolution", "verification frequency", "verification accuracy"])
#display(nplot)

#display(Markdown('### NEMS QC.'))
#display(Markdown('Graph the coded results.  Choose whether to see all results or just the ones for archiving (clean set).'))
##interact(qb.plot_NEMS_results, data_set=widgets.Combobox(options=["all", "clean"], value="all"))
##qcplot = interactive(pf.plot_NEMS_results, data_set=["all", "clean"], value="all")
#oplot = interactive(pf.doThePlots, x=["All Data", "Clean Data"], value="All Data")
#display(oplot)

##display(Markdown('Please run the tests first.'))
##print("please run above cell first")

In [22]:


#df = pd.DataFrame(qb.qc_results['qartod'], columns=qb.qc_results['qartod'].keys())

#qartod_df = pd.DataFrame(qb.qc_results['qartod'], columns=qb.qc_results['qartod'].keys())
#nems_df = pd.DataFrame(qb.qc_results['nems'], columns=qb.qc_results['nems'].keys())
#combined_df = pd.concat([qartod_df, nems_df], axis=1)
#combined_df = combined_df.astype('int16')
# Read in the mapping table from file and save as df
#mapping_df = pd.read_csv("QC_Mapping.csv", dtype={'aggregate':np.int16, 'NEMS_accuracy':np.int16, 'NEMS_aggregate':np.int16, 'QC':np.int16})

#full_df = pd.merge(combined_df, mapping_df)

#print(combined_df.info())
#print(combined_df.describe())

#print(mapping_df.info())
#print(mapping_df.describe())

#print(full_df.info())
#print(full_df.head())

In [23]:
# qb.plot_NEMS_results()

In [24]:
#print(qb.qc_df['QC'].unique())
#print(qb.qc_df.head())
#print(qb.data.head())

In [25]:
# Show the prompt, then pf.outOptions and pf.outputBtn, in that order.
display(
    Markdown('Press Output to save the results as a csv for upload to Hilltop.'),
    pf.outOptions,
    pf.outputBtn,
)

Press Output to save the results as a csv for upload to Hilltop.

RadioButtons(description='Output Data:', options=('Clean', 'All'), value='Clean')

Button(description='Output', style=ButtonStyle())

In [26]:
#pf.qc_df['Test'] = (((pf.qc_df['Value']-pf.qc_df['Interpolated Value']) / pf.qc_df['Value']) < 0.05)

In [27]:
#pf.qc_df.head

In [28]:
#qb.accuracyCheck.head()
#qb.qc_df.head()