# 4-1: Find QARTOD tests not executed 
This notebook compares datasets from CGSN instruments against the QARTOD automated test lookup tables to find tests that are in production but are not being applied to the data streams as expected.

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import xarray as xr
import requests
import io
import ast

In [2]:
# Import functions from ooinet and ooi_data_explorations libraries
from ooi_data_explorations.common import load_kdata, get_vocabulary
from ooinet import M2M

In [50]:
# Choose the site to inspect and list the datasets returned by the M2M search
site = 'CP01CNSM'
datasets = M2M.search_datasets(site).reset_index()
datasets.head(20)

Searching https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/CP01CNSM


Unnamed: 0,index,array,node,instrument,refdes,url,deployments
0,0,CP01CNSM,SBD12,08-FDCHPA000,CP01CNSM-SBD12-08-FDCHPA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
1,0,CP01CNSM,SBD12,06-METBKA001,CP01CNSM-SBD12-06-METBKA001,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
2,0,CP01CNSM,SBD12,06-METBKA000,CP01CNSM-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
3,0,CP01CNSM,SBD12,05-WAVSSA000,CP01CNSM-SBD12-05-WAVSSA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
4,0,CP01CNSM,SBD12,04-PCO2AA000,CP01CNSM-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
5,0,CP01CNSM,SBD12,03-HYDGN0000,CP01CNSM-SBD12-03-HYDGN0000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
6,0,CP01CNSM,SBD12,00-DCLENG000,CP01CNSM-SBD12-00-DCLENG000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
7,0,CP01CNSM,SBD11,06-METBKA001,CP01CNSM-SBD11-06-METBKA001,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
8,0,CP01CNSM,SBD11,06-METBKA000,CP01CNSM-SBD11-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
9,0,CP01CNSM,SBD11,02-HYDGN0000,CP01CNSM-SBD11-02-HYDGN0000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."


In [51]:
# Choose one sensor (row n of the dataset listing) and look up its datastreams
n = 14
refdes = datasets["refdes"][n]
datastreams = M2M.get_datastreams(refdes)

In [52]:
# Use the first datastream (row m) and the first deployment listed for the sensor
m = 0
site, node, sensor = refdes.split("-", 2)
method = datastreams["method"][m]
stream = datastreams["stream"][m]
deploy = datasets["deployments"][n][0]
# Characters 3-7 of the sensor string are used as the instrument class
instclass = sensor[3:8]

In [53]:
# Load the data files for the selected deployment and instrument class
# (the return value of get_vocabulary is not kept here)
get_vocabulary(site, node, sensor)
file_pattern = '*deployment%04d*%s*.nc' % (deploy, instclass)
data = load_kdata(site, node, sensor, method, stream, file_pattern)

Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:04<00:00,  4.17s/it]


In [54]:
# Check dataset contents (displays the loaded dataset as the cell output)
data

In [128]:
# Define functions to load lookup table entries, edited to load multiple parameters
# Base URL of the raw QARTOD lookup tables in the oceanobservatories/qc-lookup
# GitHub repository (master branch); the per-instrument table URLs are built from it
GITHUB_BASE_URL = "https://raw.githubusercontent.com/oceanobservatories/qc-lookup/master/qartod"

def load_gross_range_qartod_test_list(refdes, stream):
    """
    Load the gross range QARTOD test entries for one data stream from gitHub

    Parameters
    ----------
    refdes: str
        The reference designator for the given sensor, in the form
        "<subsite>-<node>-<sensor>"
    stream: str
        The name of the data stream used to filter the lookup table

    Returns
    -------
    df: pandas.DataFrame or bool
        The rows of the gross range lookup table that match the subsite,
        node, sensor and stream, with the "parameters" column reduced to
        the input ("inp") parameter name and the qcConfig, source and notes
        columns removed. Returns False if the table could not be downloaded
        (HTTP status other than 200).
    """
    subsite, node, sensor = refdes.split("-", 2)
    sensor_type = sensor[3:8].lower()

    # gitHub url to the gross range table
    url = f"{GITHUB_BASE_URL}/{sensor_type}/{sensor_type}_qartod_gross_range_test_values.csv"

    # Download the table; the timeout keeps the notebook from hanging
    # indefinitely if gitHub does not respond
    download = requests.get(url, timeout=30)
    if download.status_code != 200:
        return False
    df = pd.read_csv(io.StringIO(download.content.decode('utf-8')))

    # Filter for the desired stream. Take an explicit copy so the column
    # assignment below modifies an independent table rather than a slice of
    # the full lookup table (avoids pandas' SettingWithCopyWarning).
    matches = ((df["subsite"] == subsite) &
               (df["node"] == node) &
               (df["sensor"] == sensor) &
               (df["stream"] == stream))
    df = df[matches].copy()

    # The parameters field is stored as the text of a dictionary; parse it
    # and keep only the input parameter name
    df["parameters"] = df["parameters"].apply(ast.literal_eval)
    df["parameters"] = df["parameters"].apply(lambda x: x.get("inp"))

    # Drop columns for qcConfig, source, notes
    return df.drop(columns=["qcConfig", "source", "notes"])


def load_climatology_qartod_test_list(refdes, stream):
    """
    Load the OOI climatology QARTOD test entries for one data stream from gitHub

    Parameters
    ----------
    refdes: str
        The reference designator for the given sensor, in the form
        "<subsite>-<node>-<sensor>"
    stream: str
        The name of the data stream used to filter the lookup table

    Returns
    -------
    df: pandas.DataFrame or bool
        The rows of the climatology lookup table that match the subsite,
        node, sensor and stream, with the "parameters" column reduced to
        the input ("inp") parameter name and the climatologyTable, source
        and notes columns removed. Returns False if there is no climatology
        test table available for download for the instrument class (HTTP
        status other than 200).
    """
    subsite, node, sensor = refdes.split("-", 2)
    sensor_type = sensor[3:8].lower()

    # gitHub url to the climatology test tables
    url = f"{GITHUB_BASE_URL}/{sensor_type}/{sensor_type}_qartod_climatology_test_values.csv"

    # Download the table; the timeout keeps the notebook from hanging
    # indefinitely if gitHub does not respond
    download = requests.get(url, timeout=30)

    # Exit function if there is no climatology test table for the instrument class
    if download.status_code != 200:
        return False
    df = pd.read_csv(io.StringIO(download.content.decode('utf-8')))

    # Filter for the desired stream. Take an explicit copy so the column
    # assignment below modifies an independent table rather than a slice of
    # the full lookup table (avoids pandas' SettingWithCopyWarning).
    matches = ((df["subsite"] == subsite) &
               (df["node"] == node) &
               (df["sensor"] == sensor) &
               (df["stream"] == stream))
    df = df[matches].copy()

    # The parameters field is stored as the text of a dictionary; parse it
    # and keep only the input parameter name
    df["parameters"] = df["parameters"].apply(ast.literal_eval)
    df["parameters"] = df["parameters"].apply(lambda x: x.get("inp"))

    # Drop columns for climatologyTable, source, notes
    return df.drop(columns=["climatologyTable", "source", "notes"])

In [129]:
# Load gross range and climatology test tables for the selected sensor and stream.
# Each loader returns False instead of a table when its download does not succeed.
grt_table = load_gross_range_qartod_test_list(refdes, stream)
ct_table = load_climatology_qartod_test_list(refdes, stream)

In [147]:
# Map each "*qartod_executed" variable's alternate parameter name to the name
# of the dataset variable itself. A variable that has no
# "alternate_parameter_name" attribute is mapped to its own name.
test_parameters = {
    data[name].attrs.get("alternate_parameter_name", name): name
    for name in data.variables
    if "qartod_executed" in name
}
# Show the resulting alternate name -> dataset variable mapping
test_parameters

{'pressure_qartod_executed': 'sea_water_pressure_qartod_executed',
 'temp_qartod_executed': 'sea_water_temperature_qartod_executed',
 'conductivity_qartod_executed': 'sea_water_electrical_conductivity_qartod_executed',
 'practical_salinity_qartod_executed': 'sea_water_practical_salinity_qartod_executed'}

In [150]:
# Look up the tests executed for every parameter listed in the lookup tables.
# test_parameters maps "<table parameter>_qartod_executed" to the dataset
# variable that holds the results, so both tables are resolved with the same
# lookup. A parameter with no matching dataset variable is recorded as "none".
test_exe = {}
for table in (grt_table, ct_table):
    # The loaders return False when no lookup table could be downloaded
    if table is False:
        continue
    for param in table.parameters:
        qartod = param + "_qartod_executed"
        if qartod in test_parameters:
            var = test_parameters[qartod]
            test_exe[param] = data[var].tests_executed
        else:
            test_exe[param] = "none"

test_exe

{'conductivity': 'gross_range_test',
 'pressure': 'gross_range_test',
 'temp': 'gross_range_test, climatology_test',
 'practical_salinity': 'gross_range_test, climatology_test'}

In [86]:
# Show the tests executed per parameter as a single-row table (row label 0)
pd.DataFrame(test_exe, index=[0])

Unnamed: 0,conductivity,pressure,temp,practical_salinity
0,gross_range_test,gross_range_test,"gross_range_test, climatology_test","gross_range_test, climatology_test"


In [151]:
# Display the gross range lookup table entries for the selected stream
grt_table

Unnamed: 0,subsite,node,sensor,stream,parameters
196,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,conductivity
197,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,pressure
198,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp
199,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity


In [127]:
# Display the climatology lookup table entries for the selected stream
ct_table

Unnamed: 0,subsite,node,sensor,stream,parameters
78,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp
81,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity


In [103]:
# NOTE(review): looks like a scratch experiment - sets a constant "source" column on ct_table in place; confirm whether it is still needed
ct_table["source"]=1

In [104]:
# Display the "source" column of the climatology table
ct_table["source"]

78    1
81    1
Name: source, dtype: int64

In [113]:
# Copy the input parameter name into the "source" column. The loader has
# already reduced "parameters" to plain names, so .get("inp") is only called
# on entries that are still dictionaries; other values are passed through
# unchanged instead of raising AttributeError.
ct_table["source"] = ct_table["parameters"].apply(
    lambda x: x.get("inp") if isinstance(x, dict) else x
)

In [137]:
# Display the climatology lookup table entries again
ct_table

Unnamed: 0,subsite,node,sensor,stream,parameters
78,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp
81,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity
