# 4-1: Find QARTOD tests not executed 
This notebook cross-references datasets from CGSN instruments against the QARTOD automated test lookup tables to find tests that are in production but are not being applied to the data streams as expected.

In [1]:
# Import libraries
import ast
import io
import os
from glob import glob

import numpy as np
import pandas as pd
import requests
import xarray as xr

In [2]:
# Import functions from ooinet and ooi_data_explorations libraries
from ooi_data_explorations.common import load_kdata, get_vocabulary
from ooinet import M2M

In [3]:
# Import functions from project qc_completion module
from qartod_testing.qc_completion import load_gross_range_qartod_test_list, \
    load_climatology_qartod_test_list, make_test_parameter_dict, \
    check_tests_exe

In [4]:
# Define site for refdes search and find datasets available.
# reset_index(drop=True) renumbers the rows 0..N-1 and discards the old index
# in a single step, so the search result is never mutated in place.
site = 'CP01CNSM'
datasets = M2M.search_datasets(site).reset_index(drop=True)
datasets.head(20)

Searching https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/CP01CNSM


Unnamed: 0,array,node,instrument,refdes,url,deployments
0,CP01CNSM,SBD12,08-FDCHPA000,CP01CNSM-SBD12-08-FDCHPA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
1,CP01CNSM,SBD12,06-METBKA001,CP01CNSM-SBD12-06-METBKA001,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
2,CP01CNSM,SBD12,06-METBKA000,CP01CNSM-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
3,CP01CNSM,SBD12,05-WAVSSA000,CP01CNSM-SBD12-05-WAVSSA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
4,CP01CNSM,SBD12,04-PCO2AA000,CP01CNSM-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
5,CP01CNSM,SBD12,03-HYDGN0000,CP01CNSM-SBD12-03-HYDGN0000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
6,CP01CNSM,SBD12,00-DCLENG000,CP01CNSM-SBD12-00-DCLENG000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
7,CP01CNSM,SBD11,06-METBKA001,CP01CNSM-SBD11-06-METBKA001,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
8,CP01CNSM,SBD11,06-METBKA000,CP01CNSM-SBD11-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
9,CP01CNSM,SBD11,02-HYDGN0000,CP01CNSM-SBD11-02-HYDGN0000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."


In [5]:
# Choose one instrument by its row label in `datasets`, then look up the
# data streams available for that reference designator
n = 14
refdes = datasets["refdes"][n]
datastreams = M2M.get_datastreams(refdes)

In [6]:
# Select first datastream and first deployment available
m = 0
method = datastreams["method"][m]
stream = datastreams["stream"][m]
deploy = datasets["deployments"][n][0]

# A refdes looks like 'CP01CNSM-SBD12-08-FDCHPA000': the sensor part contains
# its own hyphen, so split on the first two hyphens only
site, node, sensor = refdes.split("-", 2)
# Characters 3-7 of the sensor string, e.g. 'FDCHP' from '08-FDCHPA000'
instclass = sensor[3:8]

In [7]:
# Load data
# NOTE(review): the value returned by get_vocabulary is discarded here
get_vocabulary(site, node, sensor)
# File-name pattern restricting the load to the chosen deployment number
# (zero-padded to four digits) and instrument class
file_pattern = '*deployment%04d*%s*.nc' % (deploy, instclass)
data = load_kdata(site, node, sensor, method, stream, file_pattern)

Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:03<00:00,  3.31s/it]


In [8]:
# Check dataset contents: display the object returned by load_kdata
data

In [73]:
# Load the gross range and climatology QARTOD test lookup-table entries
# for the selected reference designator and stream
grt_table = load_gross_range_qartod_test_list(refdes, stream)
ct_table = load_climatology_qartod_test_list(refdes, stream)

In [74]:
# Create a dictionary of key-value pairs of dataset variable name:alternate parameter name.
# In the recorded run the keys are the dataset's `*_qartod_executed` variable
# names and the values are the corresponding longer parameter names.
test_parameters = make_test_parameter_dict(data)
test_parameters

{'pressure_qartod_executed': 'sea_water_pressure_qartod_executed',
 'temp_qartod_executed': 'sea_water_temperature_qartod_executed',
 'conductivity_qartod_executed': 'sea_water_electrical_conductivity_qartod_executed',
 'practical_salinity_qartod_executed': 'sea_water_practical_salinity_qartod_executed'}

In [75]:
# Loop through table parameters to check for tests executed.
# In the recorded run the result maps each parameter name to a comma-separated
# string of the QARTOD tests executed for it.
test_exe = check_tests_exe(data, test_parameters, grt_table, ct_table)
test_exe

{'conductivity': 'gross_range_test',
 'pressure': 'gross_range_test',
 'temp': 'gross_range_test, climatology_test',
 'practical_salinity': 'gross_range_test, climatology_test'}

In [76]:
# Display the gross range lookup-table rows loaded for this refdes and stream
grt_table

Unnamed: 0,subsite,node,sensor,stream,parameters
196,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,conductivity
197,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,pressure
198,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp
199,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity


In [77]:
# Display the climatology lookup-table rows loaded for this refdes and stream
ct_table

Unnamed: 0,subsite,node,sensor,stream,parameters
78,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp
81,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity


In [78]:
def make_results_table(grt_table=False, ct_table=False):
    """Merge the gross range and climatology lookup tables into one results table.

    Every parameter that appears in either lookup table gets one row, with two
    boolean columns recording which table(s) list it.

    Parameters
    ----------
    grt_table : pandas.DataFrame, False or None
        Gross range test lookup-table rows; must contain a "parameters" column.
        Pass False/None (the default) when there is no gross range table.
    ct_table : pandas.DataFrame, False or None
        Climatology test lookup-table rows; must contain a "parameters" column.
        Pass False/None (the default) when there is no climatology table.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed 0..N-1 with the added columns "GRTtable" and
        "CTtable". Gross range rows come first, followed by the climatology
        rows whose parameter is not in the gross range table. Neither input
        frame is modified.

    Raises
    ------
    ValueError
        If neither table is provided.
    """
    has_grt = grt_table is not False and grt_table is not None
    has_ct = ct_table is not False and ct_table is not None
    if not (has_grt or has_ct):
        raise ValueError("at least one of grt_table or ct_table must be provided")

    if has_ct:
        # Work on a copy so the flag columns are not added to the caller's frame
        ct_rows = ct_table.copy()
        ct_rows["GRTtable"] = False
        ct_rows["CTtable"] = True
        if not has_grt:
            return ct_rows.reset_index(drop=True)

    table = grt_table.reset_index(drop=True)
    table["GRTtable"] = True
    if not has_ct:
        table["CTtable"] = False
        return table

    table["CTtable"] = table["parameters"].isin(ct_rows["parameters"].tolist())
    # Append only the climatology parameters that are absent from the gross
    # range table, so each parameter appears exactly once
    ct_only = ct_rows[~ct_rows["parameters"].isin(table["parameters"].tolist())]
    return pd.concat([table, ct_only], ignore_index=True, sort=False)

In [79]:
# Build the cross-reference table: one row per parameter, flagged by which
# lookup table(s) list it
table = make_results_table(grt_table, ct_table)
table

Unnamed: 0,subsite,node,sensor,stream,parameters,GRTtable,CTtable
0,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,conductivity,True,False
1,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,pressure,True,False
2,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp,True,True
3,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity,True,True


In [81]:
# Add column with QARTOD tests executed by parameter
def add_test_exe(table, test_exe):
    """Add a "testsExecuted" column listing the tests executed per parameter.

    Parameters
    ----------
    table : pandas.DataFrame
        Results table with a "parameters" column. It is modified in place.
    test_exe : dict
        Maps a parameter name to the string describing the tests executed
        for it.

    Returns
    -------
    pandas.DataFrame
        The same `table` object, with "testsExecuted" set to the value from
        `test_exe` for each row's parameter, or the string "none" when the
        parameter has no entry in `test_exe`.
    """
    # Pass "none" as the lookup default so a missing parameter keeps the
    # placeholder instead of being stored as None
    table["testsExecuted"] = [test_exe.get(param, "none") for param in table["parameters"]]
    return table

In [82]:
# Attach the executed-tests string for each parameter; add_test_exe modifies
# `table` in place and returns the same object
table = add_test_exe(table, test_exe)
table

Unnamed: 0,subsite,node,sensor,stream,parameters,GRTtable,CTtable,testsExecuted
0,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,conductivity,True,False,gross_range_test
1,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,pressure,True,False,gross_range_test
2,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,temp,True,True,"gross_range_test, climatology_test"
3,CP01CNSM,RID27,03-CTDBPC000,ctdbp_cdef_dcl_instrument_recovered,practical_salinity,True,True,"gross_range_test, climatology_test"


In [97]:
# Write QARTOD test cross-reference results table to a CSV
# File name and output directory passed to write_results; the directory is
# relative to the notebook's working directory
csv_name = "test_cross-ref_results.csv"
csv_dir = "./../data/processed/"
def write_results(table, csv_name="test_cross-ref_results.csv", csv_dir="/../data/processed/"):
    """Append a results table to a CSV file, creating the file when needed.

    The header row is written only when the target file does not exist yet or
    is empty; otherwise the rows are appended below the existing content.

    Parameters
    ----------
    table : pandas.DataFrame
        Results table to write; its index is not written.
    csv_name : str
        Name of the CSV file.
    csv_dir : str
        Directory holding the CSV file, with or without a trailing separator.
        NOTE(review): the default starts with "/", i.e. it resolves from the
        filesystem root, while the notebook passes "./../data/processed/".
        The default is left unchanged to keep the signature stable - confirm
        which location is intended.

    Returns
    -------
    None
    """
    csv_path = os.path.join(csv_dir, csv_name)
    # to_csv opens and closes the file itself, so no separate handle is needed
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    table.to_csv(csv_path, mode='a', header=needs_header, index=False)
    print(f"results saved to {csv_path}")
    return

results saved to ./../data/processed/test_cross-ref_results.csv


In [90]:
# Save the cross-reference table. write_results appends when the CSV already
# exists, so re-running this cell adds the same rows again.
# NOTE(review): the recorded output (`True`) does not match the write_results
# defined above, which returns None - re-run with Restart & Run All.
write_results(table, csv_name, csv_dir)

True