# 4-1: Find QARTOD tests not executed 
This notebook searches through datasets for CGSN instruments and QARTOD automated test lookup tables for tests in production that are not applied to data streams as expected.

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import xarray as xr
import requests
import io
import ast
from glob import glob

In [2]:
# Import functions from ooinet and ooi_data_explorations libraries
from ooi_data_explorations.common import load_kdata, get_vocabulary, m2m_request, m2m_collect
from ooinet import M2M

In [3]:
# Import functions from project qc_completion module
from qartod_testing.qc_completion import load_gross_range_qartod_test_list, \
    load_climatology_qartod_test_list, make_test_parameter_dict, \
    check_tests_exe, make_results_table, add_test_exe, write_results

In [4]:
# Define site for refdes search and find datasets available
site = 'CP01CNSM'
datasets = M2M.search_datasets(site)
datasets.reset_index(inplace=True)
datasets.drop(labels="index", axis=1, inplace=True)
datasets[0:5]

Searching https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/CP01CNSM


Unnamed: 0,array,node,instrument,refdes,url,deployments
0,CP01CNSM,SBD12,08-FDCHPA000,CP01CNSM-SBD12-08-FDCHPA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
1,CP01CNSM,SBD12,06-METBKA001,CP01CNSM-SBD12-06-METBKA001,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
2,CP01CNSM,SBD12,06-METBKA000,CP01CNSM-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
3,CP01CNSM,SBD12,05-WAVSSA000,CP01CNSM-SBD12-05-WAVSSA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
4,CP01CNSM,SBD12,04-PCO2AA000,CP01CNSM-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."


### Run cross-reference for tests executed on all refdes and datastreams at site

The cell as written below ran for all refdes-stream combinations printed below the cell until the kernel crashed. At this point, this cell and the cells above it were turned into a python script `run_qartod_test_cross-ref.py`.

In [None]:
# Set csv save directory and file name for results
csv_name = "GA01SUMO_test_cross-ref_results.csv"
csv_dir = "./../data/processed/"
# loop through sensors to check and find datastreams available
for k in datasets.index:
    refdes = datasets.refdes[k]
    datastreams = M2M.get_datastreams(refdes)
    # loop through datastreams and first deployment available
    site, node, sensor = refdes.split("-", 2)
    for m in datastreams.index:
        method = datastreams.method[m]
        stream = datastreams.stream[m]
        deploy = datasets.deployments[k][0]
        instclass = sensor[3:8]
        # Load gross range and climatology test tables
        grt_table = load_gross_range_qartod_test_list(refdes, stream)
        ct_table = load_climatology_qartod_test_list(refdes, stream)
        if (grt_table is False) and (ct_table is False):
            print(f"No test in production for {refdes}-{stream}.")
        else:
            # Load data
            get_vocabulary(site, node, sensor)
            data = load_kdata(site, node, sensor, method, stream, ('*deployment%04d*%s*.nc' % (deploy, instclass)))
            try:
                print(data.id) # to check data stream loaded
            except AttributeError:
                print(f"No dataset available for {refdes}-{stream} tests.")
                del [grt_table, ct_table]
            else:
                # Create a dictionary of key-value pairs of dataset variable name:alternate parameter name
                test_parameters = make_test_parameter_dict(data)
                # Use test parameters to check for tests executed in dataset
                test_exe = check_tests_exe(data, test_parameters, grt_table, ct_table)
                # Make table for cross-ref results
                table = make_results_table(grt_table, ct_table)
                # Add column with QARTOD tests executed by parameter
                table = add_test_exe(table, test_exe)
                # Write QARTOD test cross-reference results table to a CSV
                write_results(table, csv_name, csv_dir)
                del [grt_table, ct_table, data, test_parameters,
                     test_exe, table]

### Run cross-reference for tests executed for a single datastream

In [5]:
# Pick a sensor to check and find datastreams available
n = 0
refdes = datasets.refdes[n]
datastreams = M2M.get_datastreams(refdes)

In [34]:
# temp for mopak test - Pick a sensor to check and find datastreams available
n = 0
refdes = "CP01CNSM-SBD11-01-MOPAK0000"
datastreams = M2M.get_datastreams(refdes)

In [6]:
datastreams

Unnamed: 0,refdes,method,stream
0,CP01CNSM-SBD12-08-FDCHPA000,recovered_host,fdchp_a_dcl_instrument_recovered
1,CP01CNSM-SBD12-08-FDCHPA000,recovered_inst,fdchp_a_instrument_recovered
2,CP01CNSM-SBD12-08-FDCHPA000,telemetered,fdchp_a_dcl_instrument


In [17]:
# Select first datastream and first deployment available
m = 0
site, node, sensor = refdes.split("-", 2)
method = datastreams.method[m]
stream = datastreams.stream[m]
deploy = datasets.deployments[n][0]
instclass = sensor[3:8]
print([site, node, sensor, method, stream, deploy, instclass])

['CP01CNSM', 'SBD12', '08-FDCHPA000', 'recovered_host', 'fdchp_a_dcl_instrument_recovered', 2, 'FDCHP']


In [18]:
# Load data
get_vocabulary(site, node, sensor)
print(f"Loading deployment {deploy}")
data = load_kdata(site, node, sensor, method, stream,
                  ('*deployment%04d*%s*.nc' % (deploy, instclass)))
while data is None:
    deploy+=1
    if deploy > datasets.deployments[n][-1]:
        print("No dataset loaded")
        break
    else:
        print(f"Loading deployment {deploy}")
        try:
            data = load_kdata(site, node, sensor, method, stream,
                              ('*deployment%04d*%s*.nc' % (deploy, instclass)))
        except:
            continue

Loading deployment 2
Downloading 0 data file(s) from the local kdata directory


Loading and Processing Data Files: 0it [00:00, ?it/s]


Loading deployment 3
Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:00<00:00,  8.01it/s]


In [8]:
# Run this cell with one file name for FDCHP recovered inst on the kdata server 
# (until I figure out a better way)
get_vocabulary(site, node, sensor)
data = load_kdata(site, node, sensor, method, stream,
                  ('*deployment0003_CP01CNSM-SBD12-08-FDCHPA000-recovered_inst-fdchp_a_instrument_recovered_20151008T120100.007000-20151015T172059.961000.nc'))

Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:13<00:00, 13.28s/it]


In [19]:
# Check dataset contents
data

In [24]:
fdchp_vars = [x for x in data.variables if "fdchp" in x]
print(fdchp_vars)

[]


In [10]:
# Load gross range and climatology test tables
grt_table = load_gross_range_qartod_test_list(refdes, stream)
ct_table = load_climatology_qartod_test_list(refdes, stream)

In [11]:
# Create a dictionary of key-value pairs of dataset variable name:alternate parameter name
test_parameters = make_test_parameter_dict(data)
test_parameters

{'fdchp_wind_z_qartod_executed': 'fdchp_wind_z_qartod_executed',
 'fdchp_wind_x_qartod_executed': 'fdchp_wind_x_qartod_executed',
 'fdchp_a_fluxhot_qartod_executed': 'fdchp_a_fluxhot_qartod_executed',
 'fdchp_a_fluxmom_alongwind_qartod_executed': 'fdchp_a_fluxmom_alongwind_qartod_executed',
 'fdchp_wind_y_qartod_executed': 'fdchp_wind_y_qartod_executed',
 'fdchp_a_fluxmom_crosswind_qartod_executed': 'fdchp_a_fluxmom_crosswind_qartod_executed'}

In [12]:
# Loop through table parameters to check for tests executed
test_exe = check_tests_exe(data, test_parameters, grt_table, ct_table)
test_exe

{'fdchp_wind_x': 'gross_range_test',
 'fdchp_wind_y': 'gross_range_test',
 'fdchp_wind_z': 'gross_range_test',
 'fdchp_a_fluxmom_crosswind': 'gross_range_test',
 'fdchp_a_fluxmom_alongwind': 'gross_range_test',
 'fdchp_a_fluxhot': 'gross_range_test'}

In [13]:
grt_table

Unnamed: 0,subsite,node,sensor,stream,parameters
60,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_x
61,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_y
62,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_z
63,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxmom_crosswind
64,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxmom_alongwind
65,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxhot


In [14]:
ct_table

False

In [15]:
table = make_results_table(grt_table, ct_table)
table

Unnamed: 0,subsite,node,sensor,stream,parameters,GRTtable,CTtable
0,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_x,True,False
1,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_y,True,False
2,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_z,True,False
3,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxmom_crosswind,True,False
4,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxmom_alongwind,True,False
5,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxhot,True,False


In [16]:
# Add column with QARTOD tests executed by parameter
table = add_test_exe(table, test_exe)
table

Unnamed: 0,subsite,node,sensor,stream,parameters,GRTtable,CTtable,testsExecuted
0,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_x,True,False,gross_range_test
1,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_y,True,False,gross_range_test
2,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_wind_z,True,False,gross_range_test
3,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxmom_crosswind,True,False,gross_range_test
4,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxmom_alongwind,True,False,gross_range_test
5,CP01CNSM,SBD12,08-FDCHPA000,fdchp_a_instrument_recovered,fdchp_a_fluxhot,True,False,gross_range_test


In [97]:
# Write QARTOD test cross-reference results table to a CSV 
csv_name = "test_cross-ref_results.csv"
csv_dir = "./../data/processed/"
write_results(table, csv_name, csv_dir)

results saved to ./../data/processed/test_cross-ref_results.csv


In [13]:
# Load data
get_vocabulary('CE09OSPM', 'WFP01', '01-VEL3DK000')
data = load_kdata('CE09OSPM', 'WFP01', '01-VEL3DK000', 'recovered_wfp', 'vel3d_k_wfp_instrument', ('*deployment%04d*%s*.nc' % (2, "VEL3D")))

Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:06<00:00,  6.42s/it]


In [14]:
data

In [15]:
m2m_result = m2m_request('GP02HYPM', 'WFP03', '05-VEL3DL000', 'recovered_wfp', 'vel3d_l_wfp_instrument_recovered',
                         start="2021-08-01T00:00:00.000Z", stop="2021-10-01T00:00:00.000Z")

Requesting:
	refdes: GP02HYPM-WFP03-05-VEL3DL000
	method: recovered_wfp
	stream: vel3d_l_wfp_instrument_recovered
	from 2021-08-01T00:00:00.000Z to 2021-10-01T00:00:00.000Z
Waiting for OOINet to process and prepare data request, this may take up to 20 minutes.
Waiting: 100%|██████████| 400/400 [01:30<00:00,  4.44it/s]


In [16]:
m2m_result

{'requestUUID': '185b47ef-c539-4492-8fa7-8593e22716ed',
 'outputURL': 'https://opendap.oceanobservatories.org/thredds/catalog/ooi/kylene.cooley@whoi.edu/20250122T211104676Z-GP02HYPM-WFP03-05-VEL3DL000-recovered_wfp-vel3d_l_wfp_instrument_recovered/catalog.html',
 'allURLs': ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/kylene.cooley@whoi.edu/20250122T211104676Z-GP02HYPM-WFP03-05-VEL3DL000-recovered_wfp-vel3d_l_wfp_instrument_recovered/catalog.html',
  'https://downloads.oceanobservatories.org/async_results/kylene.cooley@whoi.edu/20250122T211104676Z-GP02HYPM-WFP03-05-VEL3DL000-recovered_wfp-vel3d_l_wfp_instrument_recovered'],
 'sizeCalculation': 71104279,
 'timeCalculation': 60,
 'numberOfSubJobs': 31}

In [42]:
('.*deployment%04d.*%s.*.nc$' % (8, instclass))

'.*deployment0008.*VEL3D.*.nc$'

In [17]:
m2m_data = m2m_collect(m2m_result, tag=('.*deployment%04d.*%s.*.nc$' % (8, instclass)))

Downloading 1 data file(s) from the users OOI M2M THREDDS catalog


Downloading and Processing Data Files: 100%|██████████| 1/1 [00:08<00:00,  8.93s/it]


In [18]:
m2m_data

In [14]:
deploy_info = M2M.get_deployments(refdes, deploy_num='3')

In [24]:
t = deploy_info.deployStart+pd.Timedelta(5, 'D')

In [29]:
t[0].isoformat(timespec='milliseconds')+'Z'

'2015-05-04T20:15:00.000Z'