# Compare local QARTOD Climatology and Gross Range Test Results to Expected Flags

### Import modules used in this notebook

In [1]:
# Import libraries
import os
import re
import gc
import io
import ast
import requests
import pandas as pd
import numpy as np
import xarray as xr
import warnings
warnings.filterwarnings("ignore")
import sys

In [2]:
# Import OOINet library - can probably delete
from ooinet import M2M
from ooinet.Instrument.common import process_file

In [3]:
# Import functions from ooi-data-explorations library - can probably delete
from ooi_data_explorations.uncabled.process_dosta import dosta_datalogger
from ooi_data_explorations.combine_data import combine_datasets

In [4]:
# Import dask tools and ProgressBar
import dask
from dask.diagnostics import ProgressBar

In [5]:
# Import function to build relative path to data files
from qartod_testing.data_processing import build_data_path

### Define reference designator and file paths for chosen variable

In [6]:
# Identify the instrument, delivery method, and data stream to evaluate

site = "CP01CNSM"                                   # Coastal Pioneer Array (NES) - Central Surface Mooring
node = "MFD37"                                      # node portion of the reference designator
sensor = "03-CTDBPD000"                             # CTD Bottom-pumped
method = "recovered_inst"                           # non-decimated data from recovered instrument
stream = "ctdbp_cdef_instrument_recovered"          # name of data stream

# The reference designator is the hyphen-joined site, node, and sensor
refdes = f"{site}-{node}-{sensor}"

### Load QARTOD test flags from local processed datasets

In [7]:
# Resolve the paths of the processed files that hold the locally computed test results
gr_local_test_path = build_data_path(refdes, method, stream, "prod-gr-result", folder="processed")
clim_local_test_path = build_data_path(refdes, method, stream, "prod-clim-result", folder="processed")

# Open the gross range and climatology result files (.nc) as xarray datasets
gr_results_local, clim_results_local = (
    xr.open_dataset(result_path)
    for result_path in (gr_local_test_path, clim_local_test_path)
)

In [8]:
# Display a summary of the dataset holding the local gross range test results
gr_results_local

In [9]:
# Display a summary of the dataset holding the local climatology test results
clim_results_local

### Extract and parse expected QC results

In [10]:
# Open the dataset with the expected results from the interim data folder
expected_ds_path = build_data_path(refdes, method, stream, "prod", folder="interim")
ds_expected = xr.open_dataset(expected_ds_path)

# Map each parameter that has QARTOD results to its alternate parameter name
test_parameters = {}
for var in ds_expected.variables:
    # Only the "<parameter>_qartod_results" variables identify tested parameters
    if "qartod_results" not in var:
        continue

    # The parameter name is everything before the "_qartod" suffix
    param = var.split("_qartod")[0]

    # Use the alternate name when the parameter declares one, otherwise its own name
    ooinet_name = ds_expected[param].attrs.get("alternate_parameter_name", param)
    test_parameters[param] = ooinet_name

# Show the resulting mapping
test_parameters

{'sea_water_electrical_conductivity': 'ctdbp_seawater_conductivity',
 'sea_water_temperature': 'ctdbp_seawater_temperature',
 'sea_water_practical_salinity': 'practical_salinity',
 'sea_water_pressure': 'ctdbp_seawater_pressure'}

In [11]:
# Parse the variables with expected QARTOD flags into more easily useable dataset
def parse_qartod_executed(ds, parameters):
    """
    Parses the qartod tests for the given parameters into separate variables.

    For every parameter that has a ``{parameter}_qartod_executed`` variable,
    that variable is converted to strings and the character at position ``i``
    of each string is stored as the result of the ``i``-th test named in the
    variable's comma-separated ``tests_executed`` attribute.

    Parameters
    ----------
    ds: xarray.DataSet
        The dataset downloaded from OOI with the QARTOD flags applied.
    parameters: str or iterable of str
        The name(s) of the parameters in the dataset to parse the QARTOD
        flags for. A single name may be given as a plain string.

    Returns
    -------
    ds: xarray.DataSet
        The same dataset object that was passed in (it is modified in place),
        with the QARTOD tests for the given parameters split out into new
        separate data variables using the naming convention:
        {parameter}_qartod_{test_name}

    Raises
    ------
    KeyError
        If a ``{parameter}_qartod_executed`` variable has no
        ``tests_executed`` attribute.
    """
    # Force the params into a list. A lone string must be wrapped, not passed
    # to list(), which would split it into single characters.
    if isinstance(parameters, str):
        parameters = [parameters]
    elif type(parameters) is not list:
        parameters = list(parameters)

    # Iterate through each parameter
    for param in parameters:
        # Generate the qartod executed name
        qartod_name = f"{param}_qartod_executed"

        # Parameters without executed QARTOD tests are skipped silently
        if qartod_name not in ds.variables:
            continue

        # Get the test order before converting, so the attribute is read from
        # the variable exactly as it was stored in the dataset
        test_order = ds[qartod_name].attrs["tests_executed"].split(",")

        # Fix the test types so each flag can be picked out by string position
        ds[qartod_name] = ds[qartod_name].astype(str)

        # Create a separate variable with the results of each test; the test's
        # position in the list is also the position of its flag in the string
        for test_index, test in enumerate(test_order):
            test_name = f"{param}_qartod_{test.strip()}"
            ds[test_name] = ds[qartod_name].str.get(test_index)

    return ds

In [12]:
# Collect the dataset names of the tested parameters (the dictionary keys) in a list
parameters = list(test_parameters)

In [13]:
# Parse all of the variables with QARTOD tests applied into separate tests.
# NOTE: parse_qartod_executed adds the new variables to the dataset it is given
# and returns that same dataset, so results_expected and ds_expected refer to
# the same, modified object.
results_expected = parse_qartod_executed(ds_expected, parameters)
results_expected

### Comparing local results of QARTOD tests to expected results 

In [14]:
# or something like Andrew's example:

def run_comparison(ds, param, test_results, test="gross_range_test"):
    """
    Runs a comparison between the qartod results of one test returned as part
    of the dataset and the results of that test calculated locally.

    Parameters
    ----------
    ds: xarray.DataSet
        The dataset holding the expected flags in a variable named
        {param}_qartod_{test}.
    param: str
        The name of the parameter to compare.
    test_results: xarray.DataSet
        The locally calculated flags, stored in a variable named {param}.
    test: str, default "gross_range_test"
        The name of the QARTOD test whose expected flags are compared, for
        example "gross_range_test" or "climatology_test".

    Returns
    -------
    not_equal: numpy.ndarray or None
        The positions where the expected and local flags differ, or None
        when every flag matches.
    """
    # Get the local test results and convert to string type for comparison
    local_results = test_results[param].astype(str)

    # Positions along the first axis where the two sets of flags disagree
    not_equal = np.where(ds[f"{param}_qartod_{test}"] != local_results)[0]

    # An empty result is reported as None so callers can test "is None"
    return not_equal if len(not_equal) > 0 else None

### Identify differences in results

In [19]:
# Compare the expected and local gross range flags of every parameter and
# collect the outcome in a dictionary keyed by parameter name
gr_comparison = {}

for param in parameters:
    print(f"Checking for mismatched QARTOD flags in {param}")
    gr_mismatch = run_comparison(results_expected, param, gr_results_local)

    if gr_mismatch is not None:
        # Record where the flags differ together with both sets of flag values
        expected_flags = results_expected[f"{param}_qartod_gross_range_test"][gr_mismatch].values
        local_flags = gr_results_local[param][gr_mismatch].values
        gr_comparison[f"{param}"] = {
            "index": gr_mismatch,
            "expected flags": expected_flags,
            "local test flags": local_flags,
        }
        continue

    # Empty strings are placeholders for parameters whose flags all agree
    print("No mismatched values found")
    gr_comparison[f"{param}"] = {
        "index": "",
        "expected flags": "",
        "local test flags": "",
    }
   
    

Checking for mismatched QARTOD flags in sea_water_electrical_conductivity
Checking for mismatched QARTOD flags in sea_water_temperature
No mismatched values found
Checking for mismatched QARTOD flags in sea_water_practical_salinity
No mismatched values found
Checking for mismatched QARTOD flags in sea_water_pressure


In [20]:
# Display the mismatched positions and flags collected for each parameter
gr_comparison

{'sea_water_electrical_conductivity': {'index': array([166533], dtype=int64),
  'expected flags': array(['B'], dtype='<U1'),
  'local test flags': array([1], dtype=uint8)},
 'sea_water_temperature': {'index': '',
  'expected flags': '',
  'local test flags': ''},
 'sea_water_practical_salinity': {'index': '',
  'expected flags': '',
  'local test flags': ''},
 'sea_water_pressure': {'index': array([ 71158, 166560, 205549], dtype=int64),
  'expected flags': array(['B', 'B', 'B'], dtype='<U1'),
  'local test flags': array([1, 1, 1], dtype=uint8)}}

### Export differing values to CSV with relevant metadata
**TODO:** Metadata could include the date and time, sensor, node, site, and the local and expected test results.