# Compare local QARTOD Climatology Test Results to Expected Flags
Next, we want to calculate the statistics of the different QARTOD flags for the different tests that are applied to the different parameters in the dataset. The example ```qartod_results_summary``` below simply counts the total number of different flags (e.g 1, 3, 4) and their relative percentages for each test (gross range, climatology, etc) for each parameter that the tests area applied to. 

### Import modules used in this notebook

In [1]:
# Import libraries
import os
import re
import requests
import gc
import io
import ast
import pandas as pd
import numpy as np
import xarray as xr
import warnings
warnings.filterwarnings("ignore")
import sys
import glob

In [2]:
# Import function to build relative path to data files
import qartod_testing.data_processing as dp

### Define reference designator for chosen variable

In [3]:
# Set reference designator, data stream, and method 
method = "recovered_inst"                       
stream = "ctdbp_cdef_instrument_recovered"   
refdes = "CP01CNSM-MFD37-03-CTDBPD000"              

### Load local QARTOD test flags from processed dataset

In [4]:
# build path to folder where data was saved
folder_path = os.path.join(os.path.abspath('../data/interim'), method, stream, refdes)

# retrieve list of netCDF files in this directory
local_files = glob.glob(folder_path+'/climatology*.nc')
# files = [file for file in # I started trying to remove files with blank in the name with more generalized way and ran out of time

14

In [5]:
local_test_results = xr.open_dataset(files[0])
local_test_results

### Extract and parse expected QC results

In [6]:
# Load expected results data from external data folder
# build path to folder where data was saved
folder_path = os.path.join(os.path.abspath('../data/external'), method, stream, refdes)

# retrieve list of netCDF files in this directory
expected_files = glob.glob(folder_path+'/*.nc')
files[-3]

'/home/jovyan/code/qartod_testing/data/external/recovered_inst/ctdbp_cdef_instrument_recovered/CP01CNSM-MFD37-03-CTDBPD000/deployment0012_CP01CNSM-MFD37-03-CTDBPD000-recovered_inst-ctdbp_cdef_instrument_recovered_20190927T183001-20201106T130001.nc'

In [7]:
ds_expected = xr.open_dataset(files[-3])

# Create a dictionary of key-value pairs of dataset variable name:alternate parameter name
test_parameters = dp.get_test_parameters(ds_expected)

{'sea_water_electrical_conductivity': 'ctdbp_seawater_conductivity',
 'sea_water_temperature': 'ctdbp_seawater_temperature',
 'sea_water_practical_salinity': 'practical_salinity',
 'sea_water_pressure': 'ctdbp_seawater_pressure'}

In [17]:
# Put the test parameter names in the dataset into a list
parameters = [x for x in test_parameters.keys()]

In [18]:
# Parse all of the variables with QARTOD tests applied into separate tests
results_expected = dp.parse_qartod_executed(ds_expected, parameters)
results_expected

In [19]:
results_expected['sea_water_pressure_qartod_results']

### Comparing local results of QARTOD tests to expected results 

In [21]:
# Identify differences in the results
test_comparison = dict()

for index, param in enumerate(parameters):
    print("Checking for mismatched QARTOD flags in "f"{param}")
    flag_mismatch = dp.run_comparison(results_expected, param, local_test_results)

    if flag_mismatch is None:
        print("No mismatched values found")
        pass
    else:  
        flag_mismatch = flag_mismatch[np.char.isnumeric(results_expected[f"{param}_qartod_climatology_test"][flag_mismatch])] 

        if len(flag_mismatch) == 0:
            print("No mismatched values found")
            pass
        else:
           test_comparison.update({f"{param}_mismatched_flags":{
                    "time": results_expected['time'][flag_mismatch].values,
                    "expected flags": results_expected[f"{param}_qartod_climatology_test"][flag_mismatch].values,
                    "local test flags": local_test_results[param][flag_mismatch].values
                }
            })

Checking for mismatched QARTOD flags in sea_water_electrical_conductivity
No mismatched values found
Checking for mismatched QARTOD flags in sea_water_temperature
No mismatched values found
Checking for mismatched QARTOD flags in sea_water_practical_salinity
No mismatched values found
Checking for mismatched QARTOD flags in sea_water_pressure
No mismatched values found


In [22]:
test_comparison

{}

Next, we'll manually create datasets to hold the results of the comparison for each parameter

In [15]:
temperature_mismatch = xr.Dataset(data_vars=dict(expected_flags=(["time"], test_comparison['sea_water_temperature_mismatched_flags']['expected flags']),
                                                 local_test_flags=(["time"], test_comparison['sea_water_temperature_mismatched_flags']['local test flags'])
                                                 ),
                                    coords=dict(time=(["time"], test_comparison['sea_water_temperature_mismatched_flags']['time']))
                                )

KeyError: 'sea_water_temperature_mismatched_flags'

In [115]:
temperature_mismatch

In [122]:
import matplotlib.pyplot as plt
temperature_mismatch.plot
plt.show

ModuleNotFoundError: No module named 'matplotlib'

### Prepare CSV with statistics about QARTOD results

In [24]:
# Finally add statistics about the whole data record available for this sensor
# to-do: fix files so that they are arranged monotonically increasing along time
full_ds = xr.open_mfdataset(files)

ValueError: Resulting object does not have monotonic global indexes along dimension time

In [25]:
qartod_results = dp.qartod_results_summary(results_expected, parameters, "climatology")
qartod_results

{}

In [20]:
qartod_flag_stats = pd.DataFrame.from_dict(qartod_results, orient='index')
qartod_flag_stats

In [117]:
# save data frame with statistics to csv
csv_path = dp.build_data_path(refdes, method, stream, 'qartod-clim-stats', folder='processed', suffix='.csv')
qartod_flag_stats.to_csv(csv_path)

In [119]:
# save datasets of comparison results to .nc file
nc_path = dp.build_data_path(refdes, method, stream, 'qartod-clim-comparison', folder='processed', suffix='.nc')
temperature_mismatch.to_netcdf(nc_path, mode='a')