# 07 - Generate statistics CSVs
This notebook calculates the QARTOD flag statistics for every deployment in a single pass, so that a representative set of statistics CSVs can be completed for each instrument class.

### Statistics for QARTOD tests in production

In [1]:
# Import libraries available from main conda channels or conda-forge
import xarray as xr
import pandas as pd
import numpy as np
import glob
import os
import re
import warnings
warnings.filterwarnings("ignore")

# Import dask tools and ProgressBar
import dask
from dask.diagnostics import ProgressBar

# Import qartod_testing project functions
from qartod_testing.qc_flag_statistics import ooinet_gold_copy_request, \
    get_test_parameters, parse_qartod_executed, qartod_summary_expanded, \
    get_deployment_ds, collect_statistics

# Import OOI library functions
from ooi_data_explorations.common import merge_frames
from ooinet.M2M import get_deployments, get_annotations

In [2]:
# Setup parameters needed to request data
# refdes is the reference designator of the instrument; method and stream identify
# which dataset to use. All three are also used below as folder names under
# ../data/external (inputs) and ../data/processed (outputs).
refdes = "GS01SUMO-RID16-05-PCO2WB000"
method = "recovered_host"
stream = "pco2w_abc_dcl_instrument_recovered"

# Site, node, and sensor info from deconstructed reference designator
# (left disabled: the separate parts are not used in the cells shown here)
# [site, node, sensor] = refdes.split('-', 2)

In [3]:
# Download the gold copy THREDDS datasets for this reference designator, method, and stream.
# ooinet_gold_copy_request is imported from qartod_testing.qc_flag_statistics in the imports cell.
# Per the original author's note, 'files' contains the list of catalog URLs for the downloaded datasets.
# NOTE(review): 'files' is not referenced again in the cells shown here; the statistics below
# are computed from the local files listed in expected_files - confirm this download is still needed.
files = ooinet_gold_copy_request(refdes, method, stream)

Downloading and Processing Data Files: 100%|██████████| 3/3 [00:02<00:00,  1.02it/s]


In [4]:
# Locate the expected-results NetCDF files kept in the external data folder for this
# method / stream / reference designator
folder_path = os.path.join(os.path.abspath('../data/external'), method, stream, refdes)
# glob returns matches in arbitrary order, so sort the local test files alphanumerically
expected_files = sorted(glob.glob(folder_path+'/*.nc'))

In [5]:
# Display the sorted list of local expected-results files to check which deployments are present
expected_files
# Disabled call, kept for reference (presumably lists the deployments for refdes - confirm before enabling):
# get_deployments(refdes)

['/home/jovyan/code/qartod_testing/data/external/recovered_host/pco2w_abc_dcl_instrument_recovered/GS01SUMO-RID16-05-PCO2WB000/deployment0001_GS01SUMO-RID16-05-PCO2WB000-recovered_host-pco2w_abc_dcl_instrument_recovered_20150218T220513.709000-20151227T100737.299000.nc',
 '/home/jovyan/code/qartod_testing/data/external/recovered_host/pco2w_abc_dcl_instrument_recovered/GS01SUMO-RID16-05-PCO2WB000/deployment0002_GS01SUMO-RID16-05-PCO2WB000-recovered_host-pco2w_abc_dcl_instrument_recovered_20151214T220510.207000-20161126T080546.684000.nc',
 '/home/jovyan/code/qartod_testing/data/external/recovered_host/pco2w_abc_dcl_instrument_recovered/GS01SUMO-RID16-05-PCO2WB000/deployment0004_GS01SUMO-RID16-05-PCO2WB000-recovered_host-pco2w_abc_dcl_instrument_recovered_20181204T180004.474000-20200120T080531.209000.nc']

In [6]:
# Retrieve the annotations for this reference designator and display them as a table
# (the recorded output includes beginDT/endDT, annotation text, and qcFlag columns)
get_annotations(refdes)

Unnamed: 0,@class,id,subsite,node,sensor,method,stream,beginDT,endDT,annotation,exclusionFlag,source,qcFlag,parameters
0,.AnnotationRecord,630,GS01SUMO,RID16,05-PCO2WB000,,,1424293560000,1451215200000,Deployment 1: Recovered_instrument data not av...,False,lgarzio@marine.rutgers.edu,0,[]
1,.AnnotationRecord,298,GS01SUMO,RID16,05-PCO2WB000,telemetered,pco2w_abc_dcl_instrument,1424293560000,1547078400000,"* UPDATED, 2019-01-25: After careful review, t...",False,friedrich.knuth@rutgers.edu,0,[931]
2,.AnnotationRecord,299,GS01SUMO,RID16,05-PCO2WB000,recovered_host,pco2w_abc_dcl_instrument_recovered,1424293560000,1547078400000,"* UPDATED, 2019-01-25: After careful review, t...",False,friedrich.knuth@rutgers.edu,0,[931]
3,.AnnotationRecord,834,GS01SUMO,,,,,1512777600000,1544330520000,Deployment 3: *UPDATED 2020-04-28: This moorin...,False,lgarzio@marine.rutgers.edu,0,[]
4,.AnnotationRecord,2157,GS01SUMO,RID16,05-PCO2WB000,,,1514678400000,1544330520000,Deployment 3: The PCO2WB000 is known to have c...,False,cdobson@whoi.edu,9,[]
5,.AnnotationRecord,1592,GS01SUMO,RID16,05-PCO2WB000,,,1552608000000,1579514460000,Deployment 4: Large data gap between 3/15/2019...,False,cdobson@whoi.edu,0,[]
6,.AnnotationRecord,1577,GS01SUMO,RID16,,,,1549216800000,1549886400000,UPDATED 2020-06-26: This data gap will also be...,False,cdobson@whoi.edu,0,[]


#### Gross range test statistics

In [7]:
# Compute the gross range test flag statistics from the local expected-results files.
# The recorded output has one row per deployment plus an "all" row, with total, good,
# suspect, and fail counts and percentages for pco2_seawater.
# NOTE(review): in the recorded output, deployment 01 has only 6 of 1407 points counted as
# good/suspect/fail - presumably the rest carry some other flag; confirm this is expected.
gross_range_stats = collect_statistics(expected_files, "gross_range")
gross_range_stats

Evaluating statistics on QARTOD flags for deployment             01
Evaluating statistics on QARTOD flags for deployment             02
Evaluating statistics on QARTOD flags for deployment             04
Evaluating statistics on QARTOD flags for all deployments


Unnamed: 0_level_0,pco2_seawater total,seawater good,seawater good %,seawater suspect,seawater suspect %,seawater fail,seawater fail %
deployment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01,1407,3,0.21,3,0.21,0,0.0
02,4039,4039,100.0,0,0.0,0,0.0
04,3921,3919,99.95,1,0.03,0,0.0
all,9367,7961,84.99,4,0.04,0,0.0


In [9]:
# Save the gross range statistics data frame as a CSV under
# ../data/processed/<method>/<stream>/<refdes>
folder_path = os.path.join(os.path.abspath('../data/processed'), method, stream, refdes)
# Create the output folder if needed; exist_ok=True makes re-running this cell safe
os.makedirs(folder_path, exist_ok=True)
# Build the file path with os.path.join (as folder_path is built above) rather than
# concatenating a hard-coded "/" separator. Missing values are written as "NaN" and
# mode='w' overwrites any CSV left by a previous run.
gross_range_stats.to_csv(
    os.path.join(folder_path, f"gross_range-{refdes}-flag_statistics.csv"),
    na_rep='NaN',
    mode='w',
)

#### Climatology test statistics

In [10]:
# Compute the climatology test flag statistics from the local expected-results files.
# The recorded output has one row per deployment plus an "all" row, with total, good,
# suspect, and fail counts and percentages for pco2_seawater.
# NOTE(review): in the recorded output, deployment 01 has only 140 of 1407 points counted as
# good/suspect/fail - presumably the rest carry some other flag; confirm this is expected.
climatology_stats = collect_statistics(expected_files, "climatology")
climatology_stats

Evaluating statistics on QARTOD flags for deployment 01
Evaluating statistics on QARTOD flags for deployment 02
Evaluating statistics on QARTOD flags for deployment 04
Evaluating statistics on QARTOD flags for all deployments


Unnamed: 0_level_0,pco2_seawater total,seawater good,seawater good %,seawater suspect,seawater suspect %,seawater fail,seawater fail %
deployment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01,1407,2,0.14,138,9.81,0,0.0
02,4039,4039,100.0,0,0.0,0,0.0
04,3921,3914,99.82,7,0.18,0,0.0
all,9367,7955,84.93,145,1.55,0,0.0


In [11]:
# Save the climatology statistics data frame as a CSV under
# ../data/processed/<method>/<stream>/<refdes>
folder_path = os.path.join(os.path.abspath('../data/processed'), method, stream, refdes)
# Create the output folder if needed; exist_ok=True makes re-running this cell safe
os.makedirs(folder_path, exist_ok=True)
# Build the file path with os.path.join (as folder_path is built above) rather than
# concatenating a hard-coded "/" separator. Missing values are written as "NaN" and
# mode='w' overwrites any CSV left by a previous run.
climatology_stats.to_csv(
    os.path.join(folder_path, f"climatology-{refdes}-flag_statistics.csv"),
    na_rep='NaN',
    mode='w',
)