In [None]:
import glob
import multiprocessing

# import joblib
# from joblib import Parallel, delayed
import numpy as np
import pandas as pd
import xarray as xr

import pcmsc
import portal
%load_ext autoreload
%autoreload 2

In [None]:
# Metadata for the data release handled by this notebook. These three values
# are passed unchanged to every pcmsc.convert call in the cells below.
doi = "P91T185R"
title = (
    "Time series of carbonate system parameters in Eastern Gulf of Mexico "
    "near Tampa Bay, Florida, USA"
)
# One sentence per line; adjacent literals are concatenated into a single
# string with no line breaks.
summary = (
    "This data set contains time series measurements of carbonate system parameters including water temperature (°C), pressure (dbars), salinity, pHT (pH on the total scale), carbon dioxide (ppm), dissolved oxygen (milligrams/L), and photosynthetically active radiation (microEinsteins). "
    "These data were collected in the eastern Gulf of Mexico near Tampa Bay, Florida, at the University of South Florida Coastal Ocean and Monitoring Prediction System (COMPS) Buoy C12 by the U.S. Geological Survey (USGS) St. Petersburg Coastal and Marine Science Center using an autonomous instrument package called the Ocean Carbon System version 3 (OCSv3). "
    "The OCSv3 consists of four sensors integrated using a Sea-Bird Stor-X data logger including a Sea-Bird SeapHOx pH sensor, a Sea-Bird SBE 37-SMP-ODO MicroCAT C-T-ODO (P) Recorder, a Pro-Oceanus CO2-Pro CV sensor, and a Wetlabs Eco-PAR sensor. "
    "Each parameter was measured hourly over multiple twenty-four hour time periods, ranging from weeks to months."
)

In [None]:
# Run pcmsc.convert once on the CSV, which sits in a directory that shares its
# base name, and print whatever the call returns.
stem = 'Gulf-of-Mexico-C12-OCSv3-Data-10252018-to-01292020'
f = f'{stem}/{stem}.csv'
c = pcmsc.convert(f, doi, title, summary)
print(c)

In [None]:


# NOTE(review): `ds` is not created anywhere in the visible notebook. This cell
# only runs if a Dataset named `ds` already exists in the kernel -- confirm
# where it is meant to be loaded before relying on Restart & Run All.
print(ds)

ds.attrs['Conventions'] = "CF-1.6, ACDD-1.3"
# Do not write a _FillValue for any coordinate variable
for d in ds.coords:
    ds[d].encoding["_FillValue"] = None
# no _FillValue for lat, lon just to be safe
for d in ["latitude", "longitude"]:
    ds[d].encoding["_FillValue"] = None
# CF: Add axis attr
ds["time"].attrs["axis"] = "T"
ds["longitude"].attrs["axis"] = "X"
ds["latitude"].attrs["axis"] = "Y"

# Each measured variable and the quality-flag variable that qualifies it.
qc_pairs = {
    "T(W) (C)": "QF_T(W)",
    "PRESS (dbar)": "QF_PRESS",
    "SALINITY": "QF_SALINITY",
    "pHT": "QF_pHT",
    "CO2(ppm)": "QF_CO2",
    "OXYGEN (mg/L)": "QF_OXYGEN",
    "PAR (microEinsteins)": "QF_PAR",
}
for var, qual in qc_pairs.items():
    # only keep values with QAQC values of 1
    flags = ds[qual]
    if np.issubdtype(flags.dtype, np.number):
        # Accept any numeric flag dtype: `dtype == int` only matched the
        # platform default integer width, and flag columns containing missing
        # values are typically float. NaN flags compare != 1, so they mask too.
        ds[var][flags != 1] = np.nan
    elif flags.dtype == object:
        # Flags stored as text: a value is kept when its flag contains '1'.
        # str() lets non-string entries (e.g. NaN) count as "not good" instead
        # of raising; dtype=bool keeps `~goods` valid for an empty array.
        # NOTE(review): substring match also accepts flags such as '10' or
        # '21' -- confirm the flag vocabulary.
        goods = np.array(['1' in str(x) for x in flags.values], dtype=bool)
        ds[var][~goods] = np.nan

for var in ds.data_vars:
    if "QF_" in var:
        ds[var].attrs["long_name"] = "A numeric value that indicates the quality of the reported data"

ds["PRES(IRGA) (mbar)"].attrs["long_name"] = " pressure in the sampling chamber of the infrared gas analyzer of the CO2 sensor"
ds["PRES(IRGA) (mbar)"].attrs["units"] = "mbar"
# The oxygen description and mg/L units belong on the oxygen variable; they
# were previously written onto "CO2(ppm)".
ds["OXYGEN (mg/L)"].attrs["long_name"] = "concentration of oxygen in milligrams per liter of seawater"
ds["OXYGEN (mg/L)"].attrs["units"] = "mg/L"
ds["PAR (microEinsteins)"].attrs["long_name"] = "concentration of photosynthetically available radiation"
ds["PAR (microEinsteins)"].attrs["units"] = "microEinsteins"

ds = portal.assign_standard_names(ds)
# Tag every data variable with the same coverage_content_type
for k in ds.data_vars:
    ds[k].attrs["coverage_content_type"] = "physicalMeasurement"
print(ds)

In [None]:
%%time
files = glob.glob(f'../pcmsc/doi-{doi}/*.nc')
n = 1
for f in files:
    print(n/len(files)*100, f)
    c = pcmsc.convert(f, doi, title, summary)
    print(c)
    n += 1

In [None]:
# Spot-check CF 1.6 compliance on a ten-file slice of the files under clean/.
# Sorting first makes the slice pick the same files on every run (glob order
# is arbitrary). If fewer than 101 files exist the slice is empty and nothing
# is checked.
files = sorted(glob.glob(f'../pcmsc/doi-{doi}/clean/*.nc'))
subset = files[100:110]
for n, f in enumerate(subset, start=1):
    # Progress is relative to the slice being checked, not to every file found
    print(n/len(subset)*100, f)
    cc = portal.check_compliance_system(f, "cf:1.6")
    print(cc.stdout.decode('utf-8'))
    # To run the ACDD checker as well, uncomment:
    # cc = portal.check_compliance_system(f, "acdd")
    # print(cc.stdout.decode('utf-8'))

In [None]:
%%time
files = glob.glob('../pcmsc/doi-' + doi + '/*.nc')
# need to specify multiprocessing so we get print statements
Parallel(n_jobs=8, backend='multiprocessing')(delayed(convert)(f) for f in files)