### Import modules used in this notebook

In [None]:
# Import libraries
import os
import re
import gc
import io
import ast
import pandas as pd
import numpy as np
import xarray as xr
import warnings
warnings.filterwarnings("ignore")
import sys

In [None]:
# Import OOINet library
sys.path.append("c:\\Users\\cooleyky\\Documents\\GitHub\\OOINet") # this is what was missing from the steps I followed to install ooinet and ooi-data-explorations as local dev repo
from ooinet import M2M
from ooinet.Instrument.common import process_file

In [None]:
# Import functions from ooi-data-explorations library
sys.path.append("c:\\Users\\cooleyky\\Documents\\GitHub\\ooi-data-explorations\\python") # why did the initial install not include this?
from ooi_data_explorations.uncabled.process_dosta import dosta_datalogger
from ooi_data_explorations.combine_data import combine_datasets

In [None]:
# Import dask tools and ProgressBar
import dask
from dask.diagnostics import ProgressBar

### Define data parameters and routines

In [None]:
# Setup parameters needed to request data (reused by every request in this notebook)
refdes, method, stream = (
    "CP01CNSM-MFD37-03-CTDBPD000",      # Coastal Pioneer Array (NES) - Central Surface Mooring CTD Bottom-pumped
    "recovered_inst",                   # non-decimated data from recovered instrument
    "ctdbp_cdef_instrument_recovered",  # name of data stream
)

In [None]:
# Generic per-file preprocessing step. It is wrapped in dask.delayed, so calling
# it only builds a task; the work happens when the task is passed to dask.compute.
@dask.delayed
def preprocess(ds):
    """Open one dataset and run it through OOINet's ``process_file``.

    Parameters
    ----------
    ds
        Anything ``xr.open_dataset`` accepts; in this notebook it is called
        with the entries of ``sensor_files``.

    Returns
    -------
    The value returned by ``process_file`` for the opened dataset.
    """
    opened = xr.open_dataset(ds)
    return process_file(opened)

### QARTOD in Production: Request data from the THREDDS catalog

In [None]:
# Use the gold copy THREDDS datasets: build the catalog URL for the
# refdes/method/stream selected above
thredds_url = M2M.get_thredds_url(refdes, method, stream, goldCopy=True)

# Get the THREDDS catalog found at that URL
thredds_catalog = M2M.get_thredds_catalog(thredds_url)

In [None]:
# Clean the THREDDS catalog; the call returns two results, kept here as
# sensor_files and ancillary_files.
# Original author's note: this removes entries from thredds_catalog if they do
# not match the stream, or are used in processing data from the selected stream.
# NOTE(review): confirm the exact split against the OOINet M2M.clean_catalog source.
sensor_files, ancillary_files = M2M.clean_catalog(thredds_catalog, stream)

In [None]:
# Now build the url to access the data: replace the catalog prefix of each entry
# with the M2M.URLS["goldCopy_dodsC"] base URL.
# The pattern is a raw string so that "\?" reaches the regex engine as an escaped
# literal "?" rather than being an invalid string escape, and the "." is escaped
# so it only matches a literal dot.
sensor_files = [
    re.sub(r"catalog\.html\?dataset=", M2M.URLS["goldCopy_dodsC"], entry)
    for entry in sensor_files
]
# sensor_files  # uncomment to inspect the rewritten URLs

In [None]:
# Queue one delayed preprocess task per sensor file; nothing is opened until
# the tasks are handed to dask.compute
zs = list(map(preprocess, sensor_files))

In [None]:
# Load all the datasets: evaluate the delayed tasks, chunk each result and
# concatenate them along the "time" dimension (progress is shown while it runs)
with ProgressBar():
    data = xr.concat(
        [loaded.chunk() for loaded in dask.compute(*zs)],
        dim="time",
    )

In [None]:
# Make a copy of the production data under its own name, because `data` is
# reassigned by the development request further down.
# NOTE(review): copy() is called without deep=True - confirm that is sufficient here.
ds_prod = data.copy()

### QARTOD in Development: Request data from dev1 server

In [None]:
# Sub ooinet-dev1-west.intra.oceanobservatories.org into the available API urls:
# urls containing "opendap" get the opendap dev1 host, all others have "ooinet"
# replaced by the ooinet dev1 host. Plain str.replace is used because both
# search terms are literal text, not regex patterns.
# NOTE(review): nothing in the visible notebook reads Dev01_urls after it is
# built here - confirm where the dev1 urls are meant to be used.
Dev01_urls = {
    key: (
        url.replace("opendap", "opendap-dev1-west.intra")
        if "opendap" in url
        else url.replace("ooinet", "ooinet-dev1-west.intra")
    )
    for key, url in M2M.URLS.items()
}
    
# NOTE(review): the calls in this cell are the same as in the production section
# and read M2M.URLS directly; Dev01_urls is not referenced, so confirm that this
# request really reaches the dev1 server rather than repeating the production one.

# Use the gold copy THREDDS datasets
thredds_url = M2M.get_thredds_url(refdes, method, stream, goldCopy=True) # in this example we recycle refdes, method, stream

# Get the THREDDS catalog
thredds_catalog = M2M.get_thredds_catalog(thredds_url)

# Clean the THREDDS catalog
sensor_files, ancillary_files = M2M.clean_catalog(thredds_catalog, stream)

# Now build the url to access the data. The pattern is a raw string so that "\?"
# is a regex-escaped literal "?" instead of an invalid string escape, and the "."
# is escaped so it only matches a literal dot.
sensor_files = [
    re.sub(r"catalog\.html\?dataset=", M2M.URLS["goldCopy_dodsC"], entry)
    for entry in sensor_files
]
# One delayed preprocess task per file; evaluated by dask.compute below
zs = [preprocess(entry) for entry in sensor_files]

# Load all the datasets and concatenate them along the "time" dimension
with ProgressBar():
    data = xr.concat([ds.chunk() for ds in dask.compute(*zs)], dim="time")

In [None]:
# Make a copy of the development data under its own name, so it can be told
# apart from ds_prod.
# NOTE(review): copy() is called without deep=True - confirm that is sufficient here.
ds_dev = data.copy()

### Save datasets to interim data folder for further processing

In [None]:
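# TODO: save the datasets to the interim data folder - something along the lines of write_nc(),
# or xarray's Dataset.to_netcdf() (there is no xr.save_dataset function).
# Suggested file name: datasets_for_testing.nc
# The production and development datasets will probably be tested separately, so consider writing a separate file for each.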