In [1]:
import datetime
import glob
from io import StringIO

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt


# Local import 
# > Make sure SIO_wrap dir is on the same path as this script.
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
from SIO_wrap import dir_tree, fnames

from setdir import *


/Users/eddifying/Python/drifters/ already exists
/Users/eddifying/Python/drifters/02-code/ already exists
/Users/eddifying/Python/drifters/02-code/SIO_wrap/ already exists
/Users/eddifying/Python/drifters/01-data/04-aux/ already exists
/Users/eddifying/Python/drifters/01-data/02-intermediate/ already exists


# Save drifter data (all drifters combined into one NetCDF file)

The output file follows the [National Centers for Environmental Information (NOAA/NCEI) NetCDF Trajectory Template version 2](https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/#keyprinciples).


See example CDL here: https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/trajectoryIncomplete.cdl


In [2]:
# ---------------------------------------------------------------------------
# USER EDITS
# ---------------------------------------------------------------------------

# strftime pattern used to stamp the `date_created` global attribute
url_strftime = '%Y-%m-%d'

# Names of the variables to read from each hourly drifter file
# -- position
latname, lonname = 'GPS_Latitude_deg', 'GPS_Longitude_deg'
# -- velocity components
uvelname, vvelname = 'uvel', 'vvel'
# -- sea level pressure and drogue counts
slpname = 'SLP_mB'
droguename = 'Drogue_cnts'

In [3]:
# Read the table of drifter platform IDs; the first CSV column is used as the index
PID = pd.read_csv(
    cat_proc_path('PID_list.txt'),
    index_col=0,
    header='infer',
)


In [4]:
# Build one (trajectory, obs) dataset per drifter and merge them all into
# `blank_full`, which the following cells write to disk and display.
# Platforms without any matching hourly file are skipped.

# Global attributes are identical for every drifter, so build them once
# instead of on every loop iteration.
myattrs = dict(
    ncei_template_version = "NCEI_NetCDF_Trajectory_Template_v2.0",
    featureType = "trajectory",
    instrument = 'Surface velocity profiler (SVP)',
    title = "TERIFIC drifters: Deployed in the subpolar North Atlantic between 2019-12-05 and 2021-10-01",
    keywords = "SVP drifters, SVPB drifters",
    Conventions = 'CF-1.6, ACDD-1.3',
    id = 'To be populated with a DOI',
    naming_authority = 'gov.noaa.ncei',
    processing_level = "Filtered and hourly-interpolated",
    acknowledgment = "Funded by the European Union, Horizon 2020: European Research Council Starting Grant",
    date_created = datetime.datetime.today().strftime(url_strftime),
    creator_name = 'Eleanor Frajka-Williams',
    creator_email = 'eleanor.frajka@noc.ac.uk',
    creator_url = 'http://eleanorfrajka.com',
    institution = "National Oceanography Centre, UK",
    project = "Targeted Experiment to Reconcile Increased Freshwater with Increased Convection (TERIFIC)",
    time_coverage_start = '2019-12-05',
    time_coverage_end = '2022-01-11',
    time_coverage_resolution = 'hourly',
    creator_type = 'person',
    platform = 'Surface Velocity Profiler',
    sea_name = 'Atlantic',
)

# Every data variable shares the same two dimensions.
traj_dims = ["trajectory", "obs"]

# Start from "nothing merged yet" rather than keying on the loop index: the
# first platform in the list may have no hourly file, in which case
# `blank_full` would otherwise be undefined (or stale from an earlier run)
# when the next drifter is merged.
blank_full = None

for PID1 in PID["PID"].values:
    # All hourly files available for this platform ID
    fname = 'pid'+str(PID1)+'_*'
    hourly_files = sorted(glob.glob(cat_interim_path(fname)))
    if not hourly_files:
        continue

    # Find the most recent file (alphabetically, rather than by time)
    latest_file = hourly_files[-1]

    # Pull everything out while the file is open; `.values` yields in-memory
    # numpy arrays, so the file handle is released as soon as the block ends.
    with xr.open_dataset(latest_file) as ds_hourly:
        # np.newaxis prepends the length-1 trajectory axis
        time = ds_hourly.time.values[np.newaxis, :]
        lat = ds_hourly[latname].values[np.newaxis, :]
        lon = ds_hourly[lonname].values[np.newaxis, :]
        sst1 = ds_hourly.SST_degC.values[np.newaxis, :]
        uvel = ds_hourly[uvelname].values[np.newaxis, :]
        vvel = ds_hourly[vvelname].values[np.newaxis, :]
        slp = ds_hourly[slpname].values[np.newaxis, :]
        drogue = ds_hourly[droguename].values[np.newaxis, :]

    # 1-based observation counter, one entry per time stamp
    obs = np.arange(1, time.shape[1] + 1)

    # Coordinates where trajectory is the platform ID, and obs are the individual
    # (hourly) observations
    mycoords = dict(
        trajectory=(["trajectory"], [PID1]),
        obs=(["obs"], obs),
    )

    # Data variables
    # NOTE(review): `_FillValue` is passed below as an ordinary attribute, and
    # the value 850 for `slp` looks unusual -- confirm both are intended.
    myvars = dict(
        # NOTE(review): long_name may have been meant to read "Time ..." -- confirm
        time = (traj_dims, time,
                dict(long_name = "Type in datetime64[ns]",
                    )),
        lat = (traj_dims, lat,
               dict(long_name = "Latitude",
                    units = "Degrees north")),
        lon = (traj_dims, lon,
               dict(long_name = "Longitude",
                    units = "Degrees east")),
        SST = (traj_dims, sst1,
               dict(long_name = 'sea_surface_temperature',
                    units = 'deg C',
                    _FillValue = -999)),
        u = (traj_dims, uvel,
             dict(long_name = 'eastward velocity',
                  units = 'm/s',
                  _FillValue = -999)),
        v = (traj_dims, vvel,
             dict(long_name = 'northward velocity',
                  units = 'm/s',
                  _FillValue = -999)),
        slp = (traj_dims, slp,
               dict(long_name = "Sea level pressure",
                    units = "mB",
                    _FillValue = 850)),
        drogue_cnts = (traj_dims, drogue,
                       dict(long_name = 'Drogue counts',
                            units = 'counts')),
    )

    blank_new = xr.Dataset(data_vars=myvars, coords=mycoords, attrs=myattrs)

    # The first drifter found seeds the result; later ones are merged into it.
    if blank_full is None:
        blank_full = blank_new
    else:
        blank_full = blank_full.combine_first(blank_new)

# Fail here with a clear message rather than later when writing the file.
if blank_full is None:
    raise FileNotFoundError(
        "No hourly 'pid<ID>_*' files were found for any platform ID; "
        "nothing to combine."
    )



In [6]:
# Name of the combined output file
fname_full = 'TERIFIC_drifters.nc'

# mode='w' replaces any file that already exists at this path
blank_full.to_netcdf(path=cat_proc_path(fname_full), mode='w')


In [7]:
# Display the combined dataset as this cell's output
blank_full

# Filesize too big for Github

Could I reduce it by marking only the date the drogue was lost?

I could also separate it into four (or five) deployments