In [None]:
#| default_exp nc_template

# MARIS NetCDF Template
> Creation of the MARIS NetCDF template from a "pseudo" Common Data Language (CDL) `.toml` config file

In [None]:
#| export
from netCDF4 import Dataset
import pandas as pd
from pathlib import Path
from fastcore.basics import patch, store_attr
from typing import Dict

In [None]:
#| export
# Pseudo-CDL description of the NetCDF template: global attributes, group names,
# attributes of the variables shared by every group, and the suffixes used to
# derive the uncertainty / detection-limit variables of each analyte.
CDL = {
    'global_attr': {
        'description': 'Template description',
        'summary': 'Template summary',
        'keyword': 'MARIS other-key-words',
        'license': 'Common ...'
    },
    'grps': ['seawater', 'biota', 'sediment', 'suspended-matter'],
    'var_attrs': {
        'sample': {
            'long_name': 'Sample ID of measurement'
        },
        'depth': {
            'long_name': 'Depth below sea level',
            # NOTE(review): `depth_below_sea_floor` does not match the long name above
            # (depth below sea level); confirm which CF standard name is intended.
            'standard_name': 'depth_below_sea_floor',
            'units': 'm',
            'axis': 'Z'},
        # Longitude is measured eastward and is the X axis
        'lon': {
            'long_name': 'Measurement longitude',
            'standard_name': 'longitude',
            'units': 'degrees_east',
            'axis': 'X',
            '_CoordinateAxisType': 'Lon'
        },
        # Latitude is measured northward and is the Y axis
        'lat': {
            'long_name': 'Measurement latitude',
            'standard_name': 'latitude',
            'units': 'degrees_north',
            'axis': 'Y',
            '_CoordinateAxisType': 'Lat'
        },
        'time': {
            'long_name': 'Time of measurement',
            'standard_name': 'time',
            'units': 'seconds since 1970-01-01 00:00:00.0',
            'time_origin': '1970-01-01 00:00:00',
            'time_zone': 'UTC',
            'abbreviation': 'Date/Time',
            'axis': 'T',
            'calendar': 'gregorian'
        }
    },
    # Value written in attributes that must be filled in later (e.g. analyte units)
    'placeholder': '_to_be_filled_in_',
    # `long_name` / `standard_name` below are suffixes appended to the analyte's own
    # attributes; `var_suffix` is appended to the analyte's variable name.
    'uncertainty': {
        'long_name': ' uncertainty',
        'standard_name': '_uncertainty',
        'var_suffix': '_unc'
    },
    'detection_limit': {
        'long_name': ' detection limit',
        'standard_name': '_detection_limit',
        'var_suffix': '_dl'
    }
}

In [None]:
# File name of the NetCDF template; passed as `tpl_fname` to `NCTemplate` in the example cell below
tpl_fname = 'maris-template.nc'

In [None]:
#| export
class NCTemplate:
    "Generator of a NetCDF template file from a pseudo-CDL configuration and a nuclide lookup table"
    def __init__(self, 
                 tpl_fname:str, # File name of the NetCDF template to generate (written into `dest_dir`)
                 vars_fname:str, # File name and path of MARIS nuclide look up table
                 dest_dir:str, # Destination directory for generated NetCDF template files
                 cfgs:Dict, # Pseudo CDL configuration (global attributes, groups, variable attributes, ...)
                ):
        # fastcore helper: keeps every argument above as a same-named attribute on `self`
        store_attr()

In [None]:
#| export
@patch
def get_analytes(self:NCTemplate,
                 col_varnames:str='nc_name', # Column name containing the NetCDF variable names
                 col_stdnames:str='nusymbol', # Column name containing the NetCDF standard names
                ):
    "Return one `{'name': ..., 'attrs': {...}}` dict per analyte listed in the lookup table `vars_fname`"
    def _as_label(row):
        # Space-separated string form of the values of one row
        return ' '.join(row.values.astype(str))

    lut = pd.read_excel(self.vars_fname, index_col=0)
    # Discard the rows flagged as not available
    lut = lut[lut.nuclide != 'NOT AVAILABLE']

    names = lut[col_varnames].tolist()
    std_names = lut[col_stdnames].tolist()
    labels = lut[['nuclide', 'massnb']].apply(_as_label, axis=1).tolist()

    analytes = []
    for name, label, std_name in zip(names, labels, std_names):
        analytes.append({'name': name,
                         'attrs': {'long_name': label.capitalize(),
                                   'standard_name': std_name}})
    return analytes

In [None]:
#| export
@patch
def create_variable(self:NCTemplate,
                    nc, # NetCDF file
                    name:str, # Name of the variable
                    attrs:dict, # Variable attributes
                    dtype:str='f4', # Type of the variable
                    dim:tuple=('sample',), # Dimension
                   ):
    "Create variable `name` in `nc`, set its attributes from `attrs` and return `nc`"
    nc.createVariable(name, dtype, dim).setncatts(attrs)
    return nc

In [None]:
#| export
@patch
def generate(self:NCTemplate,
            common_vars:list=['lon', 'lat', 'depth', 'time'], # Common variables
            ):
    "Generate CDL"
    fname = Path(self.dest_dir)/self.tpl_fname
    with Dataset(fname, 'w', format='NETCDF4') as nc:
        # Create dataset attributes
        nc.setncatts(self.cfgs['global_attr']) 
        
        # Create shared `sample` dimension
        nc.createDimension('sample', None)

        # Create grps
        for grp_name in self.cfgs['grps']:
            grp = nc.createGroup(grp_name)
            # Create common variables
            self.create_variable(grp, 'sample', self.cfgs['var_attrs']['sample'], 'i4')
            for name in common_vars: self.create_variable(grp, name, self.cfgs['var_attrs'][name])

            # Create analyte variables
            for analyte in self.get_analytes():
                attrs = analyte['attrs']
                attrs['units'] = self.cfgs['placeholder']

                self.create_variable(grp, analyte['name'], attrs)

                # Related uncertainty and detection limit
                for related_var in ['uncertainty', 'detection_limit']:
                    cfg = self.cfgs[related_var]
                    attrs['long_name'] += cfg['long_name']
                    attrs['standard_name'] += cfg['standard_name']
                    self.create_variable(grp, analyte['name'] + cfg['var_suffix'], attrs)

In [None]:
#|eval: false
# Build the template generator from the nuclide lookup table and the `CDL` configuration
nc_tpl = NCTemplate(tpl_fname,
               vars_fname='./files/lut/dbo_nuclide.xlsx', 
               dest_dir='./files/nc',
               cfgs=CDL)

# Write the NetCDF template file into `dest_dir`
nc_tpl.generate()