In [None]:
#| default_exp nc_template

# MARIS NetCDF Template
> Creation of a MARIS NetCDF template from a "pseudo" Common Data Language (CDL) `.toml` config file

In [None]:
#| export
from netCDF4 import Dataset
import pandas as pd
from pathlib import Path
from fastcore.basics import patch, store_attr
from fastcore.test import *
from typing import Dict

from marisco.utils import read_toml
from marisco.configs import BASE_PATH

In [None]:
#| export
class NCTemplate:
    "Builder of MARIS NetCDF template files driven by a pseudo-CDL configuration"
    def __init__(self,
                 tpl_fname:str, # File name of the NetCDF template to generate (joined to `dest_dir`)
                 vars_fname:str, # Path of the MARIS nuclide look-up table (read with `pd.read_excel`)
                 dest_dir:str, # Directory in which the generated NetCDF template is written
                 cdl:Dict, # Pseudo CDL (parsed `.toml`): dimension, variables, groups, global attributes
                ):
        # Keep every constructor argument as a same-named instance attribute.
        store_attr()
        # Shortcut to the definition of the shared dimension.
        self.dim = cdl['dim']

In [None]:
# Parse the pseudo-CDL configuration and build the templater used in the examples below
cdl = read_toml(Path('./files/cdl.toml'))
nc_tpl = NCTemplate(tpl_fname='test.nc',
                    vars_fname='./files/lut/dbo_nuclide.xlsx',
                    dest_dir='./files/nc',
                    cdl=cdl)

In [None]:
# The `dim` entry of the CDL is exposed as the `dim` attribute of the templater
expected = {'name': 'sample', 'long_name': 'Sample ID of measurement'}
test_eq(nc_tpl.dim, expected)

In [None]:
#| export
@patch
def get_analytes(self:NCTemplate,
                 col_varnames:str='nc_name', # Column name containing the NetCDF variable names
                 col_stdnames:str='nusymbol', # Column name containing the NetCDF standard names
                ):
    "List the analytes of the look-up table as dicts with keys `name`, `long_name` and `standard_name`"
    lut = pd.read_excel(self.vars_fname, index_col=0)
    # Discard the rows whose nuclide is flagged as 'NOT AVAILABLE'
    lut = lut[lut.nuclide != 'NOT AVAILABLE']

    rows = zip(lut[col_varnames].tolist(),
               lut[col_stdnames].tolist(),
               lut['nuclide'].tolist(),
               lut['massnb'].tolist())

    analytes = []
    for var_name, std_name, nuclide, mass_nb in rows:
        # Long name is "<nuclide> <mass number>" with only the first letter upper-cased
        long_name = ' '.join([str(nuclide), str(mass_nb)]).capitalize()
        analytes.append({'name': var_name,
                         'long_name': long_name,
                         'standard_name': std_name})
    return analytes

In [None]:
# Peek at the first five analytes read from the look-up table
nc_tpl.get_analytes()[:5]

[{'name': 'h3', 'long_name': 'Tritium 3', 'standard_name': '3H'},
 {'name': 'be7', 'long_name': 'Beryllium 7', 'standard_name': '7Be'},
 {'name': 'c14', 'long_name': 'Carbon 14', 'standard_name': '14C'},
 {'name': 'k40', 'long_name': 'Potassium 40', 'standard_name': '40K'},
 {'name': 'cr51', 'long_name': 'Chromium 51', 'standard_name': '51Cr'}]

In [None]:
#| export
def derive(
    analyte:dict, # Analyte/nuclide/var name and associated netcdf attributes
    suffix:dict,  # Naming rules as described in CDL
):
    "Derive NetCDF var name & attributes as defined in CDL"
    # Work on a copy so that the caller's `analyte` is left untouched.
    derived = analyte.copy()
    for k, v in suffix.items():
        # Every key of `suffix` must already exist in `analyte` (KeyError otherwise).
        # Rebind instead of using `+=`: the copy above is shallow, so an in-place
        # `+=` on a mutable value (e.g. a list) would also alter `analyte`.
        derived[k] = derived[k] + v
    return derived

Example:

In [None]:
# Use the first analyte of the look-up table as a worked example
analyte = nc_tpl.get_analytes()[0]; analyte

{'name': 'h3', 'long_name': 'Tritium 3', 'standard_name': '3H'}

In [None]:
# Attach the CDL placeholder as units, as `generate` does for every analyte
analyte['units'] = cdl['placeholder']; analyte

{'name': 'h3',
 'long_name': 'Tritium 3',
 'standard_name': '3H',
 'units': '_to_be_filled_in_'}

In [None]:
# Naming rule for uncertainty variables, as defined in the CDL
suffix = cdl['vars']['suffixes']['uncertainty']; suffix

{'name': '_unc', 'long_name': ' uncertainty', 'standard_name': '_uncertainty'}

In [None]:
# Each suffix entry is appended to the analyte entry sharing its key;
# entries without a suffix rule (here `units`) are carried over unchanged
expected = {'name': 'h3_unc', 'long_name': 'Tritium 3 uncertainty', 
            'standard_name': '3H_uncertainty', 'units': '_to_be_filled_in_'}

test_eq(derive(analyte, suffix), expected)

In [None]:
#| export
@patch
def create_variable(self:NCTemplate, 
               nc, # NetCDF file
               var:Dict, # Variable
               dtype:str='f4', # Type of the variable
           ):
    "Create variable `var['name']` along the shared dimension of `nc`, set its other entries as attributes and return `nc`"
    name = var['name']
    # The 'name' entry identifies the variable itself; all remaining entries are attributes.
    # (Compare against the literal key 'name', not against the variable's name.)
    attrs = {k: v for k, v in var.items() if k != 'name'}
    nc_var = nc.createVariable(name, dtype, self.dim['name'])
    nc_var.setncatts(attrs)
    return nc

In [None]:
# Example of use: create the shared dimension, then a single default variable (`lon`)
with Dataset('files/nc/test.nc', 'w', format='NETCDF4') as nc:
    # A size of `None` makes the dimension unlimited
    nc.createDimension(nc_tpl.dim['name'], None)
    nc_tpl.create_variable(nc, cdl['vars']['defaults']['lon'])
    # Print while the dataset is still open
    print(nc.variables['lon'])

<class 'netCDF4._netCDF4.Variable'>
float32 lon(sample)
    name: lon
    long_name: Measurement longitude
    standard_name: longitude
    units: degrees_north
    axis: Y
    _CoordinateAxisType: Lon
unlimited dimensions: sample
current shape = (0,)
filling on, default _FillValue of 9.969209968386869e+36 used


In [None]:
#| export
@patch
def generate(self:NCTemplate,
             common_vars:list=['lon', 'lat', 'depth', 'time'], # Currently ignored: all `cdl['vars']['defaults']` are created
            ):
    "Write the NetCDF template `dest_dir/tpl_fname` as described by the CDL: global attributes, shared dimension and one group per `cdl['grps']` entry"
    fname = Path(self.dest_dir)/self.tpl_fname

    default_vars = list(self.cdl['vars']['defaults'].values())
    suffixes = list(self.cdl['vars']['suffixes'].values())

    # Read the look-up table once (it is the same for every group) and attach
    # the placeholder units to each analyte.
    analytes = [{**analyte, 'units': self.cdl['placeholder']}
                for analyte in self.get_analytes()]

    with Dataset(fname, 'w', format='NETCDF4') as nc:
        # Create dataset attributes
        nc.setncatts(self.cdl['global_attrs'])

        # Create shared `sample` dimension (unlimited)
        nc.createDimension(self.dim['name'], None)

        # Create grps
        for grp_cfg in self.cdl['grps'].values():
            grp = nc.createGroup(grp_cfg['name'])

            # Create 'dim' variable
            self.create_variable(grp, self.dim, 'i4')

            # Create default variables
            for var in default_vars:
                self.create_variable(grp, var)

            # Create analyte variables
            for analyte in analytes:
                self.create_variable(grp, analyte)

                # Derived variables, one per suffix rule of the CDL
                for suffix in suffixes:
                    self.create_variable(grp, derive(analyte, suffix))

In [None]:
# In summary, producing a MARIS NetCDF template takes two steps:
# 1. configure the templater
nc_tpl = NCTemplate(tpl_fname='test.nc',
                    vars_fname='./files/lut/dbo_nuclide.xlsx',
                    dest_dir='./files/nc',
                    cdl=cdl)

# 2. write the template to `dest_dir/tpl_fname`
nc_tpl.generate()