In [None]:
#| default_exp nc_template

# MARIS NetCDF Template
> Creation of a MARIS NetCDF template based on a "pseudo" Common Data Language (CDL) `.toml` config file.

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| export
from typing import Dict, Union, Callable
from copy import deepcopy
import re

import netCDF4
from netCDF4 import Dataset
import numpy as np
import pandas as pd
from pathlib import Path
# from fastcore.basics import patch, store_attr
import fastcore.all as fc
from fastcore.basics import patch

from marisco.utils import read_toml
from marisco.configs import name2grp, get_cfgs, get_enum_dicts

## NetCDF template generator

Generate a NetCDF4 template from the configurable [`CDL.toml`](https://github.com/franckalbinet/marisco/blob/main/nbs/files/cdl.toml) file, itself generated in [`/api/configs.ipynb`](https://github.com/franckalbinet/marisco/blob/main/nbs/api/configs.ipynb).

In [None]:
#| export
class NCTemplater:
    "MARIS NetCDF template generator."
    def __init__(self, 
                 cdl_fname:str, # File name and path of the "Pseudo CDL" (`.toml`)
                 nuclide_vars_fname:str, # File name and path of MARIS nuclide lookup table containing variable names
                 tpl_fname:str, # File name and path of NetCDF4 file to be generated
                 enum_dicts:Dict # MARIS NetCDF enumeration types
                ):
        # Keep every constructor argument as an attribute of the same name
        # (`self.nuclide_vars_fname`, `self.tpl_fname`, `self.enum_dicts`, ...).
        fc.store_attr()
        # Parsed content of the "pseudo CDL" configuration file.
        self.cdl = read_toml(cdl_fname)
        # `dim` section of the CDL: spec of the dimension shared by all variables.
        self.dim = self.cdl['dim']
        # NetCDF enum types by name; populated by `create_enum_types`.
        self.enum_types = {}

For example, given the `cdl.toml` configuration below, the templater gets access to, among others, its `dim` definition section:

In [None]:
# Build a templater from the test fixtures shipped with the notebooks.
templater = NCTemplater(
    cdl_fname='./files/cdl.toml',
    nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx',
    tpl_fname='./files/nc/test.nc',
    enum_dicts=get_enum_dicts(),
)

# Content expected in the `dim` section of the CDL.
expected = dict(
    name='sample',
    dtype='u8',
    attrs=dict(long_name='Sample ID of measurement'),
)

fc.test_eq(templater.dim, expected)

In [None]:
#| export
@patch
def nuclide_vars(
    self:NCTemplater,
    col_varnames:str='nc_name', # Column of the Excel lookup file holding the NetCDF variable names
    col_stdnames:str='nusymbol', # Column of the Excel lookup file holding the NetCDF standard names
    dtype:str='f4', # Data type assigned to every nuclide variable
    ) -> list[dict]: # One dict per nuclide with `name`, `dtype` and `attrs` keys
    "Return the radionuclide variables (name, dtype and attributes) listed in the nuclide lookup table."
    lut = pd.read_excel(self.nuclide_vars_fname, index_col=0)
    # Drop the rows flagged as 'NOT AVAILABLE' in the `nuclide` column.
    lut = lut[lut.nuclide != 'NOT AVAILABLE']

    rows = zip(
        lut[col_varnames],
        lut['nuclide'].str.capitalize(),
        lut['massnb'].astype(str),
        lut[col_stdnames],
    )

    variables = []
    for var_name, element, mass_number, std_name in rows:
        attrs = {
            'long_name': f"{element.capitalize()} {mass_number}",
            'standard_name': std_name,
        }
        # Key order (name, dtype, attrs) is kept as-is on purpose.
        variables.append({'name': var_name, 'dtype': dtype, 'attrs': attrs})
    return variables

For example, to retrieve the NetCDF nuclide names and associated attributes:

In [None]:
templater = NCTemplater(
    cdl_fname='./files/cdl.toml',
    nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx',
    tpl_fname='./files/nc/test.nc',
    enum_dicts=get_enum_dicts(),
)

# The first two nuclide variables expected from the lookup table.
expected = [
    dict(name='h3', attrs=dict(long_name='Tritium 3', standard_name='3H'), dtype='f4'),
    dict(name='be7', attrs=dict(long_name='Beryllium 7', standard_name='7Be'), dtype='f4'),
]

fc.test_eq(templater.nuclide_vars()[:2], expected)

In [None]:
#| export
@patch
def derive(
    self:NCTemplater,
    nuclide:dict, # Nuclide variable spec (`name`, `dtype`, `attrs`)
    suffix:dict,  # Suffix rule as described in CDL (e.g `_unc`), with `name`, `dtype` and `attrs`
) -> dict: # Spec of the derived variable (`name`, `dtype`, `attrs`)
    "Build the spec of a nuclide-dependent variable by appending a CDL suffix rule to a nuclide spec."
    derived_name = nuclide['name'] + suffix['name']
    derived_dtype = suffix['dtype']  # The data type comes from the suffix rule, not the nuclide

    # Each nuclide attribute is concatenated with the suffix attribute of the same key.
    derived_attrs = {}
    for attr_name, attr_value in nuclide['attrs'].items():
        derived_attrs[attr_name] = attr_value + suffix['attrs'][attr_name]

    return {'name': derived_name, 'dtype': derived_dtype, 'attrs': derived_attrs}

For example, the `cdl.toml` file defines, among other things, the naming convention for variables derived from nuclides (e.g. `h3_unc` for the measurement uncertainty of the `h3` nuclide variable).

In [None]:
# Templater built from the test fixtures, reused by the cells below.
templater = NCTemplater(
    cdl_fname='./files/cdl.toml',
    nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx',
    tpl_fname='./files/nc/test.nc',
    enum_dicts=get_enum_dicts(),
)

Below is the Tritium NetCDF variable, as built from the nuclide lookup table:

In [None]:
templater.nuclide_vars()[0]

{'name': 'h3',
 'dtype': 'f4',
 'attrs': {'long_name': 'Tritium 3', 'standard_name': '3H'}}

In [None]:
# A suffix rule such as those defined in the CDL `.toml` file
suffix = dict(
    name='_unc',
    attrs=dict(long_name=' uncertainty', standard_name='_uncertainty'),
    dtype='f4',
)

# The variable spec expected once the rule is applied to the first nuclide (`h3`)
expected = dict(
    name='h3_unc',
    attrs=dict(long_name='Tritium 3 uncertainty', standard_name='3H_uncertainty'),
    dtype='f4',
)

fc.test_eq(templater.derive(templater.nuclide_vars()[0], suffix=suffix), expected)

In [None]:
#| export
@fc.patch
def create_enum_types(self:NCTemplater):
    "Register one NetCDF enum type (`uint16`-based) per entry of `enum_dicts` on the open dataset `self.nc`."
    for enum_name in self.enum_dicts:
        members = self.enum_dicts[enum_name]
        # Keep the created type so variables can later refer to it by name.
        self.enum_types[enum_name] = self.nc.createEnumType(np.uint16, enum_name, members)

In [None]:
#| export
@fc.patch
def create_groups(self:NCTemplater):
    "Create every NetCDF group listed in the `grps` section of the CDL and fill it with its variables."
    # Collect all names first so a malformed entry fails before any group is created.
    names = [grp_cfg['name'] for grp_cfg in self.cdl['grps'].values()]
    for name in names:
        self.create_variables(self.nc.createGroup(name))

In [None]:
#| export
@fc.patch
def create_variables(self:NCTemplater, 
                     grp:netCDF4.Group, # NetCDF group
                     ):
    "Create all the variables of a group: dimension, defaults, group-specific and analyte ones."
    # The dimension spec is itself materialised as a variable of the group.
    self.create_variable(grp, self.dim)
    populate_steps = (
        self.create_default_variables,
        self.create_group_specific_variables,
        self.create_analyte_variables,
    )
    for populate in populate_steps:
        populate(grp)

In [None]:
#| export
@fc.patch
def create_default_variables(self:NCTemplater, 
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create in `grp` the variables listed under `vars.defaults` in the CDL."
    default_specs = self.cdl['vars']['defaults']
    for var_spec in default_specs.values():
        self.create_variable(grp, var_spec)

In [None]:
#| export
@fc.patch
def create_group_specific_variables(self:NCTemplater, 
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create in `grp` the variables that the CDL declares for this group only (none if no such section)."
    vars_cfg = self.cdl['vars']
    # `name2grp` translates the NetCDF group name into the key used under `vars`
    # (presumably the group identifier used in the CDL -- see `marisco.configs`).
    grp_specs = vars_cfg.get(name2grp(grp.name), {})
    for var_spec in grp_specs.values():
        self.create_variable(grp, var_spec)

In [None]:
 #| export
@fc.patch
def create_analyte_variables(self:NCTemplater, 
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create in `grp` one variable per nuclide plus one derived variable per suffix rule of the CDL."
    for nuclide in self.nuclide_vars():
        self.create_variable(grp, nuclide)
        # Variables depending on the nuclide, one per entry of `vars.suffixes`.
        for suffix_rule in self.cdl['vars']['suffixes'].values():
            derived = self.derive(nuclide, suffix_rule)
            self.create_variable(grp, derived)

In [None]:
#| export
@fc.patch
def create_variable(self:NCTemplater, 
            grp:netCDF4.Group, # NetCDF group
            var:Dict, # Variable specification dict with `name`, `dtype` and `attrs` keys
           ):
    "Create NetCDF variable with proper types (standard and enums)"
    # Read the spec by key, not by position: `var.values()` silently mixes up the
    # fields when the dict is not ordered as (name, dtype, attrs), e.g.
    # {'name': ..., 'attrs': ..., 'dtype': ...}.
    name, dtype, attrs = var['name'], var['dtype'], var['attrs']
    # Use the registered enum type when `dtype` names one, else the dtype string itself.
    nc_dtype = self.enum_types.get(dtype, dtype)
    nc_var = grp.createVariable(name, nc_dtype, self.dim['name'])
    nc_var.setncatts(attrs)

In [None]:
#| export
@fc.patch
def generate(self:NCTemplater):
    "Write the NetCDF4 template file `tpl_fname` as described by the CDL."
    with Dataset(self.tpl_fname, 'w', format='NETCDF4') as nc:
        # The helper methods reach the open dataset through `self.nc`.
        self.nc = nc
        nc.setncatts(self.cdl['global_attrs'])
        self.create_enum_types()
        # A size of `None` asks netCDF4 for an unlimited dimension.
        nc.createDimension(self.dim['name'], None)
        self.create_groups()

In summary, to produce a MARIS NetCDF template:

In [None]:
# Test fixtures: CDL config, nuclide lookup table and output template path.
cdl_fname_test, nuclide_vars_fname_test, tpl_fname_test = (
    './files/cdl.toml',
    './files/lut/dbo_nuclide.xlsx',
    './files/nc/test.nc',
)

templater = NCTemplater(
    cdl_fname=cdl_fname_test,
    nuclide_vars_fname=nuclide_vars_fname_test,
    tpl_fname=tpl_fname_test,
    enum_dicts=get_enum_dicts(lut_src_dir='./files/lut', cdl_name=cdl_fname_test),
)

# Write the template file to `tpl_fname_test`.
templater.generate()