In [None]:
#| default_exp handlers.helcom

# HELCOM
> Data pipeline (handler) to convert HELCOM data ([source](https://helcom.fi/about-us)) to `NetCDF` format

The data is provided as a Microsoft Access database. `Mdbtools` (https://github.com/mdbtools/mdbtools) is used to convert tables into `.csv` files on Unix-like OS.

## Packages import

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| export
import pandas as pd
from tqdm import tqdm
from functools import partial
import fastcore.all as fc

from pathlib import Path

from marisco.utils import (has_valid_varname, match_worms, 
                           match_maris_species, match_maris_sediment)
from marisco.callbacks import (Callback, Transformer,
                               EncodeTimeCB, SanitizeLonLatCB)

from marisco.metadata import (GlobAttrsFeeder, BboxCB,
                              DepthRangeCB, TimeRangeCB,
                              ZoteroCB, KeyValuePairCB)

from marisco.configs import base_path, nc_tpl_path, cfg, cache_path
from marisco.serializers import NetCDFEncoder

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
fname_in = '../../_data/accdb/mors/csv'
fname_out = '../../_data/output/helcom.nc'

## Utils

In [None]:
#| export
def load_data(src_dir,
              smp_types=['SEA', 'SED', 'BIO']):
    """Read the HELCOM `.csv` exports and return one dataframe per sample type.

    For every code in `smp_types`, the measurement table (`<code>02.csv`) is
    left-joined with the sample table (`<code>01.csv`) on the `KEY` column, so
    each measurement row carries the attributes of its sample.

    Returns a dict keyed by 'seawater', 'sediment' or 'biota'.
    """
    group_names = {'SEA': 'seawater', 'SED': 'sediment', 'BIO': 'biota'}
    root = Path(src_dir)
    merged = {}
    for code in smp_types:
        measurements = pd.read_csv(root / (code + '02.csv'))
        samples = pd.read_csv(root / (code + '01.csv'))
        # Left join: keep every measurement, even when its sample is missing
        merged[group_names[code]] = measurements.merge(samples, on='KEY', how='left')
    return merged


def rename_cols(cols):
    """Flatten two-level `(outer, inner)` column labels into plain names.

    - empty `inner`: the `outer` label is kept as is;
    - `outer == 'unc'`: becomes `<inner>_unc`;
    - `outer == 'value'`: becomes `<inner>`;
    - any other pair with a non-empty `inner` produces no entry.
    """
    flat = []
    for outer, inner in cols:
        if not inner:
            flat.append(outer)
            continue
        if outer == 'unc':
            flat.append(inner + '_' + outer)
        elif outer == 'value':
            flat.append(inner)
    return flat

## Load tables

In [None]:
dfs = load_data(fname_in)

In [None]:
dfs['seawater'].head()

Unnamed: 0,KEY,NUCLIDE,METHOD,< VALUE_Bq/m³,VALUE_Bq/m³,ERROR%_m³,DATE_OF_ENTRY_x,COUNTRY,LABORATORY,SEQUENCE,...,LONGITUDE (ddmmmm),LONGITUDE (dddddd),TDEPTH,SDEPTH,SALIN,TTEMP,FILT,MORS_SUBBASIN,HELCOM_SUBBASIN,DATE_OF_ENTRY_y
0,WKRIL2012003,CS137,,,5.3,32.0,08/20/14 00:00:00,90,KRIL,2012003,...,29.2,29.3333,,0.0,,,,11,11,08/20/14 00:00:00
1,WKRIL2012004,CS137,,,19.9,20.0,08/20/14 00:00:00,90,KRIL,2012004,...,29.2,29.3333,,29.0,,,,11,11,08/20/14 00:00:00
2,WKRIL2012005,CS137,,,25.5,20.0,08/20/14 00:00:00,90,KRIL,2012005,...,23.09,23.15,,0.0,,,,11,3,08/20/14 00:00:00
3,WKRIL2012006,CS137,,,17.0,29.0,08/20/14 00:00:00,90,KRIL,2012006,...,27.59,27.9833,,0.0,,,,11,11,08/20/14 00:00:00
4,WKRIL2012007,CS137,,,22.2,18.0,08/20/14 00:00:00,90,KRIL,2012007,...,27.59,27.9833,,39.0,,,,11,11,08/20/14 00:00:00


In [None]:
dfs['biota'].head()

Unnamed: 0,KEY,NUCLIDE,METHOD,< VALUE_Bq/kg,VALUE_Bq/kg,BASIS,ERROR%,NUMBER,DATE_OF_ENTRY_x,COUNTRY,...,BIOTATYPE,TISSUE,NO,LENGTH,WEIGHT,DW%,LOI%,MORS_SUBBASIN,HELCOM_SUBBASIN,DATE_OF_ENTRY_y
0,BVTIG2012041,CS134,VTIG01,<,0.01014,W,,,02/27/14 00:00:00,6.0,...,F,5,16.0,45.7,948.0,18.453,92.9,2,16,02/27/14 00:00:00
1,BVTIG2012041,K40,VTIG01,,135.3,W,3.57,,02/27/14 00:00:00,6.0,...,F,5,16.0,45.7,948.0,18.453,92.9,2,16,02/27/14 00:00:00
2,BVTIG2012041,CO60,VTIG01,<,0.01398,W,,,02/27/14 00:00:00,6.0,...,F,5,16.0,45.7,948.0,18.453,92.9,2,16,02/27/14 00:00:00
3,BVTIG2012041,CS137,VTIG01,,4.338,W,3.48,,02/27/14 00:00:00,6.0,...,F,5,16.0,45.7,948.0,18.453,92.9,2,16,02/27/14 00:00:00
4,BVTIG2012040,CS134,VTIG01,<,0.009614,W,,,02/27/14 00:00:00,6.0,...,F,5,17.0,45.9,964.0,18.458,92.9,2,16,02/27/14 00:00:00


In [None]:
dfs['sediment'].head()

Unnamed: 0,KEY,NUCLIDE,METHOD,< VALUE_Bq/kg,VALUE_Bq/kg,ERROR%_kg,< VALUE_Bq/m²,VALUE_Bq/m²,ERROR%_m²,DATE_OF_ENTRY_x,...,LOWSLI,AREA,SEDI,OXIC,DW%,LOI%,MORS_SUBBASIN,HELCOM_SUBBASIN,SUM_LINK,DATE_OF_ENTRY_y
0,SKRIL2012048,RA226,,,35.0,26.0,,,,08/20/14 00:00:00,...,20.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
1,SKRIL2012049,RA226,,,36.0,22.0,,,,08/20/14 00:00:00,...,27.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
2,SKRIL2012050,RA226,,,38.0,24.0,,,,08/20/14 00:00:00,...,2.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
3,SKRIL2012051,RA226,,,36.0,25.0,,,,08/20/14 00:00:00,...,4.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
4,SKRIL2012052,RA226,,,30.0,23.0,,,,08/20/14 00:00:00,...,6.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00


## Data transformation pipeline

### Normalize nuclide names

#### Lower & strip

In [None]:
#| export
class LowerStripRdnNameCB(Callback):
    "Convert nuclide names to lowercase & strip any trailing space(s)"

    def __call__(self, tfm):
        # Normalise the `NUCLIDE` column of every dataframe held by the transformer.
        # The vectorised `.str` accessor leaves missing values (NaN) untouched,
        # whereas a per-element `x.lower()` would raise on them.
        # Note that `strip` removes leading as well as trailing whitespace.
        for k in tfm.dfs.keys():
            tfm.dfs[k]['NUCLIDE'] = tfm.dfs[k]['NUCLIDE'].str.lower().str.strip()

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB()])
print(tfm()['seawater']['NUCLIDE'].unique())

['cs137' 'sr90' 'h3' 'cs134' 'pu238' 'pu239240' 'am241' 'cm242' 'cm244'
 'tc99' 'k40' 'ru103' 'sr89' 'sb125' 'nb95' 'ru106' 'zr95' 'ag110m'
 'cm243244' 'ba140' 'ce144' 'u234' 'u238' 'co60' 'pu239' 'pb210' 'po210'
 'np237' 'pu240' 'mn54']


#### Remap to MARIS nuclide names 

In [None]:
#| export
def get_unique_nuclides(dfs):
    """Get list of unique radionuclide types measured across samples.

    `dfs` is a dict of dataframes, each with a `NUCLIDE` column. A nuclide
    present in several dataframes is returned only once, in order of first
    appearance.
    """
    # A dict is used as an insertion-ordered set
    seen = {}
    for df in dfs.values():
        seen.update(dict.fromkeys(df['NUCLIDE'].unique().tolist()))
    return list(seen)

In [None]:
# Check if these variable names are consistent with MARIS CDL
has_valid_varname(get_unique_nuclides(tfm.dfs), nc_tpl_path())

"pu239240" variable name not found in MARIS CDL
"cm243244" variable name not found in MARIS CDL
"cs134137" variable name not found in MARIS CDL
"pu239240" variable name not found in MARIS CDL
"pu238240" variable name not found in MARIS CDL
"pu239240" variable name not found in MARIS CDL
"cs134137" variable name not found in MARIS CDL
"k-40" variable name not found in MARIS CDL
"cs138" variable name not found in MARIS CDL
"cs139" variable name not found in MARIS CDL
"cs140" variable name not found in MARIS CDL
"cs141" variable name not found in MARIS CDL
"cs142" variable name not found in MARIS CDL
"cs143" variable name not found in MARIS CDL
"cs144" variable name not found in MARIS CDL
"cs145" variable name not found in MARIS CDL
"cs146" variable name not found in MARIS CDL


False

In [None]:
#| export
# Overrides applied on top of the identity mapping built by `get_varnames_lut`:
# source nuclide name (lower-cased, stripped) -> replacement variable name.
# These are names reported above as "not found in MARIS CDL".
varnames_lut_updates = {
    'k-40': 'k40',
    'cm243244': 'cm243_244_tot',
    'cs134137': 'cs134_137_tot',
    'pu239240': 'pu239_240_tot',
    'pu238240': 'pu238_240_tot'}


In [None]:
#| export
def get_varnames_lut(dfs, lut=varnames_lut_updates):
    """Build the nuclide-name lookup table: source name -> MARIS variable name.

    Every nuclide found in `dfs` first maps to itself; the entries of `lut`
    are then applied on top as overrides. `lut` itself is not modified.
    """
    # Use the `lut` argument: it was previously shadowed and the module-level
    # default was always applied instead.
    varnames = {n: n for n in set(get_unique_nuclides(dfs))}
    varnames.update(lut)
    return varnames


In [None]:
#|eval: false
varnames_lut = partial(get_varnames_lut, lut=varnames_lut_updates)(tfm.dfs)
varnames_lut

{'sr90': 'sr90',
 'ce141': 'ce141',
 'rb86': 'rb86',
 'bi214': 'bi214',
 'co58': 'co58',
 'cd109': 'cd109',
 'fe59': 'fe59',
 'sn113': 'sn113',
 'sc46': 'sc46',
 'ru106': 'ru106',
 'ce144': 'ce144',
 'nb95': 'nb95',
 'eu152': 'eu152',
 'cs139': 'cs139',
 'sr89': 'sr89',
 'cs137': 'cs137',
 'pu239240': 'pu239_240_tot',
 'th232': 'th232',
 'u235': 'u235',
 'ra228': 'ra228',
 'sn117m': 'sn117m',
 'am241': 'am241',
 'cs141': 'cs141',
 'zr95': 'zr95',
 'pu239': 'pu239',
 'h3': 'h3',
 'u234': 'u234',
 'cs144': 'cs144',
 'ra226': 'ra226',
 'eu155': 'eu155',
 'pb212': 'pb212',
 'cs142': 'cs142',
 'cs134': 'cs134',
 'th228': 'th228',
 'tc99': 'tc99',
 'cs140': 'cs140',
 'ba140': 'ba140',
 'ir192': 'ir192',
 'pb214': 'pb214',
 'sb125': 'sb125',
 'pu240': 'pu240',
 'u238': 'u238',
 'pu238240': 'pu238_240_tot',
 'tl208': 'tl208',
 'la140': 'la140',
 'te129m': 'te129m',
 'cs143': 'cs143',
 'ag110m': 'ag110m',
 'sb124': 'sb124',
 'cm242': 'cm242',
 'cs146': 'cs146',
 'cm243244': 'cm243_244_tot',
 'i

In [None]:
# | export
class RemapRdnNameCB(Callback):
    "Remap to MARIS radionuclide names."
    def __init__(self,
                 fn_lut=partial(get_varnames_lut, lut=varnames_lut_updates)):
        # `fn_lut`: callable taking the dict of dataframes and returning
        # a {source name: MARIS name} mapping.
        fc.store_attr()

    def __call__(self, tfm):
        lut = self.fn_lut(tfm.dfs)
        for k in tfm.dfs.keys():
            # Assign the result back instead of `replace(..., inplace=True)` on the
            # selected column: the chained in-place form acts on an intermediate
            # object and does not update the dataframe under pandas Copy-on-Write.
            tfm.dfs[k]['NUCLIDE'] = tfm.dfs[k]['NUCLIDE'].replace(lut)

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB()])

print(tfm()['biota']['NUCLIDE'].unique())

['cs134' 'k40' 'co60' 'cs137' 'sr90' 'ag108m' 'mn54' 'co58' 'ag110m'
 'zn65' 'sb125' 'pu239_240_tot' 'ru106' 'be7' 'ce144' 'pb210' 'po210'
 'sb124' 'sr89' 'zr95' 'te129m' 'ru103' 'nb95' 'ce141' 'la140' 'i131'
 'ba140' 'pu238' 'u235' 'bi214' 'pb214' 'pb212' 'tl208' 'ac228' 'ra223'
 'eu155' 'ra226' 'gd153' 'sn113' 'fe59' 'tc99' 'co57' 'sn117m' 'eu152'
 'sc46' 'rb86' 'ra224' 'th232' 'cs134_137_tot' 'am241' 'ra228' 'th228'
 'cs138' 'cs139' 'cs140' 'cs141' 'cs142' 'cs143' 'cs144' 'cs145' 'cs146']


In [None]:
has_valid_varname(get_unique_nuclides(tfm.dfs), nc_tpl_path())

"cs138" variable name not found in MARIS CDL
"cs139" variable name not found in MARIS CDL
"cs140" variable name not found in MARIS CDL
"cs141" variable name not found in MARIS CDL
"cs142" variable name not found in MARIS CDL
"cs143" variable name not found in MARIS CDL
"cs144" variable name not found in MARIS CDL
"cs145" variable name not found in MARIS CDL
"cs146" variable name not found in MARIS CDL


False

### Parse time

In [None]:
#| export
class ParseTimeCB(Callback):
    "Parse the `DATE` column into a `time` datetime column."
    def __call__(self, tfm):
        for k in tfm.dfs.keys():
            # Source dates are formatted as e.g. '08/20/14 00:00:00'
            tfm.dfs[k]['time'] = pd.to_datetime(tfm.dfs[k]['DATE'],
                                                format='%m/%d/%y %H:%M:%S')

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB()])

print(tfm()['seawater']['time'][:5])

0   2012-05-23
1   2012-05-23
2   2012-06-17
3   2012-05-24
4   2012-05-24
Name: time, dtype: datetime64[ns]


### Normalize uncertainty units

In [None]:
#| export
# Make measurement and uncertainty units consistent
def fix_units(df, meas_col, unc_col):
    """Convert an uncertainty given in % of the measurement into the measurement's unit.

    Computes `df[unc_col] * df[meas_col] / 100` element-wise and returns the
    result as a Series aligned on `df`'s index. Missing values propagate as NaN.
    """
    # Vectorised: same arithmetic as a row-wise `apply`, without the per-row
    # overhead, and it still yields a Series when `df` is empty.
    return df[unc_col] * df[meas_col] / 100

In [None]:
#| export
# Columns of interest: one (sample type, measurement column, uncertainty column)
# triplet per dataframe. `NormalizeUncUnitCB` overwrites the uncertainty column
# (given in %) with `uncertainty * measurement / 100`.
# NOTE(review): the sediment table also exposes 'VALUE_Bq/m²' / 'ERROR%_m²',
# which are not listed here - confirm this is intended.
coi_units_unc = [('seawater', 'VALUE_Bq/m³', 'ERROR%_m³'),
                 ('biota', 'VALUE_Bq/kg', 'ERROR%'),
                 ('sediment', 'VALUE_Bq/kg', 'ERROR%_kg')]

In [None]:
#| export
class NormalizeUncUnitCB(Callback):
    "Convert uncertainty from % to activity unit"

    # `coi`: iterable of (sample type, measurement column, uncertainty column)
    def __init__(self, coi=coi_units_unc): fc.store_attr()

    def __call__(self, tfm):
        # The uncertainty column is overwritten in place with its converted values
        for grp, val, unc in self.coi:
            tfm.dfs[grp][unc] = self.fix_units(tfm.dfs[grp], val, unc)

    def fix_units(self, df, meas_col, unc_col):
        # Vectorised equivalent of the row-wise computation unc[%] * value / 100
        return df[unc_col] * df[meas_col] / 100

In [None]:
#| export
# NOTE(review): `NormalizeUncUnitCB` is defined in two consecutive cells of this
# notebook; this later definition is the one in effect. One of the two cells can
# be removed.
class NormalizeUncUnitCB(Callback):
    "Convert uncertainty from % to activity unit"

    # `coi`: iterable of (sample type, measurement column, uncertainty column)
    def __init__(self, coi=coi_units_unc): fc.store_attr()

    def __call__(self, tfm):
        # The uncertainty column is overwritten in place with its converted values
        for grp, val, unc in self.coi:
            tfm.dfs[grp][unc] = self.fix_units(tfm.dfs[grp], val, unc)

    def fix_units(self, df, meas_col, unc_col):
        # Vectorised equivalent of the row-wise computation unc[%] * value / 100
        return df[unc_col] * df[meas_col] / 100

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            NormalizeUncUnitCB()])

print(tfm()['seawater'][['VALUE_Bq/m³', 'ERROR%_m³']][:5])

   VALUE_Bq/m³  ERROR%_m³
0          5.3      1.696
1         19.9      3.980
2         25.5      5.100
3         17.0      4.930
4         22.2      3.996


### Lookup biota species

In [None]:
df_rubin = pd.read_csv(Path(fname_in) / 'RUBIN_NAME.csv')
df_rubin.head(5)

Unnamed: 0,RUBIN_ID,RUBIN,SCIENTIFIC NAME,ENGLISH NAME
0,11,ABRA BRA,ABRAMIS BRAMA,BREAM
1,12,ANGU ANG,ANGUILLA ANGUILLA,EEL
2,13,ARCT ISL,ARCTICA ISLANDICA,ISLAND CYPRINE
3,14,ASTE RUB,ASTERIAS RUBENS,COMMON STARFISH
4,15,CARD EDU,CARDIUM EDULE,COCKLE


In [None]:
len(df_rubin)

43

In [None]:
dfs['biota']['RUBIN'].unique()

array(['GADU MOR', 'SPRA SPR', 'CLUP HAR', 'MERL MNG', 'LIMA LIM',
       'PLEU PLA', 'PLAT FLE', 'SADU ENT', 'ENGR ENC', 'ESOX LUC',
       'MACO BAL', 'FUCU VES', 'ZOAR VIV', 'OSME EPE', 'MYOX SCO',
       'GYMN CER', 'GAST ACU', 'SCOM SCO', 'MYTI EDU', 'CYPR CAR',
       'ABRA BRA', 'STIZ LUC', 'RUTI RUT', 'PERC FLU', 'MYA ARE',
       'CRAN CRA', 'PLANKTON', 'CARD EDU', 'ARCT ISL', 'CLAD GLO',
       'FURC LUM', 'ANGU ANG', 'FISHLARVAE', 'ENCH CIM', 'ASTE RUB',
       'RHODOPHY', 'LAMI SAC', 'PSET MAX', 'GADU MOR  ', 'POLY FUC',
       'STUC PEC', 'ZANN PALU'], dtype=object)

In [None]:
#| export
def get_maris_species(fname_in, fname_cache, overwrite=False, verbose=False):
    """Return the lookup table from HELCOM `RUBIN` codes to MARIS species.

    The table has the form `{RUBIN: {'id': species_id, 'name': species}}`.
    It is built by matching each `SCIENTIFIC NAME` of `RUBIN_NAME.csv` (read
    from the `fname_in` directory) with `match_maris_species` and keeping the
    first row of the match, then pickled to `cache_path() / fname_cache`.

    When the cache file exists and `overwrite` is False, the cached table is
    returned as is and the source `.csv` is not read.
    """
    fname_cache = cache_path() / fname_cache
    if not overwrite and fname_cache.exists():
        return fc.load_pickle(fname_cache)

    # Cache miss (or forced refresh): only now is the source table needed
    df = pd.read_csv(Path(fname_in) / 'RUBIN_NAME.csv')
    lut = {}
    if verbose: print('Source:Destination')
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        best = match_maris_species(row['SCIENTIFIC NAME']).iloc[0]
        lut[row['RUBIN']] = {'id': best['species_id'], 'name': best['species']}
        if verbose: print(f'{row["SCIENTIFIC NAME"]}: {best["species"]}')
    fc.save_pickle(fname_cache, lut)
    return lut

In [None]:
get_maris_species(fname_in, 'species_helcom.pkl', overwrite=False)

  0%|          | 0/43 [00:00<?, ?it/s]

100%|██████████| 43/43 [00:26<00:00,  1.61it/s]


{'ABRA BRA': {'id': 271, 'name': 'Abramis brama'},
 'ANGU ANG': {'id': 272, 'name': 'Anguilla anguilla'},
 'ARCT ISL': {'id': 273, 'name': 'Arctica islandica'},
 'ASTE RUB': {'id': 21, 'name': 'Asterias rubens'},
 'CARD EDU': {'id': 988, 'name': 'Cardiidae'},
 'CH HI;BA': {'id': 122, 'name': 'Macoma balthica'},
 'CLAD GLO': {'id': 290, 'name': 'Cladophora glomerata'},
 'CLUP HAR': {'id': 50, 'name': 'Clupea harengus'},
 'CRAN CRA': {'id': 59, 'name': 'Crangon crangon'},
 'CYPR CAR': {'id': 275, 'name': 'Cyprinus carpio'},
 'ENCH CIM': {'id': 276, 'name': 'Echinodermata'},
 'ENGR ENC': {'id': 84, 'name': 'Engraulis encrasicolus'},
 'ESOX LUC': {'id': 269, 'name': 'Esox lucius'},
 'FISHLARVAE': {'id': 277, 'name': 'Fish larvae'},
 'FUCU VES': {'id': 96, 'name': 'Fucus vesiculosus'},
 'FURC LUM': {'id': 289, 'name': 'Furcellaria lumbricalis'},
 'GADU MOR': {'id': 99, 'name': 'Gadus morhua'},
 'GAST ACU': {'id': 286, 'name': 'Gasterosteus aculeatus'},
 'GYMN CER': {'id': 288, 'name': 'Gymn

In [None]:
#| export
def get_worms_species(fname_in, fname_cache, overwrite=False):
    """Return the lookup table from HELCOM `RUBIN` codes to `match_worms` results.

    Each entry has the form
    `{RUBIN: {'id': ..., 'name': ..., 'status': ..., 'match_type': ...}}`,
    filled from the `AphiaID`, `scientificname`, `status` and `match_type`
    fields of the first candidate returned for the `SCIENTIFIC NAME`.
    When `match_worms` returns -1, the entry gets `id` -1 and empty strings.

    The table is pickled to `cache_path() / fname_cache`; it is rebuilt only
    when that file is missing or `overwrite` is True.
    """
    fname_cache = cache_path() / fname_cache
    if not overwrite and fname_cache.exists():
        return fc.load_pickle(fname_cache)

    lut = {}
    df = pd.read_csv(Path(fname_in) / 'RUBIN_NAME.csv')
    fields = ['AphiaID', 'scientificname', 'status', 'match_type']

    for _, row in tqdm(df[['RUBIN', 'SCIENTIFIC NAME']].iterrows(), total=df.shape[0]):
        res = match_worms(row['SCIENTIFIC NAME'])
        if res == -1:
            print(f"No match for {row['RUBIN']} ({row['SCIENTIFIC NAME']})")
            lut[row['RUBIN']] = {'id': -1, 'name': '', 'status': '', 'match_type': ''}
            continue

        if len(res[0]) > 1:
            print(f"Several matches for {row['RUBIN']} ({row['SCIENTIFIC NAME']})")

        # Only the first candidate is retained
        aphia_id, name, status, match_type = [res[0][0].get(key) for key in fields]
        lut[row['RUBIN']] = {'id': aphia_id, 'name': name,
                             'status': status, 'match_type': match_type}

    fc.save_pickle(fname_cache, lut)
    return lut

In [None]:
# NOTE(review): this reuses the cache file name passed to `get_maris_species`
# above ('species_helcom.pkl'). With `overwrite=False` and that file present,
# the cached table is loaded as is - the output below has only 'id'/'name' keys
# (no 'status'/'match_type'). Confirm which lookup is intended here and use a
# distinct cache name if needed.
species_lut = get_worms_species(fname_in, 'species_helcom.pkl', overwrite=False); species_lut

{'ABRA BRA': {'id': 271, 'name': 'Abramis brama'},
 'ANGU ANG': {'id': 272, 'name': 'Anguilla anguilla'},
 'ARCT ISL': {'id': 273, 'name': 'Arctica islandica'},
 'ASTE RUB': {'id': 21, 'name': 'Asterias rubens'},
 'CARD EDU': {'id': 988, 'name': 'Cardiidae'},
 'CH HI;BA': {'id': 122, 'name': 'Macoma balthica'},
 'CLAD GLO': {'id': 290, 'name': 'Cladophora glomerata'},
 'CLUP HAR': {'id': 50, 'name': 'Clupea harengus'},
 'CRAN CRA': {'id': 59, 'name': 'Crangon crangon'},
 'CYPR CAR': {'id': 275, 'name': 'Cyprinus carpio'},
 'ENCH CIM': {'id': 276, 'name': 'Echinodermata'},
 'ENGR ENC': {'id': 84, 'name': 'Engraulis encrasicolus'},
 'ESOX LUC': {'id': 269, 'name': 'Esox lucius'},
 'FISHLARVAE': {'id': 277, 'name': 'Fish larvae'},
 'FUCU VES': {'id': 96, 'name': 'Fucus vesiculosus'},
 'FURC LUM': {'id': 289, 'name': 'Furcellaria lumbricalis'},
 'GADU MOR': {'id': 99, 'name': 'Gadus morhua'},
 'GAST ACU': {'id': 286, 'name': 'Gasterosteus aculeatus'},
 'GYMN CER': {'id': 288, 'name': 'Gymn

In [None]:
#| export
class LookupBiotaSpeciesCB(Callback):
    'Match species with MARIS database.'
    def __init__(self, fn_lut):
        # `fn_lut`: zero-argument callable returning {RUBIN code: {'id': ..., ...}}
        fc.store_attr()

    def __call__(self, tfm):
        species = self.fn_lut()
        biota = tfm.dfs['biota']
        # Codes are stripped before the lookup (the data contains e.g. 'GADU MOR  ');
        # an unknown code raises KeyError.
        biota['species_id'] = biota['RUBIN'].apply(
            lambda code: species[code.strip()]['id'])

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            LookupBiotaSpeciesCB(partial(get_maris_species, 
                                                         fname_in, 'species_helcom.pkl'))
                            ])

print(tfm()['biota'][['RUBIN', 'species_id']][:5])

      RUBIN  species_id
0  GADU MOR          99
1  GADU MOR          99
2  GADU MOR          99
3  GADU MOR          99
4  GADU MOR          99


### Lookup biota tissues

In [None]:
dfs['biota']['TISSUE'].unique()

array([ 5,  1, 41,  3, 51, 43, 42, 12, 10, 18, 52, 20,  8, 54, 53])

In [None]:
#| export
def get_bodypart():
    """Return the hard-coded lookup from `TISSUE` codes to body part ids.

    Naive table, to be refactored.
    """
    tissue_to_bodypart = [
        (5, 52), (1, 1), (41, 1), (3, 3), (51, 54),
        (43, 19), (42, 59), (12, 20), (10, 7), (18, 25),
        (52, 55), (20, 38), (8, 12), (54, 57), (53, 56),
    ]
    return dict(tissue_to_bodypart)

In [None]:
#| export
class LookupBiotaBodyPartCB(Callback):
    'Update bodypart id based on MARIS dbo_bodypar.xlsx'
    def __init__(self, fn_lut):
        # `fn_lut`: zero-argument callable returning {TISSUE code: body part id}
        fc.store_attr()

    def __call__(self, tfm):
        bodyparts = self.fn_lut()
        biota = tfm.dfs['biota']
        # Strict lookup: a tissue code absent from the table raises KeyError
        biota['body_part'] = biota['TISSUE'].apply(lambda code: bodyparts[code])

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            LookupBiotaBodyPartCB(get_bodypart)
                            ])

print(tfm()['biota'][['TISSUE', 'body_part']][:5])

   TISSUE  body_part
0       5         52
1       5         52
2       5         52
3       5         52
4       5         52


### Lookup sediment types

In [None]:
df_sediment = pd.read_csv(Path(fname_in) / 'SEDIMENT_TYPE.csv')
df_sediment.head(5)

Unnamed: 0,SEDI,SEDIMENT TYPE,RECOMMENDED TO BE USED
0,-99,NO DATA,
1,30,SILT AND GRAVEL,YES
2,0,GRAVEL,YES
3,1,SAND,YES
4,2,FINE SAND,NO


In [None]:
df_sediment['SEDI'].unique()

array([-99,  30,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
        11,  12,  13,  14,  15,  20,  21,  22,  23,  24,  25,  31,  32,
        34,  35,  40,  33,  41,  42,  43,  44,  45,  46,  47,  48,  49,
        50,  51,  52,  54,  55,  57,  58,  59])

In [None]:
#| export
def get_sediment(verbose=False, src_dir=None):
    """Return the lookup table from HELCOM `SEDI` codes to matched sediment types.

    Each `SEDIMENT TYPE` label of `SEDIMENT_TYPE.csv` is passed to
    `match_maris_sediment`; the value in the first row and first column of the
    result is stored under the corresponding `SEDI` code.

    `src_dir` is the directory holding `SEDIMENT_TYPE.csv`. When omitted, the
    notebook-level `fname_in` is used, as before.
    With `verbose=True`, each source/destination pair is printed.
    """
    # `fname_in` is only defined in a notebook cell that is not exported, so the
    # exported module needs `src_dir` to be passed explicitly.
    if src_dir is None: src_dir = fname_in
    lut = {}
    if verbose: print('Source:Destination')
    df_sediment = pd.read_csv(Path(src_dir) / 'SEDIMENT_TYPE.csv')
    for _, row in df_sediment.iterrows():
        match = match_maris_sediment(row['SEDIMENT TYPE'])
        lut[row['SEDI']] = match.iloc[0,0]
        if verbose: print(f'({row["SEDI"]}) {row["SEDIMENT TYPE"]}: ({match.iloc[0,0]}) {match.iloc[0,1]}')
    return lut

In [None]:
get_sediment(verbose=True)

Source:Destination
(-99) NO DATA: (26) Soft
(30) SILT AND GRAVEL: (11) Silt and gravel


(0) GRAVEL: (2) Gravel
(1) SAND: (6) Sand
(2) FINE SAND: (7) Fine sand
(3) SILT: (12) Silt
(4) CLAY: (1) Clay
(5) MUD: (4) Mud
(6) GLACIAL: (25) Glacial
(7) SOFT: (26) Soft
(8) SULPHIDIC: (27) Sulphidic
(9) Fe-Mg CONCRETIONS: (28) Fe-Mg concretions
(10) SAND AND GRAVEL: (29) Sand and gravel
(11) PURE SAND: (30) Pure sand
(12) SAND AND FINE SAND: (31) Sand and fine sand
(13) SAND AND SILT: (62) Sand and silt
(14) SAND AND CLAY: (32) Sand and clay
(15) SAND AND MUD: (33) Sand and mud
(20) FINE SAND AND GRAVEL: (34) Fine sand and gravel
(21) FINE SAND AND SAND: (35) Fine sand and sand
(22) PURE FINE SAND: (36) Pure fine sand
(23) FINE SAND AND SILT: (37) Fine sand and silt
(24) FINE SAND AND CLAY: (38) Fine sand and clay
(25) FINE SAND AND MUD: (39) Fine sand and mud
(31) SILT AND SAND: (40) Silt and sand
(32) SILT AND FINE SAND: (41) Silt and fine sand
(34) SILT AND CLAY: (10) Silt and clay
(35) SILT AND MUD: (43) Silt and mud
(40) CLAY AND GRAVEL: (44) Clay and gravel
(33) PURE SILT: (4

{-99: 26,
 30: 11,
 0: 2,
 1: 6,
 2: 7,
 3: 12,
 4: 1,
 5: 4,
 6: 25,
 7: 26,
 8: 27,
 9: 28,
 10: 29,
 11: 30,
 12: 31,
 13: 62,
 14: 32,
 15: 33,
 20: 34,
 21: 35,
 22: 36,
 23: 37,
 24: 38,
 25: 39,
 31: 40,
 32: 41,
 34: 10,
 35: 43,
 40: 44,
 33: 42,
 41: 45,
 42: 46,
 43: 48,
 44: 47,
 45: 49,
 46: 50,
 47: 51,
 48: 52,
 49: 53,
 50: 54,
 51: 55,
 52: 56,
 54: 57,
 55: 58,
 57: 59,
 58: 60,
 59: 61}

In [None]:
dfs['sediment']['SEDI'].unique()

array([ nan, -99.,   0.,  55.,  11.,  57.,  51.,  52.,  22.,  10.,  44.,
         5.,  50.,  15.,   1.,  40.,  33.,  43.,  59.,  54.,   9.,  45.,
        14.,  41.,  25.,  42.,  24.,  12.,  58.,  13.,   7.,  49.,  48.,
         4.,  47.,  23.,  20.,  46.,   2.,  34.,  32.,  56.,  35.,  73.,
        21.])

In [None]:
lut_sediment = get_sediment(verbose=True)

Source:Destination
(-99) NO DATA: (26) Soft
(30) SILT AND GRAVEL: (11) Silt and gravel


(0) GRAVEL: (2) Gravel
(1) SAND: (6) Sand
(2) FINE SAND: (7) Fine sand
(3) SILT: (12) Silt
(4) CLAY: (1) Clay
(5) MUD: (4) Mud
(6) GLACIAL: (25) Glacial
(7) SOFT: (26) Soft
(8) SULPHIDIC: (27) Sulphidic
(9) Fe-Mg CONCRETIONS: (28) Fe-Mg concretions
(10) SAND AND GRAVEL: (29) Sand and gravel
(11) PURE SAND: (30) Pure sand
(12) SAND AND FINE SAND: (31) Sand and fine sand
(13) SAND AND SILT: (62) Sand and silt
(14) SAND AND CLAY: (32) Sand and clay
(15) SAND AND MUD: (33) Sand and mud
(20) FINE SAND AND GRAVEL: (34) Fine sand and gravel
(21) FINE SAND AND SAND: (35) Fine sand and sand
(22) PURE FINE SAND: (36) Pure fine sand
(23) FINE SAND AND SILT: (37) Fine sand and silt
(24) FINE SAND AND CLAY: (38) Fine sand and clay
(25) FINE SAND AND MUD: (39) Fine sand and mud
(31) SILT AND SAND: (40) Silt and sand
(32) SILT AND FINE SAND: (41) Silt and fine sand
(34) SILT AND CLAY: (10) Silt and clay
(35) SILT AND MUD: (43) Silt and mud
(40) CLAY AND GRAVEL: (44) Clay and gravel
(33) PURE SILT: (4

In [None]:
dfs['sediment']['SEDI'].fillna(-99).astype('int').unique()

array([-99,   0,  55,  11,  57,  51,  52,  22,  10,  44,   5,  50,  15,
         1,  40,  33,  43,  59,  54,   9,  45,  14,  41,  25,  42,  24,
        12,  58,  13,   7,  49,  48,   4,  47,  23,  20,  46,   2,  34,
        32,  56,  35,  73,  21])

In [None]:
#| export
class LookupSedimentCB(Callback):
    'Update sediment id  based on MARIS dbo_sedtype.xlsx'
    # `fn_lut`: zero-argument callable returning {SEDI code: sediment type id}
    def __init__(self, fn_lut): fc.store_attr()
    def __call__(self, tfm):
        lut = self.fn_lut()
        # Work on the transformer's own dataframe (not the notebook-level `dfs`);
        # missing codes become -99 ('NO DATA' in SEDIMENT_TYPE.csv).
        sedi = tfm.dfs['sediment']['SEDI'].fillna(-99).astype('int')
        # To check with Helcom: codes 56 and 73 occur in the data but are not
        # listed in SEDIMENT_TYPE.csv, so they are treated as -99 as well.
        sedi = sedi.replace({56: -99, 73: -99})
        tfm.dfs['sediment']['SEDI'] = sedi
        # Strict lookup: any other unknown code raises KeyError
        tfm.dfs['sediment']['sed_type'] = sedi.apply(lambda x: lut[x])

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            LookupSedimentCB(get_sediment)
                            ])

print(tfm()['sediment'][['SEDI', 'sed_type']][:5])

   SEDI  sed_type
0   -99        26
1   -99        26
2   -99        26
3   -99        26
4   -99        26


### Rename columns

In [None]:
#| export
# Define columns of interest by sample type.
# `RenameColumnCB` keeps only these columns (in this order) for each dataframe,
# so every column listed must exist by the time it runs: 'time', 'sed_type',
# 'species_id' and 'body_part' are created by earlier callbacks.
# NOTE(review): seawater and sediment take their depth from 'TDEPTH' while biota
# uses 'SDEPTH' - confirm which depth is intended for each sample type.
# NOTE(review): biota uses the 'ddmmmm' coordinate columns whereas the other two
# use the '(dddddd)' ones; if 'ddmmmm' is degrees-minutes, a conversion to
# decimal degrees is presumably needed - TODO confirm.
coi_grp = {'seawater': ['NUCLIDE', 'VALUE_Bq/m³', 'ERROR%_m³', 'time',
                        'TDEPTH', 'LATITUDE (dddddd)', 'LONGITUDE (dddddd)'],
           'sediment': ['NUCLIDE', 'VALUE_Bq/kg', 'ERROR%_kg', 'time',
                        'TDEPTH', 'LATITUDE (dddddd)', 'LONGITUDE (dddddd)',
                        'sed_type'],
           'biota': ['NUCLIDE', 'VALUE_Bq/kg', 'ERROR%', 'time',
                     'SDEPTH', 'LATITUDE ddmmmm', 'LONGITUDE ddmmmm',
                     'species_id', 'body_part']}


In [None]:
#| export
# Define column names renaming rules: source column -> standardized name.
# Several source columns map to the same target ('value', 'unc', 'depth', 'lat',
# 'lon'); this relies on each sample type keeping only one of them in `coi_grp`.
renaming_rules = {
    'NUCLIDE': 'nuclide',
    'VALUE_Bq/m³': 'value',
    'VALUE_Bq/kg': 'value',
    'ERROR%_m³': 'unc',
    'ERROR%_kg': 'unc',
    'ERROR%': 'unc',
    'TDEPTH': 'depth',
    'SDEPTH': 'depth',
    'LATITUDE (dddddd)': 'lat',
    'LATITUDE ddmmmm': 'lat',
    'LONGITUDE (dddddd)': 'lon',
    'LONGITUDE ddmmmm': 'lon'
}


In [None]:
#| export
class RenameColumnCB(Callback):
    "Select columns of interest and rename them to standardized names."
    def __init__(self,
                 coi=coi_grp,
                 renaming_rules=renaming_rules):
        # `coi`: {sample type: list of columns to keep}
        # `renaming_rules`: {source column name: new column name}
        fc.store_attr()

    def __call__(self, tfm):
        for k in tfm.dfs.keys():
            # Select then rename in one chain, producing a new dataframe rather
            # than renaming a freshly sliced one in place.
            tfm.dfs[k] = (tfm.dfs[k]
                          .loc[:, self.coi[k]]
                          .rename(columns=self.renaming_rules))

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            NormalizeUncUnitCB(),
                            LookupBiotaSpeciesCB(partial(get_maris_species, 
                                                         fname_in, 'species_helcom.pkl')),
                            LookupBiotaBodyPartCB(get_bodypart),
                            LookupSedimentCB(get_sediment),
                            RenameColumnCB()])

print(tfm()['biota'].head(5))

  nuclide       value       unc       time  depth    lat    lon  species_id  \
0   cs134    0.010140       NaN 2012-09-23    NaN  54.17  12.19          99   
1     k40  135.300000  4.830210 2012-09-23    NaN  54.17  12.19          99   
2    co60    0.013980       NaN 2012-09-23    NaN  54.17  12.19          99   
3   cs137    4.338000  0.150962 2012-09-23    NaN  54.17  12.19          99   
4   cs134    0.009614       NaN 2012-09-23    NaN  54.17  12.19          99   

   body_part  
0         52  
1         52  
2         52  
3         52  
4         52  


### Reshape: long to wide

In [None]:
#| export
class ReshapeLongToWide(Callback):
    "Reshape from long (one row per measurement) to wide (one column per nuclide) format."
    def __init__(self): fc.store_attr()

    def __call__(self, tfm):
        cols = ['nuclide']
        vals = ['value', 'unc']
        for k in tfm.dfs.keys():
            # All other columns identify a sample. They are taken in the
            # dataframe's own column order so that the result is reproducible
            # (a `set` difference has no guaranteed order).
            idx = list(dict.fromkeys(c for c in tfm.dfs[k].columns
                                     if c not in cols + vals))

            # NOTE(review): `pivot_table` aggregates duplicated index/nuclide
            # combinations (default `aggfunc='mean'`) and, with default settings,
            # rows having NaN in an index column are presumably dropped by the
            # underlying grouping - confirm this is intended.
            tfm.dfs[k] = tfm.dfs[k].pivot_table(index=idx,
                                                columns=cols,
                                                values=vals).reset_index()

            # Flatten cols name
            tfm.dfs[k].columns = rename_cols(tfm.dfs[k].columns)

            # Set index
            tfm.dfs[k].index.name = 'sample'

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            NormalizeUncUnitCB(),
                            LookupBiotaSpeciesCB(partial(get_maris_species, 
                                                         fname_in, 'species_helcom.pkl')),
                            LookupBiotaBodyPartCB(get_bodypart),
                            LookupSedimentCB(get_sediment),
                            RenameColumnCB(),
                            ReshapeLongToWide()])

print(tfm()['biota'].head(5))

          lon       time  species_id  depth    lat  body_part  ac228_unc  \
sample                                                                     
0        9.41 2011-12-11          50    2.0  54.31         52        NaN   
1       10.00 2011-12-13          99    4.0  54.45         52        NaN   
2       10.07 1987-12-08          99   21.5  54.33         25        NaN   
3       10.07 1987-12-08          99   21.5  54.33         52        NaN   
4       10.07 1994-10-09          50   27.0  54.34          1        NaN   

        ag108m_unc  ag110m_unc  am241_unc  ...  sr89    sr90  tc99  te129m  \
sample                                     ...                               
0              NaN         NaN        NaN  ...   NaN     NaN   NaN     NaN   
1              NaN         NaN        NaN  ...   NaN     NaN   NaN     NaN   
2              NaN     0.11470        NaN  ...   NaN     NaN   NaN     NaN   
3              NaN     0.00858        NaN  ...   NaN  0.0042   NaN     NaN   

### Encode time (seconds since ...)

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            NormalizeUncUnitCB(),
                            LookupBiotaSpeciesCB(partial(get_maris_species, 
                                                         fname_in, 'species_helcom.pkl')),
                            LookupBiotaBodyPartCB(get_bodypart),
                            LookupSedimentCB(get_sediment),
                            RenameColumnCB(),
                            ReshapeLongToWide(),
                            EncodeTimeCB(cfg())])

print(tfm()['seawater'].head(5))


        lat  lon        time  depth  ag110m_unc  am241_unc  ba140_unc  \
sample                                                                  
0       0.0  0.0  1429574400   12.0         NaN        NaN        NaN   
1       0.0  0.0  1429747200    4.0         NaN        NaN        NaN   
2       0.0  0.0  1430352000   13.0         NaN        NaN        NaN   
3       0.0  0.0  1431993600   81.0         NaN        NaN        NaN   
4       0.0  0.0  1432080000   69.0         NaN        NaN        NaN   

        ce144_unc  cm242_unc  cm243_244_tot_unc  ...  pu240  ru103  ru106  \
sample                                           ...                        
0             NaN        NaN                NaN  ...    NaN    NaN    NaN   
1             NaN        NaN                NaN  ...    NaN    NaN    NaN   
2             NaN        NaN                NaN  ...    NaN    NaN    NaN   
3             NaN        NaN                NaN  ...    NaN    NaN    NaN   
4             NaN        N

### Sanitize coordinates

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            NormalizeUncUnitCB(),
                            LookupBiotaSpeciesCB(partial(get_maris_species, 
                                                         fname_in, 'species_helcom.pkl')),
                            LookupBiotaBodyPartCB(get_bodypart),
                            LookupSedimentCB(get_sediment),
                            RenameColumnCB(),
                            ReshapeLongToWide(),
                            EncodeTimeCB(cfg()),
                            SanitizeLonLatCB()])

print(tfm()['seawater'].head(5))


            lat      lon        time  depth  ag110m_unc  am241_unc  ba140_unc  \
sample                                                                          
10      53.9422  14.2578  1339545600   10.0         NaN        NaN        NaN   
11      53.9483  14.2633   870220800   10.0         NaN        NaN        NaN   
12      53.9483  14.2633   966729600   10.0         NaN        NaN        NaN   
13      53.9483  14.2633   992044800   10.0         NaN        NaN        NaN   
14      53.9483  14.2633  1023580800   10.0         NaN        NaN        NaN   

        ce144_unc  cm242_unc  cm243_244_tot_unc  ...  pu240  ru103  ru106  \
sample                                           ...                        
10            NaN        NaN                NaN  ...    NaN    NaN    NaN   
11            NaN        NaN                NaN  ...    NaN    NaN    NaN   
12            NaN        NaN                NaN  ...    NaN    NaN    NaN   
13            NaN        NaN                NaN

## Encode to NetCDF

In [None]:
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripRdnNameCB(),
                            RemapRdnNameCB(),
                            ParseTimeCB(),
                            NormalizeUncUnitCB(),
                            LookupBiotaSpeciesCB(partial(get_maris_species, 
                                                         fname_in, 'species_helcom.pkl')),
                            LookupBiotaBodyPartCB(get_bodypart),
                            LookupSedimentCB(get_sediment),
                            RenameColumnCB(),
                            ReshapeLongToWide(),
                            EncodeTimeCB(cfg()),
                            SanitizeLonLatCB()])

dfs_tfm = tfm()


In [None]:
tfm.logs

['Convert nuclide names to lowercase & strip any trailing space(s)',
 'Remap to MARIS radionuclide names.',
 'Convert uncertainty from % to activity unit',
 'Match species with MARIS database.',
 'Update bodypart id based on MARIS dbo_bodypar.xlsx',
 'Update sediment id  based on MARIS dbo_sedtype.xlsx',
 'Encode time as `int` representing seconds since xxx',
 'Drop row when both longitude & latitude equal 0.']

### Feed global attributes

In [None]:
#| export
# Keywords written to the 'keywords' global attribute (joined with ', ' in `get_attrs`).
# NOTE(review): 'Ocean Chemistry> Radionuclides' lacks a space before '>', and two
# entries already contain several keywords separated by ', ' - confirm against
# the controlled vocabulary in use before normalizing.
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
      'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
      'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
      'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
      'Earth Science > Oceans > Water Quality > Ocean Contaminants',
      'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
      'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
      'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
      'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
      'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']


In [None]:
#| export
def get_attrs(tfm, zotero_key='26VMZZ2Q', kw=kw):
    "Run `GlobAttrsFeeder` on `tfm.dfs` and return the global attributes it produces."
    dfs = tfm.dfs
    attr_cbs = [
        BboxCB(),
        DepthRangeCB(),
        TimeRangeCB(cfg()),
        ZoteroCB(zotero_key, cfg=cfg()),
        # Both list-valued attributes are flattened into a single ', '-separated string.
        KeyValuePairCB('keywords', ', '.join(kw)),
        KeyValuePairCB('publisher_postprocess_logs', ', '.join(tfm.logs)),
    ]
    feeder = GlobAttrsFeeder(dfs, cbs=attr_cbs)
    return feeder()

In [None]:
# Preview the global attributes computed from the `tfm` transformer defined above.
get_attrs(tfm, zotero_key='26VMZZ2Q', kw=kw)

{'geospatial_lat_min': '31.1667',
 'geospatial_lat_max': '65.6347',
 'geospatial_lon_min': '9.41',
 'geospatial_lon_max': '53.458',
 'geospatial_bounds': 'POLYGON ((9.41 53.458, 31.1667 53.458, 31.1667 65.6347, 9.41 65.6347, 9.41 53.458))',
 'geospatial_vertical_max': '0',
 'geospatial_vertical_min': '-460.0',
 'time_coverage_start': '1984-01-10T00:00:00',
 'time_coverage_end': '2018-12-12T00:00:00',
 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting d

### Encoding

In [None]:
# Inspect the species lookup table: each key maps to a dict with 'id' and 'name'
# (presumably keys are HELCOM species codes -- confirm against the source tables).
species_lut

{'ABRA BRA': {'id': 271, 'name': 'Abramis brama'},
 'ANGU ANG': {'id': 272, 'name': 'Anguilla anguilla'},
 'ARCT ISL': {'id': 273, 'name': 'Arctica islandica'},
 'ASTE RUB': {'id': 21, 'name': 'Asterias rubens'},
 'CARD EDU': {'id': 988, 'name': 'Cardiidae'},
 'CH HI;BA': {'id': 122, 'name': 'Macoma balthica'},
 'CLAD GLO': {'id': 290, 'name': 'Cladophora glomerata'},
 'CLUP HAR': {'id': 50, 'name': 'Clupea harengus'},
 'CRAN CRA': {'id': 59, 'name': 'Crangon crangon'},
 'CYPR CAR': {'id': 275, 'name': 'Cyprinus carpio'},
 'ENCH CIM': {'id': 276, 'name': 'Echinodermata'},
 'ENGR ENC': {'id': 84, 'name': 'Engraulis encrasicolus'},
 'ESOX LUC': {'id': 269, 'name': 'Esox lucius'},
 'FISHLARVAE': {'id': 277, 'name': 'Fish larvae'},
 'FUCU VES': {'id': 96, 'name': 'Fucus vesiculosus'},
 'FURC LUM': {'id': 289, 'name': 'Furcellaria lumbricalis'},
 'GADU MOR': {'id': 99, 'name': 'Gadus morhua'},
 'GAST ACU': {'id': 286, 'name': 'Gasterosteus aculeatus'},
 'GYMN CER': {'id': 288, 'name': 'Gymn

In [None]:
# Species name -> id mapping, skipping entries with an empty name; this is the same
# expression `encode` uses to build the extra `species_t` enum.
{info['name']: info['id'] for info in species_lut.values() if info['name'] != ''}

{'Abramis brama': 271,
 'Anguilla anguilla': 272,
 'Arctica islandica': 273,
 'Asterias rubens': 21,
 'Cardiidae': 988,
 'Macoma balthica': 122,
 'Cladophora glomerata': 290,
 'Clupea harengus': 50,
 'Crangon crangon': 59,
 'Cyprinus carpio': 275,
 'Echinodermata': 276,
 'Engraulis encrasicolus': 84,
 'Esox lucius': 269,
 'Fish larvae': 277,
 'Fucus vesiculosus': 96,
 'Furcellaria lumbricalis': 289,
 'Gadus morhua': 99,
 'Gasterosteus aculeatus': 286,
 'Gymnocephalus cernua': 288,
 'Laminaria japonica': 149,
 'Limanda limanda': 270,
 'Merlangius merlangus': 139,
 'Mya arenaria': 120,
 'Myoxocephalus scorpius': 278,
 'Mytilus edulis': 129,
 'Osmerus eperlanus': 279,
 'Perca fluviatilis': 247,
 'Plankton': 280,
 'Platichthys flesus': 191,
 'Pleuronectes platessa': 192,
 'Polysiphonia fucoides': 245,
 'Pinctada maxima': 675,
 'Rhodophyta': 282,
 'Rutilus rutilus': 283,
 'Saduria entomon': 284,
 'Scomber scombrus': 244,
 'Solea solea': 397,
 'Sprattus sprattus': 243,
 'Sander lucioperca': 

In [None]:
#| export
def encode(fname_in, fname_out, nc_tpl_path, **kwargs):
    "Load the HELCOM tables from `fname_in`, run the transformation pipeline and encode the result to `fname_out`."
    # `fname_in`: directory handed to `load_data` and to the species lookup
    # `fname_out`: destination passed to `NetCDFEncoder` as `dest_fname`
    # `nc_tpl_path`: template passed to `NetCDFEncoder` as `src_fname`
    # `**kwargs`: forwarded unchanged to `NetCDFEncoder`
    raw_dfs = load_data(fname_in)
    pipeline_cbs = [
        LowerStripRdnNameCB(),
        RemapRdnNameCB(),
        ParseTimeCB(),
        NormalizeUncUnitCB(),
        LookupBiotaSpeciesCB(partial(get_maris_species,
                                     fname_in, 'species_helcom.pkl')),
        LookupBiotaBodyPartCB(get_bodypart),
        LookupSedimentCB(get_sediment),
        RenameColumnCB(),
        ReshapeLongToWide(),
        EncodeTimeCB(cfg()),
        SanitizeLonLatCB(),
    ]
    tfm = Transformer(raw_dfs, cbs=pipeline_cbs)

    # Extra enum for the `species_t` type: species name -> id, ignoring unnamed entries.
    species_lut = get_maris_species(fname_in, 'species_helcom.pkl')
    species_enum = {}
    for entry in species_lut.values():
        name = entry['name']
        if name != '':
            species_enum[name] = entry['id']
    enums_xtra = {'species_t': species_enum}

    # Run the pipeline before collecting attributes: `get_attrs` reads
    # `tfm.dfs` and `tfm.logs`.
    dfs_tfm = tfm()
    global_attrs = get_attrs(tfm, zotero_key='26VMZZ2Q', kw=kw)

    encoder = NetCDFEncoder(dfs_tfm,
                            src_fname=nc_tpl_path,
                            dest_fname=fname_out,
                            global_attrs=global_attrs,
                            enums_xtra=enums_xtra,
                            **kwargs)
    encoder.encode()

In [None]:
# Run the whole pipeline: read from `fname_in`, encode to `fname_out` using the
# template returned by `nc_tpl_path()`; `verbose=False` is forwarded to `NetCDFEncoder`.
encode(fname_in, fname_out, nc_tpl_path(), verbose=False)