In [None]:
#| default_exp handlers.maris_dump

# MARIS dump
> Data pipeline (handler) to convert global MARIS db dump into `NetCDF` format

The input is a tab-separated text dump of the datasets already imported into the MARIS database.

**Dev. board**: https://trello.com/b/IszgV1bj/marisco

## Packages import

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#| export
from tqdm import tqdm
from pathlib import Path
import fastcore.all as fc
import pandas as pd

from marisco.callbacks import (Callback, Transformer, SanitizeLonLatCB, EncodeTimeCB)
from marisco.metadata import (GlobAttrsFeeder, BboxCB,
                              DepthRangeCB, TimeRangeCB,
                              ZoteroCB, KeyValuePairCB)
from marisco.configs import lut_path, cdl_cfg, cfg, nc_tpl_path, get_enum_dicts
from marisco.serializers import NetCDFEncoder

In [None]:
pd.set_option('display.max_rows', 100)

In [None]:
import warnings
# Silence all warnings for the rest of this notebook session (keeps cell outputs short).
# NOTE(review): this also hides pandas deprecation/copy warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [None]:
# Input: MARIS dump text file, read by `load_dump` below.
# NOTE: the path is resolved under the current user's home directory; adjust it to your setup.
fname_in = Path().home() / 'pro/data/maris/MARIS_exportSample_20240312_tab2.txt'
# Output directory, passed to `encode` as `fname_out` (one NetCDF file per dataset is written there).
dir_dest = '../../_data/output/dump'

## Utils

In [None]:
# | export
def load_dump(fname, # Path to the MARIS dump text file
              sep='\t', # Field delimiter used in the dump
              encoding='ISO-8859-1', # Text encoding of the dump
              ):
    "Read a delimited MARIS dump into a `pd.DataFrame` (defaults: tab-separated, ISO-8859-1)."
    # Defaults reproduce the previously hard-coded format, so existing callers are unaffected.
    return pd.read_csv(fname, sep=sep, encoding=encoding)

def load_data(df:pd.DataFrame, # MARIS global dump 
                 ref_id:int, # Reference id of interest
                 ):
    "Return the rows of dataset `ref_id`, split per sample type into a `{group name: DataFrame}` dict."
    # Sample type labels found in the dump -> group names used as dict keys
    group_names = {
        'Sediment': 'sediment',
        'Seawater': 'seawater',
        'Suspended matter': 'suspended-matter',
        'Biota': 'biota'}
    selected = df.loc[df['ref_id'] == ref_id]
    # A sample type missing from `group_names` raises KeyError
    return {group_names[samptype]: rows
            for samptype, rows in selected.groupby('samptype')}

def get_zotero_key(dfs):
    "Return the last `/`-separated segment of the first `zoterourl` value found in the first group of `dfs`."
    first_group = next(iter(dfs.values()))
    url = first_group['zoterourl'].iloc[0]
    return url.rsplit('/', 1)[-1]

def get_fname(dfs):
    "Build the output file name `<ref_id>-<displaytext words>.nc` from the first row of the first group of `dfs`."
    first_row = dfs[next(iter(dfs))][['ref_id', 'displaytext']].iloc[0]
    ref, label = first_row
    # Strip commas and dots; hyphens become spaces so they end up as word separators
    for char, substitute in ((',', ''), ('.', ''), ('-', ' ')):
        label = label.replace(char, substitute)
    tokens = [str(ref), *label.split(' ')]
    return '-'.join(tokens) + '.nc'

## Load data

In [None]:
df = load_dump(fname_in)

In [None]:
df.head()

Unnamed: 0,ref_id,displaytext,samptype,nuclide_id,latitude,longitude,begperiod,endperiod,sampdepth,totdepth,...,bodypar_id,sedtype_id,volume,salinity,temperatur,sampmet_id,prepmet_id,counmet_id,activity,zoterourl
0,374,"Ostlund et al., 1987",Seawater,53,-53.0,103.016944,1978-02-18 00:00:00,,1867.0,3808.0,...,0,0,,34.731998,,1,0,0,3.508917,https://www.zotero.org/groups/2432820/maris/it...
1,374,"Ostlund et al., 1987",Seawater,3,-53.0,103.016944,1978-02-18 00:00:00,,1079.0,3808.0,...,0,0,,34.740002,,1,0,0,-144.7,https://www.zotero.org/groups/2432820/maris/it...
2,374,"Ostlund et al., 1987",Seawater,53,-53.0,103.016944,1978-02-18 00:00:00,,1079.0,3808.0,...,0,0,,34.740002,,1,0,0,3.320633,https://www.zotero.org/groups/2432820/maris/it...
3,374,"Ostlund et al., 1987",Seawater,1,-53.0,103.016944,1978-02-18 00:00:00,,1140.0,3808.0,...,0,0,,34.740002,,12,0,0,-0.01,https://www.zotero.org/groups/2432820/maris/it...
4,374,"Ostlund et al., 1987",Seawater,1,-53.0,103.016944,1978-02-18 00:00:00,,1583.0,3808.0,...,0,0,,34.743999,,1,0,0,-0.05,https://www.zotero.org/groups/2432820/maris/it...


In [None]:
# 52, 191 (OSPAR)
ref_id = 174

In [None]:
dfs = load_data(df, ref_id)

In [None]:
# from marisco.inout import read_toml

# lut_src_dir_test = '../files/lut'
# cdl_enums_test = read_toml('../files/cdl.toml')['enums']

# enums = get_enum_dicts(lut_src_dir=lut_src_dir_test, 
#                        cdl_enums=cdl_enums_test)

# species = {v:k for k, v in enums['species_t'].items()}

# for idx in dfs['biota'].species_id.unique():
#     print(idx in species)

## Data transformation pipeline

### Normalize nuclide names

In [None]:
#| export
def get_varnames_lut():
    "Return a `{nuclide_id: nc_name}` dict read from the `dbo_nuclide.xlsx` lookup table."
    lut_file = lut_path() / 'dbo_nuclide.xlsx'
    nuclides = pd.read_excel(lut_file, usecols=['nuclide_id', 'nc_name'])
    return nuclides.set_index('nuclide_id')['nc_name'].to_dict()

In [None]:
# | export
class RemapRdnNameCB(Callback):
    "Remap to MARIS radionuclide names."
    # `fn_lut` is a zero-argument callable returning the `{nuclide_id: name}` mapping;
    # it is only invoked when the callback runs, not at construction time.
    def __init__(self,
                 fn_lut=get_varnames_lut):
        fc.store_attr()

    def __call__(self, tfm):
        nuclide_names = self.fn_lut()
        # Overwrite `nuclide_id` in every group; ids absent from the mapping are left untouched
        for grp_df in tfm.dfs.values():
            grp_df['nuclide_id'] = grp_df['nuclide_id'].replace(nuclide_names)

In [None]:
dfs = load_data(df, ref_id)

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[RemapRdnNameCB()])

print(tfm()['sediment']['nuclide_id'].unique())

['ra228' 'k40' 'pu238' 'pu239' 'sr90' 'th232' 'u238' 'pb210' 'ra226']


### Rename columns

In [None]:
dfs['sediment'].columns

Index(['ref_id', 'displaytext', 'samptype', 'nuclide_id', 'latitude',
       'longitude', 'begperiod', 'endperiod', 'sampdepth', 'totdepth',
       'uncertaint', 'unit_id', 'detection', 'area_id', 'species_id',
       'biogroup_id', 'bodypar_id', 'sedtype_id', 'volume', 'salinity',
       'temperatur', 'sampmet_id', 'prepmet_id', 'counmet_id', 'activity',
       'zoterourl'],
      dtype='object')

In [None]:
#| export
# To be added: endperiod, totdepth, lab
def renaming_rules():
    "Return the `{dump column: standard name}` renaming rules, with names looked up in `cdl_cfg()['vars']`."
    cdl_vars = cdl_cfg()['vars']
    # (dump column, config section, config key), in the order the columns should be selected
    lookups = (
        ('latitude', 'defaults', 'lat'),
        ('longitude', 'defaults', 'lon'),
        ('begperiod', 'defaults', 'time'),
        ('sampdepth', 'defaults', 'depth'),
        ('uncertaint', 'suffixes', 'uncertainty'),
        ('unit_id', 'suffixes', 'unit'),
        ('detection', 'suffixes', 'detection_limit'),
        ('area_id', 'defaults', 'area'),
        ('species_id', 'bio', 'species'),
        ('biogroup_id', 'bio', 'bio_group'),
        ('bodypar_id', 'bio', 'body_part'),
        ('sedtype_id', 'sed', 'sed_type'),
        ('volume', 'suffixes', 'volume'),
        ('salinity', 'suffixes', 'salinity'),
        ('temperatur', 'suffixes', 'temperature'),
        ('sampmet_id', 'suffixes', 'sampling_method'),
        ('prepmet_id', 'suffixes', 'preparation_method'),
        ('counmet_id', 'suffixes', 'counting_method'),
    )
    rules = {src: cdl_vars[section][key]['name'] for src, section, key in lookups}
    # These two targets are fixed names rather than config lookups
    rules['activity'] = 'value'
    rules['nuclide_id'] = 'nuclide'
    return rules

In [None]:
#| export
class RenameColumnCB(Callback):
    "Renaming variables to MARIS standard names."
    # `renaming_rules` is a zero-argument callable returning `{dump column: standard name}`.
    def __init__(self, renaming_rules=renaming_rules): fc.store_attr()
    def __call__(self, tfm):
        # Use the rules supplied at construction time. Calling the module-level
        # `renaming_rules()` here would silently ignore a custom callable.
        lut = self.renaming_rules()
        # Columns of interest: only the columns named in the rules are kept
        coi = list(lut)
        for k in tfm.dfs.keys():
            # Select then rename into a new frame (no in-place rename on a selection)
            tfm.dfs[k] = tfm.dfs[k].loc[:, coi].rename(columns=lut)

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB()
    ])

print(tfm()['sediment'])

             lat        lon                 time  depth     unc  unit dl  \
14936 -62.166667 -94.276111  2003-01-31 00:00:00    NaN    5.00     4  =   
14937 -62.166667 -94.276111  2003-01-31 00:00:00    NaN  222.00     4  =   
14938 -62.166667 -94.276111  2003-01-31 00:00:00    NaN    0.10     4  =   
14939 -62.166667 -94.276111  2003-01-31 00:00:00    NaN    0.07     4  =   
14940 -62.166667 -94.276111  2003-01-31 00:00:00    NaN   40.00     4  =   
15080 -62.169444 -58.601944  2003-01-31 00:00:00    NaN    0.71     4  =   
15081 -62.169444 -58.601944  2003-01-31 00:00:00    NaN    0.79     4  =   
15082 -62.172500 -58.546667  2003-01-31 00:00:00    NaN    0.50     4  =   
15083 -62.172500 -58.546667  2003-01-31 00:00:00    NaN    0.43     4  =   
15084 -62.173889 -58.443889  2003-01-31 00:00:00    NaN    0.41     4  =   
15085 -62.173889 -58.443889  2003-01-31 00:00:00    NaN    0.47     4  =   
15086 -62.190556 -58.610556  2003-01-31 00:00:00    NaN    0.63     4  =   
15087 -62.19

### Drop NaN only columns

In [None]:
#| export
class DropNAColumnsCB(Callback):
    "Drop variable containing only NaN or 'Not available' (id=0 in MARIS lookup tables)."
    # `na_value` is the placeholder that marks a column as "not available" when it is
    # the column's single distinct value.
    def __init__(self, na_value=0):
        fc.store_attr()

    def isMarisNA(self, col):
        # True only when the column holds exactly one distinct value and it equals `na_value`
        if len(col.unique()) != 1:
            return False
        return col.iloc[0] == self.na_value

    def dropMarisNA(self, df):
        # Returns a new frame without the placeholder-only columns
        placeholder_only = [name for name in df.columns if self.isMarisNA(df[name])]
        return df.drop(columns=placeholder_only)

    def __call__(self, tfm):
        for k in tfm.dfs.keys():
            # First remove all-NaN columns, then the placeholder-only ones
            without_nan = tfm.dfs[k].dropna(axis=1, how='all')
            tfm.dfs[k] = self.dropMarisNA(without_nan)

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB()
    ])

print(tfm()['sediment'])

             lat        lon                 time     unc  unit dl  area  \
14936 -62.166667 -94.276111  2003-01-31 00:00:00    5.00     4  =  1907   
14937 -62.166667 -94.276111  2003-01-31 00:00:00  222.00     4  =  1907   
14938 -62.166667 -94.276111  2003-01-31 00:00:00    0.10     4  =  1907   
14939 -62.166667 -94.276111  2003-01-31 00:00:00    0.07     4  =  1907   
14940 -62.166667 -94.276111  2003-01-31 00:00:00   40.00     4  =  1907   
15080 -62.169444 -58.601944  2003-01-31 00:00:00    0.71     4  =  1907   
15081 -62.169444 -58.601944  2003-01-31 00:00:00    0.79     4  =  1907   
15082 -62.172500 -58.546667  2003-01-31 00:00:00    0.50     4  =  1907   
15083 -62.172500 -58.546667  2003-01-31 00:00:00    0.43     4  =  1907   
15084 -62.173889 -58.443889  2003-01-31 00:00:00    0.41     4  =  1907   
15085 -62.173889 -58.443889  2003-01-31 00:00:00    0.47     4  =  1907   
15086 -62.190556 -58.610556  2003-01-31 00:00:00    0.63     4  =  1907   
15087 -62.190556 -58.6105

### Sanitize detection limit values

In [None]:
#| export
def get_dl_lut():
    "Return a `{detection limit name: id}` dict read from the `dbo_detectlimit.xlsx` lookup table."
    lut_file = lut_path() / 'dbo_detectlimit.xlsx'
    detection_limits = pd.read_excel(lut_file, usecols=['name', 'id'])
    return detection_limits.set_index('name')['id'].to_dict()

In [None]:
get_dl_lut()

{'Not Available': 0, '=': 1, '<': 2, 'ND': 3, 'DE': 4}

In [None]:
#| export
class SanitizeDetectionLimitCB(Callback):
    "Assign Detection Limit name to its id based on MARIS nomenclature."
    # `fn_lut` is a zero-argument callable returning the `{name: id}` mapping;
    # it is only invoked when the callback runs.
    def __init__(self,
                 fn_lut=get_dl_lut):
        fc.store_attr()

    def __call__(self, tfm):
        name_to_id = self.fn_lut()
        # Expects the `dl` column to be present in every group
        for grp_df in tfm.dfs.values():
            grp_df['dl'] = grp_df['dl'].replace(name_to_id)

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB(),
    SanitizeDetectionLimitCB()
    ])

print(tfm()['sediment'])

             lat        lon                 time     unc  unit  dl  area  \
14936 -62.166667 -94.276111  2003-01-31 00:00:00    5.00     4   1  1907   
14937 -62.166667 -94.276111  2003-01-31 00:00:00  222.00     4   1  1907   
14938 -62.166667 -94.276111  2003-01-31 00:00:00    0.10     4   1  1907   
14939 -62.166667 -94.276111  2003-01-31 00:00:00    0.07     4   1  1907   
14940 -62.166667 -94.276111  2003-01-31 00:00:00   40.00     4   1  1907   
15080 -62.169444 -58.601944  2003-01-31 00:00:00    0.71     4   1  1907   
15081 -62.169444 -58.601944  2003-01-31 00:00:00    0.79     4   1  1907   
15082 -62.172500 -58.546667  2003-01-31 00:00:00    0.50     4   1  1907   
15083 -62.172500 -58.546667  2003-01-31 00:00:00    0.43     4   1  1907   
15084 -62.173889 -58.443889  2003-01-31 00:00:00    0.41     4   1  1907   
15085 -62.173889 -58.443889  2003-01-31 00:00:00    0.47     4   1  1907   
15086 -62.190556 -58.610556  2003-01-31 00:00:00    0.63     4   1  1907   
15087 -62.19

### Parse time

In [None]:
#| export
class ParseTimeCB(Callback):
    "Parse time column from ISO 8601 strings to datetime."
    # NOTE(review): the other callbacks' docstrings show up in `tfm.logs`, and this step was
    # the only one missing from it; presumably the docstring is what gets logged, so the
    # one-liner above is meant to make the parsing step appear in the post-processing logs.
    def __call__(self, tfm):
        for k in tfm.dfs.keys():
            tfm.dfs[k]['time'] = pd.to_datetime(tfm.dfs[k].time, format='ISO8601')

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB(),
    SanitizeDetectionLimitCB(),
    ParseTimeCB()
    ])

print(tfm()['sediment'])

             lat        lon       time     unc  unit  dl  area  sed_type  \
14936 -62.166667 -94.276111 2003-01-31    5.00     4   1  1907         0   
14937 -62.166667 -94.276111 2003-01-31  222.00     4   1  1907         0   
14938 -62.166667 -94.276111 2003-01-31    0.10     4   1  1907         0   
14939 -62.166667 -94.276111 2003-01-31    0.07     4   1  1907         0   
14940 -62.166667 -94.276111 2003-01-31   40.00     4   1  1907         0   
15080 -62.169444 -58.601944 2003-01-31    0.71     4   1  1907        12   
15081 -62.169444 -58.601944 2003-01-31    0.79     4   1  1907        12   
15082 -62.172500 -58.546667 2003-01-31    0.50     4   1  1907         6   
15083 -62.172500 -58.546667 2003-01-31    0.43     4   1  1907         6   
15084 -62.173889 -58.443889 2003-01-31    0.41     4   1  1907         0   
15085 -62.173889 -58.443889 2003-01-31    0.47     4   1  1907         0   
15086 -62.190556 -58.610556 2003-01-31    0.63     4   1  1907         2   
15087 -62.19

### Reshape: long to wide

In [None]:
#| export
class ReshapeLongToWide(Callback):
    "Convert data from long to wide with renamed columns."
    # `value_col` is the column whose distinct values become the per-variable prefix of the
    # pivoted columns (e.g. `k40_counmet` in the wide frame).
    def __init__(self, value_col='nuclide'):
        fc.store_attr()
        # Names of the "suffix" variables declared in the CDL config; these are the pivoted values
        self.derived_cols = [value['name'] for value in cdl_cfg()['vars']['suffixes'].values()]

    def renamed_cols(self, cols):
        # `cols` are (outer, inner) pairs of the pivoted column MultiIndex:
        # pivoted columns become `<inner>_<outer>`, index columns (empty inner) keep their name
        return [f'{inner}_{outer}' if inner else outer for outer, inner in cols]

    def pivot(self, df):
        derived_coi = [col for col in self.derived_cols if col in df.columns]
        # Keep the original row label as a `sample` column, without mutating the caller's frame
        df = df.reset_index(names='sample')
        # Every remaining column identifies a row. Keep the frame's own column order so the
        # result is deterministic (a set difference gives an arbitrary, run-dependent order).
        excluded = {self.value_col, *derived_coi}
        idx = [col for col in df.columns if col not in excluded]
        return df.pivot_table(index=idx,
                              columns=self.value_col,
                              values=derived_coi,
                              fill_value=0,
                              aggfunc=lambda x: x
                              ).reset_index()

    def __call__(self, tfm):
        for k in tfm.dfs.keys():
            wide = self.pivot(tfm.dfs[k])
            wide.columns = self.renamed_cols(wide.columns)
            tfm.dfs[k] = wide.set_index('sample')

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB(),
    SanitizeDetectionLimitCB(),
    ParseTimeCB(),
    ReshapeLongToWide()
    ])

print(tfm()['sediment'])

        sed_type   value       time        lon  area        lat  k40_counmet  \
sample                                                                         
14939          0    0.19 2003-01-31 -94.276111  1907 -62.166667            0   
14938          0    0.30 2003-01-31 -94.276111  1907 -62.166667            0   
15084          0   14.26 2003-01-31 -58.443889  1907 -62.173889            0   
15104          0   15.07 2003-01-31 -58.490833  1907 -62.157500            0   
15107          0   15.07 2003-01-31 -58.368056  1907 -62.160556            0   
15085          0   16.12 2003-01-31 -58.443889  1907 -62.173889            0   
15108          0   16.12 2003-01-31 -58.368056  1907 -62.160556            0   
14936          0   17.00 2003-01-31 -94.276111  1907 -62.166667            0   
15110          0   21.00 2003-01-31 -94.276111  1907 -62.166667            0   
14940          0   31.00 2003-01-31 -94.276111  1907 -62.166667            0   
15109          0  105.00 2003-01-31 -94.

### Encode time (seconds since ...)

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB(),
    SanitizeDetectionLimitCB(),
    ParseTimeCB(),
    ReshapeLongToWide(),
    EncodeTimeCB(cfg())
    ])

print(tfm()['sediment'])

        sed_type   value        time        lon  area        lat  k40_counmet  \
sample                                                                          
14939          0    0.19  1043971200 -94.276111  1907 -62.166667            0   
14938          0    0.30  1043971200 -94.276111  1907 -62.166667            0   
15084          0   14.26  1043971200 -58.443889  1907 -62.173889            0   
15104          0   15.07  1043971200 -58.490833  1907 -62.157500            0   
15107          0   15.07  1043971200 -58.368056  1907 -62.160556            0   
15085          0   16.12  1043971200 -58.443889  1907 -62.173889            0   
15108          0   16.12  1043971200 -58.368056  1907 -62.160556            0   
14936          0   17.00  1043971200 -94.276111  1907 -62.166667            0   
15110          0   21.00  1043971200 -94.276111  1907 -62.166667            0   
14940          0   31.00  1043971200 -94.276111  1907 -62.166667            0   
15109          0  105.00  10

### Sanitize coordinates

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB(),
    SanitizeDetectionLimitCB(),
    ParseTimeCB(),
    ReshapeLongToWide(),
    EncodeTimeCB(cfg()),
    SanitizeLonLatCB()
    ])

# print(tfm()['sediment'])
df_debug = tfm()['sediment']

## Encode to NetCDF

In [None]:
dfs = load_data(df, ref_id)
tfm = Transformer(dfs, cbs=[
    RemapRdnNameCB(),
    RenameColumnCB(),
    DropNAColumnsCB(),
    SanitizeDetectionLimitCB(),
    ParseTimeCB(),
    ReshapeLongToWide(),
    EncodeTimeCB(cfg()),
    SanitizeLonLatCB()
    ])

dfs_tfm = tfm()

In [None]:
tfm.logs

['Remap to MARIS radionuclide names.',
 'Renaming variables to MARIS standard names.',
 "Drop variable containing only NaN or 'Not available' (id=0 in MARIS lookup tables).",
 'Assign Detection Limit name to its id based on MARIS nomenclature.',
 'Convert data from long to wide with renamed columns.',
 'Encode time as `int` representing seconds since xxx',
 'Drop row when both longitude & latitude equal 0.']

In [None]:
#| export
# Default keywords for the `keywords` global attribute (`get_attrs` joins them with ', ').
# Note that some entries already hold two comma-separated keywords in a single string.
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
      'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
      'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
      'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
      'Earth Science > Oceans > Water Quality > Ocean Contaminants',
      'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
      'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
      'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
      'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
      'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']

In [None]:
#| export
def get_attrs(tfm, zotero_key='26VMZZ2Q', kw=kw):
    "Build the global attributes from the transformed data in `tfm.dfs`, the Zotero item `zotero_key`, the keywords `kw` and `tfm.logs`."
    attr_cbs = [
        BboxCB(),
        DepthRangeCB(),
        TimeRangeCB(cfg()),
        ZoteroCB(zotero_key, cfg=cfg()),
        # Keywords and processing logs are each stored as one comma-separated string
        KeyValuePairCB('keywords', ', '.join(kw)),
        KeyValuePairCB('publisher_postprocess_logs', ', '.join(tfm.logs)),
    ]
    feeder = GlobAttrsFeeder(tfm.dfs, cbs=attr_cbs)
    return feeder()

In [None]:
get_attrs(tfm, zotero_key='3W354SQG', kw=kw)

{'geospatial_lat_min': '-58.331388888888895',
 'geospatial_lat_max': '-62.084722222222226',
 'geospatial_lon_min': '-94.2761111111111',
 'geospatial_lon_max': '-62.2225',
 'geospatial_bounds': 'POLYGON ((-94.2761111111111 -62.2225, -58.331388888888895 -62.2225, -58.331388888888895 -62.084722222222226, -94.2761111111111 -62.084722222222226, -94.2761111111111 -62.2225))',
 'time_coverage_start': '2003-01-31T00:00:00',
 'time_coverage_end': '2003-01-31T00:00:00',
 'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
 'summary': '',
 'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]'

In [None]:
#| export
def encode(fname_in, fname_out, nc_tpl_path, **kwargs):
    "Encode the datasets of a MARIS dump to NetCDF, one file per `ref_id`, written under `fname_out`."
    # fname_in: path of the MARIS dump (read with `load_dump`)
    # fname_out: destination directory; file names come from `get_fname`
    # nc_tpl_path: path of the NetCDF template (note: shadows the imported `nc_tpl_path` function here)
    # kwargs: `ref_ids` (iterable of ids to encode, default: every id in the dump),
    #         `verbose` (forwarded to `NetCDFEncoder`, default False)
    df = load_dump(fname_in)
    ref_ids = kwargs.get('ref_ids')
    # Only scan the dump for its ids when the caller did not restrict them
    if ref_ids is None: ref_ids = df.ref_id.unique()
    verbose = kwargs.get('verbose', False)
    print('Encoding ...')
    for ref_id in tqdm(ref_ids, leave=False):
        dfs = load_data(df, ref_id)
        if not dfs:
            # Unknown ref_id: nothing to name or encode (this used to surface as a bare StopIteration)
            print(f'No data found for ref_id {ref_id}: skipping.')
            continue
        # File name and Zotero key are read from the untransformed data, once
        fname = get_fname(dfs)
        zotero_key = get_zotero_key(dfs)
        print(fname)
        tfm = Transformer(dfs, cbs=[
            RemapRdnNameCB(),
            RenameColumnCB(),
            DropNAColumnsCB(),
            SanitizeDetectionLimitCB(),
            ParseTimeCB(),
            ReshapeLongToWide(),
            EncodeTimeCB(cfg()),
            SanitizeLonLatCB()
            ])
        # Run the pipeline first: `get_attrs` reads `tfm.dfs` and `tfm.logs` afterwards
        dfs_tfm = tfm()
        global_attrs = get_attrs(tfm, zotero_key=zotero_key, kw=kw)

        # species_lut = get_maris_species(fname_in, 'species_helcom.pkl')
        # enums_xtra = {
        #     'species_t': {info['name']: info['id'] 
        #                   for info in species_lut.values() if info['name'] != ''}
        # }
        encoder = NetCDFEncoder(dfs_tfm,
                                src_fname=nc_tpl_path,
                                dest_fname=Path(fname_out) / fname,
                                global_attrs=global_attrs,
                                verbose=verbose
                                # enums_xtra=enums_xtra
                                )
        encoder.encode()

### Single dataset

In [None]:
nc_tpl_path()

Path('/Users/franckalbinet/.marisco/maris-template.nc')

In [None]:
ref_id = 174
encode(fname_in, dir_dest, nc_tpl_path(), verbose=True, ref_ids=[ref_id])

Encoding ...


  0%|          | 0/1 [00:00<?, ?it/s]

174-Hurtado-Bermúdez-et-al-2018.nc


                                     

Group: biota, Variable: sample
Group: biota, Variable: lon
Group: biota, Variable: lat
Group: biota, Variable: depth
Group: biota, Variable: time
Group: biota, Variable: area
Group: biota, Variable: bio_group
Group: biota, Variable: species




ValueError: trying to assign illegal value to Enum variable

### All datasets

In [None]:
encode(fname_in, dir_dest, nc_tpl_path(), verbose=False)

Encoding ...


  0%|          | 0/462 [00:00<?, ?it/s]

374-Ostlund-et-al-1987.nc


  0%|          | 1/462 [00:04<31:05,  4.05s/it]

401-Olsen-et-al-2016.nc


  0%|          | 2/462 [00:06<22:26,  2.93s/it]

402-CCHDO-2018.nc


  1%|          | 3/462 [00:10<26:01,  3.40s/it]

30-Östlund-and-Grall-1991.nc


  1%|          | 4/462 [00:11<20:47,  2.72s/it]

99-Aoyama-and-Hirose-2004.nc


  1%|          | 5/462 [00:16<26:40,  3.50s/it]

106-Yamada-et-al-2006.nc


  1%|▏         | 6/462 [00:18<21:29,  2.83s/it]

182-Urban-et-al-2015.nc


  2%|▏         | 7/462 [00:19<18:35,  2.45s/it]

183-Bokor-et-al-2016.nc


  2%|▏         | 8/462 [00:21<16:44,  2.21s/it]

226-Sdraulig-2018.nc


  2%|▏         | 9/462 [00:23<15:34,  2.06s/it]

323-Johansen-et-al-2019.nc


  2%|▏         | 10/462 [00:25<15:06,  2.01s/it]

568-Johansen-2020.nc


  2%|▏         | 11/462 [00:27<14:46,  1.97s/it]

395-Bailly-du-Bois-et-al-2020.nc


  3%|▎         | 12/462 [00:37<33:23,  4.45s/it]

400-Boyer-et-al-2013.nc


  3%|▎         | 13/462 [00:39<27:54,  3.73s/it]

97-ASPAMARD-2004.nc


  3%|▎         | 14/462 [00:41<24:49,  3.33s/it]

358-Kall-et-al-2014.nc


  3%|▎         | 15/462 [00:43<21:05,  2.83s/it]

443-Heyraud-et-al-1994.nc


  3%|▎         | 16/462 [00:45<18:21,  2.47s/it]

508-Lee-et-al-2018.nc


  4%|▎         | 17/462 [00:46<16:13,  2.19s/it]

509-Johansen-et-al-2015.nc


  4%|▍         | 18/462 [00:48<15:28,  2.09s/it]

16-Cherry-and-Heyraud-1981.nc


  4%|▍         | 19/462 [00:49<14:17,  1.94s/it]

18-Cherry-and-Heyraud-1982.nc


  4%|▍         | 20/462 [00:51<13:27,  1.83s/it]

190-Schlitzer-et-al-2018.nc


  5%|▍         | 21/462 [00:57<22:12,  3.02s/it]

191-OSPAR-Comissions-Radioactive-Substances-Committee-(RSC)-2018.nc


  5%|▍         | 22/462 [01:02<26:29,  3.61s/it]

199-Skjerdal-et-al-2020.nc


  5%|▍         | 23/462 [01:04<22:28,  3.07s/it]

200-Zaborska-et-al-2010.nc


  5%|▌         | 24/462 [01:05<19:25,  2.66s/it]

381-Smith-2020.nc


  5%|▌         | 25/462 [01:08<18:23,  2.53s/it]

720-Payne-et-al-2024.nc
Item TBC does not exist in Zotero library


  6%|▌         | 26/462 [01:09<16:15,  2.24s/it]

432-Efurd-et-al-1997.nc


  6%|▌         | 27/462 [01:11<16:19,  2.25s/it]

685-Chamizo-et-al-2021.nc


  6%|▌         | 28/462 [01:13<14:59,  2.07s/it]

380-Smith-et-al-2020.nc


  6%|▋         | 29/462 [01:15<13:48,  1.91s/it]

718-Smith-2024.nc


  6%|▋         | 30/462 [01:16<13:31,  1.88s/it]

122-Casacuberta-et-al-2018.nc


  7%|▋         | 31/462 [01:18<13:05,  1.82s/it]

477-Valette-Silver-et-al-1999.nc


  7%|▋         | 32/462 [01:20<13:20,  1.86s/it]

222-Huang-et-al-2019.nc


  7%|▋         | 33/462 [01:22<12:40,  1.77s/it]

103-RADNOR-2010.nc


  7%|▋         | 34/462 [01:23<12:43,  1.78s/it]

409-Cherry-et-al-1987.nc


  8%|▊         | 35/462 [01:25<12:35,  1.77s/it]

201-Mietelski-et-al-2008.nc


  8%|▊         | 36/462 [01:27<12:15,  1.73s/it]

570-Szufa-2020.nc


  8%|▊         | 37/462 [01:28<12:00,  1.70s/it]

571-Szufa-2018.nc


  8%|▊         | 38/462 [01:30<11:39,  1.65s/it]

109-Gulin-and-Stokozov-2005.nc


  8%|▊         | 39/462 [01:32<11:49,  1.68s/it]

712-Fávaro-et-al-2012.nc


  9%|▊         | 40/462 [01:34<12:02,  1.71s/it]

130-Wada-et-al-2016.nc


  9%|▉         | 41/462 [01:41<24:32,  3.50s/it]

132-Fukushima-Prefectural-Federation-of-Fisheries-Co-operative-Associations-2012.nc


  9%|▉         | 42/462 [01:44<23:51,  3.41s/it]

121-TEPCO---Tokyo-Electric-Power-Company-2011.nc


  9%|▉         | 43/462 [02:01<51:46,  7.41s/it]

225-Takata-et-al-2016.nc


 10%|▉         | 44/462 [02:03<40:18,  5.79s/it]

141-TEPCO---Tokyo-Electric-Power-Company-2011.nc


 10%|▉         | 45/462 [02:05<32:27,  4.67s/it]

234-Aoyama-et-al-2013.nc


 10%|▉         | 46/462 [02:07<26:05,  3.76s/it]

237-Takata-et-al-2018.nc


 10%|█         | 47/462 [02:09<22:11,  3.21s/it]

143-Fukushima-Prefecture-2011.nc


 10%|█         | 48/462 [02:11<19:32,  2.83s/it]

156-Yoshida-et-al-2015.nc


 11%|█         | 49/462 [02:12<16:58,  2.47s/it]

157-Buesseler-2018.nc


 11%|█         | 50/462 [02:14<15:50,  2.31s/it]

159-Buesseler-2018.nc


 11%|█         | 51/462 [02:16<14:17,  2.09s/it]

178-Pham-et-al-2016.nc


 11%|█▏        | 52/462 [02:17<13:16,  1.94s/it]

193-Kusakabe-and-Takata-2020.nc


 11%|█▏        | 53/462 [02:19<12:50,  1.88s/it]

194-Tateda-and-Misonou-1990.nc


 12%|█▏        | 54/462 [02:21<12:05,  1.78s/it]

195-Tateda-and-Koyanagi-1994.nc


 12%|█▏        | 55/462 [02:22<11:40,  1.72s/it]

202-MERI-2015.nc


 12%|█▏        | 56/462 [02:24<12:31,  1.85s/it]

205-MERI-2018.nc


 12%|█▏        | 57/462 [02:27<13:45,  2.04s/it]

224-Takata-et-al-2019.nc


 13%|█▎        | 58/462 [02:29<14:07,  2.10s/it]

129-Wada-et-al-2016.nc


 13%|█▎        | 59/462 [02:31<14:30,  2.16s/it]

113-Kitamura-et-al-2013.nc


 13%|█▎        | 60/462 [02:33<13:19,  1.99s/it]

114-Charette-et-al-2013.nc


 13%|█▎        | 61/462 [02:35<12:32,  1.88s/it]

149-JAEA---Japan-Atomic-Energy-Agency-2013.nc


 13%|█▎        | 62/462 [02:37<12:42,  1.91s/it]

155-Buesseler-et-al-2012.nc


 14%|█▎        | 63/462 [02:38<12:21,  1.86s/it]

131-Ibaraki-Prefecture-2011.nc


 14%|█▍        | 64/462 [02:42<15:52,  2.39s/it]

158-Buesseler-2018.nc


 14%|█▍        | 65/462 [02:44<14:10,  2.14s/it]

127-Fukushima-Prefecture-2013.nc


 14%|█▍        | 66/462 [02:45<13:13,  2.00s/it]

147-MOE---Ministry-of-the-Environment-2012.nc


 15%|█▍        | 67/462 [02:47<13:20,  2.03s/it]

148-JAEA---Japan-Atomic-Energy-Agency-2013.nc


 15%|█▍        | 68/462 [02:49<12:20,  1.88s/it]

118-Kaeriyama-et-al-2013.nc


 15%|█▍        | 69/462 [02:51<11:49,  1.81s/it]

119-MEXT---Ministry-of-Education-Culture-Sports-Science-and-Technology-2011.nc


 15%|█▌        | 70/462 [02:53<12:11,  1.87s/it]

117-Oikawa-et-al-2013.nc


 15%|█▌        | 71/462 [02:54<12:15,  1.88s/it]

115-Casacuberta-et-al-2013.nc


 16%|█▌        | 72/462 [02:56<11:56,  1.84s/it]

116-Suzuki-et-al-2013.nc


 16%|█▌        | 73/462 [02:58<11:22,  1.75s/it]

120-NRA---Nuclear-Regulation-Authority-2013.nc


 16%|█▌        | 74/462 [03:00<11:43,  1.81s/it]

126-Fukushima-Prefecture-2011.nc


 16%|█▌        | 75/462 [03:02<12:42,  1.97s/it]

133-Fisheries-Agency---Ministry-of-Agriculture-Forestry-and-Fisheries-2011.nc


 16%|█▋        | 76/462 [03:04<11:58,  1.86s/it]

137-JCG---Japan-Coast-Guard-2011.nc


 17%|█▋        | 77/462 [03:05<11:14,  1.75s/it]

135-Japan-Fisheries-Research-and-Education-Agency-2015.nc


 17%|█▋        | 78/462 [03:07<11:51,  1.85s/it]

124-MOE---Ministry-of-the-Environment-2011.nc


 17%|█▋        | 79/462 [03:09<11:26,  1.79s/it]

142-NRA---Nuclear-Regulation-Authority-2013.nc


 17%|█▋        | 80/462 [03:11<11:09,  1.75s/it]

123-Fukushima-Prefecture-2013.nc


 18%|█▊        | 81/462 [03:12<11:07,  1.75s/it]

139-MEXT---Ministry-of-Education-Culture-Sports-Science-and-Technology-2011.nc


 18%|█▊        | 82/462 [03:14<10:42,  1.69s/it]

144-Fukushima-Prefecture-2013.nc


 18%|█▊        | 83/462 [03:16<10:43,  1.70s/it]

145-MOE---Ministry-of-the-Environment-2011.nc


 18%|█▊        | 84/462 [03:17<10:45,  1.71s/it]

146-MEXT---Ministry-of-Education-Culture-Sports-Science-and-Technology-2011.nc


 18%|█▊        | 85/462 [03:19<10:56,  1.74s/it]

680-TEPCO---Tokyo-Electric-Power-Company-2021.nc


 19%|█▊        | 86/462 [03:22<12:44,  2.03s/it]

681-NRA---Nuclear-Regulation-Authority-2021.nc


 19%|█▉        | 87/462 [03:24<13:18,  2.13s/it]

682-NRA---Nuclear-Regulation-Authority-2021.nc


 19%|█▉        | 88/462 [03:26<13:10,  2.11s/it]

427-Yamamoto-et-al-1994.nc


 19%|█▉        | 89/462 [03:28<12:39,  2.04s/it]

428-Yu-et-al-2015.nc


 19%|█▉        | 90/462 [03:30<11:54,  1.92s/it]

445-Hoffman-et-al-1974.nc


 20%|█▉        | 91/462 [03:31<11:12,  1.81s/it]

232-Honda-et-al-2012.nc


 20%|█▉        | 92/462 [03:33<11:27,  1.86s/it]

233-Aoyama-et-al-2013.nc


 20%|██        | 93/462 [03:35<10:58,  1.78s/it]

683-NRA---Nuclear-Regulation-Authority-2021.nc


 20%|██        | 94/462 [03:37<10:45,  1.76s/it]

687-TEPCO---Tokyo-Electric-Power-Company-2021.nc


 21%|██        | 95/462 [03:38<10:52,  1.78s/it]

717-Smith-et-al-2017.nc


 21%|██        | 96/462 [03:40<10:54,  1.79s/it]

688-NRA---Nuclear-Regulation-Authority-2021.nc


 21%|██        | 97/462 [03:42<11:38,  1.91s/it]

689-NRA---Nuclear-Regulation-Authority-2021.nc


 21%|██        | 98/462 [03:44<11:26,  1.89s/it]

479-Morita-2010.nc


 21%|██▏       | 99/462 [03:47<12:05,  2.00s/it]

511-Zeng-2017.nc


 22%|██▏       | 100/462 [03:48<11:30,  1.91s/it]

516-Madigan-and-Fisher-2013.nc


 22%|██▏       | 101/462 [03:50<10:49,  1.80s/it]

517-Morita-et-al-2007.nc


 22%|██▏       | 102/462 [03:51<10:27,  1.74s/it]

550-Sohtome-2014.nc


 22%|██▏       | 103/462 [03:53<10:10,  1.70s/it]

679-TEPCO---Tokyo-Electric-Power-Company-2021.nc


 23%|██▎       | 104/462 [03:55<11:22,  1.91s/it]

22-Fowler-et-al-1983.nc


 23%|██▎       | 105/462 [03:57<11:13,  1.89s/it]

56-Kaeriyama-et-al-2014.nc


 23%|██▎       | 106/462 [03:59<10:39,  1.80s/it]

572-Aono-et-al-2000.nc


 23%|██▎       | 107/462 [04:00<10:20,  1.75s/it]

134-MOE---Ministry-of-the-Environment-2012.nc


 23%|██▎       | 108/462 [04:03<11:03,  1.88s/it]

204-MERI-2017.nc


 24%|██▎       | 109/462 [04:05<11:39,  1.98s/it]

206-MERI-2019.nc


 24%|██▍       | 110/462 [04:07<12:17,  2.10s/it]

207-MERI-2020.nc


 24%|██▍       | 111/462 [04:09<12:26,  2.13s/it]

125-MOE---Ministry-of-the-Environment-2012.nc


 24%|██▍       | 112/462 [04:11<11:25,  1.96s/it]

440-Yamada-and-Nagaya-2000.nc


 24%|██▍       | 113/462 [04:12<10:34,  1.82s/it]

512-Takahashi-et-al-2015.nc


 25%|██▍       | 114/462 [04:14<10:12,  1.76s/it]

513-Madigan-2012.nc


 25%|██▍       | 115/462 [04:16<09:50,  1.70s/it]

223-Buesseler-et-al-2018.nc


 25%|██▌       | 116/462 [04:17<09:56,  1.73s/it]

436-Nakamura-et-al-2015.nc


 25%|██▌       | 117/462 [04:19<09:47,  1.70s/it]

474-Madigan-et-al-2017.nc


 26%|██▌       | 118/462 [04:21<09:42,  1.69s/it]

153-Yu-et-al-2018.nc


 26%|██▌       | 119/462 [04:22<09:32,  1.67s/it]

203-MERI-2016.nc


 26%|██▌       | 120/462 [04:24<10:19,  1.81s/it]

430-Baumann-et-al-2015.nc


 26%|██▌       | 121/462 [04:26<09:52,  1.74s/it]

433-Smith-and-Towler-1993.nc


 26%|██▋       | 122/462 [04:28<09:36,  1.70s/it]

446-Valette-Silver-and-Lauenstein-1995.nc


 27%|██▋       | 123/462 [04:30<09:52,  1.75s/it]

514-Miki-et-al-2016.nc


 27%|██▋       | 124/462 [04:31<10:00,  1.78s/it]

549-Shigeoka-et-al-2019.nc


 27%|██▋       | 125/462 [04:33<10:12,  1.82s/it]

520-Baumann-et-al-2013.nc


 27%|██▋       | 126/462 [04:35<09:49,  1.75s/it]

542-Ruelas-Inzunza-et-al-2012.nc


 27%|██▋       | 127/462 [04:36<09:27,  1.70s/it]

546-Takagi-et-al-2015.nc


 28%|██▊       | 128/462 [04:38<09:21,  1.68s/it]

547-Ruelas-Inzunza-2014.nc


 28%|██▊       | 129/462 [04:40<09:08,  1.65s/it]

548-Suchanek-et-al-1996.nc


 28%|██▊       | 130/462 [04:41<08:58,  1.62s/it]

128-NRA---Nuclear-Regulation-Authority-2013.nc


 28%|██▊       | 131/462 [04:43<08:54,  1.61s/it]

684-NRA---Nuclear-Regulation-Authority-2021.nc


 29%|██▊       | 132/462 [04:44<08:49,  1.60s/it]

196-Tateda-and-Koyanagi-1996.nc


 29%|██▉       | 133/462 [04:47<09:43,  1.77s/it]

476-Ruelas-Inzunza-et-al-2012.nc


 29%|██▉       | 134/462 [04:48<09:23,  1.72s/it]

478-Yamada-et-al-1999.nc


 29%|██▉       | 135/462 [04:50<09:27,  1.74s/it]

404-Azouz-and-Dulai-2017.nc


 29%|██▉       | 136/462 [04:51<09:10,  1.69s/it]

518-Morita-et-al-2010.nc


 30%|██▉       | 137/462 [04:53<08:57,  1.65s/it]

140-MEXT---Ministry-of-Education-Culture-Sports-Science-and-Technology-2011.nc


 30%|██▉       | 138/462 [04:55<08:44,  1.62s/it]

322-Département-de-Suivi-des-Centres-dExpérimentations-Nucléaires-(DSCEN)-2018.nc


 30%|███       | 139/462 [04:56<09:08,  1.70s/it]

321-Département-de-Suivi-des-Centres-dExpérimentations-Nucléaires-(DSCEN)-2011.nc


 30%|███       | 140/462 [04:58<09:20,  1.74s/it]

282-Pearson-et-al-2016.nc


 31%|███       | 141/462 [05:00<08:59,  1.68s/it]

279-Pearson-et-al-2016.nc


 31%|███       | 142/462 [05:01<08:48,  1.65s/it]

169-Villa-Alfageme-et-al-2019.nc


 31%|███       | 143/462 [05:03<08:34,  1.61s/it]

411-Guy-et-al-2020.nc


 31%|███       | 144/462 [05:05<08:36,  1.62s/it]

467-Poletiko-et-al-1994.nc


 31%|███▏      | 145/462 [05:06<08:37,  1.63s/it]

569-Bellamy-and-Hunter-1997.nc


 32%|███▏      | 146/462 [05:08<08:28,  1.61s/it]

466-Jeffree-et-al-1997.nc


 32%|███▏      | 147/462 [05:09<08:24,  1.60s/it]

396-Ostlund-nd.nc


 32%|███▏      | 148/462 [05:11<08:32,  1.63s/it]

308-Jalili-et-al-2009.nc


 32%|███▏      | 149/462 [05:13<08:26,  1.62s/it]

371-CNESTEN-2020.nc


 32%|███▏      | 150/462 [05:14<08:26,  1.62s/it]

397-Ostlund-and-Grall-1987.nc


 33%|███▎      | 151/462 [05:16<08:30,  1.64s/it]

399-Andrié-et-al-1988.nc


 33%|███▎      | 152/462 [05:18<08:24,  1.63s/it]

303-Carvalho-et-al-2011.nc


 33%|███▎      | 153/462 [05:19<08:25,  1.64s/it]

352-Hamid-et-al-2010.nc


 33%|███▎      | 154/462 [05:21<08:17,  1.62s/it]

357-Arnedo-et-al-2013.nc


 34%|███▎      | 155/462 [05:22<08:10,  1.60s/it]

372-CNESTEN-2020.nc


 34%|███▍      | 156/462 [05:24<08:11,  1.61s/it]

188-Thünen-Institute-2018.nc


 34%|███▍      | 157/462 [05:36<23:54,  4.70s/it]

408-Carvalho-2011.nc


 34%|███▍      | 158/462 [05:38<19:56,  3.94s/it]

431-Malta-and-Carvalho-2011.nc


 34%|███▍      | 159/462 [05:40<16:22,  3.24s/it]

465-Tejera-et-al-2019.nc


 35%|███▍      | 160/462 [05:41<13:48,  2.74s/it]

84-MAFF-(now-Cefas)-2004.nc


 35%|███▍      | 161/462 [05:45<15:43,  3.13s/it]

25-Nies-1989.nc


 35%|███▌      | 162/462 [05:47<13:17,  2.66s/it]

31-Aarkrog-et-al-1992.nc


 35%|███▌      | 163/462 [05:48<11:34,  2.32s/it]

41-Aarkrog-et-al-1994.nc


 35%|███▌      | 164/462 [05:50<10:23,  2.09s/it]

419-Kanisch-and-Aust-2013.nc


 36%|███▌      | 165/462 [05:52<09:46,  1.97s/it]

448-HEYRAUD-et-al-1988.nc


 36%|███▌      | 166/462 [05:53<09:23,  1.90s/it]

296-Carvalho-et-al-2011.nc


 36%|███▌      | 167/462 [05:55<08:58,  1.83s/it]

301-Fonollosa-et-al-2015.nc


 36%|███▋      | 168/462 [05:57<08:37,  1.76s/it]

339-Benkdad-et-al-2011.nc


 37%|███▋      | 169/462 [05:58<08:32,  1.75s/it]

341-CNESTEN-2019.nc


 37%|███▋      | 170/462 [06:00<08:21,  1.72s/it]

161-Gómez-Guzmán-et-al-2013.nc


 37%|███▋      | 171/462 [06:02<08:54,  1.84s/it]

168-Vivo-Vilches-et-al-2018.nc


 37%|███▋      | 172/462 [06:04<08:42,  1.80s/it]

174-Hurtado-Bermúdez-et-al-2018.nc


                                                 

ValueError: trying to assign illegal value to Enum variable