In [None]:
#| default_exp decoders

# Decoders
> Various utilities to decode MARIS datasets from `NetCDF` files into pandas DataFrames.

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| export
from pathlib import Path
from netCDF4 import Dataset
import pandas as pd
import numpy as np
from fastcore.basics import patch, store_attr
import fastcore.all as fc

from marisco.configs import (
    NC_DTYPES, 
    NC_VARS, 
    NC_DIM,
    NC_GROUPS,
    lut_path, 
    Enums,
    nc_tpl_path,
    get_time_units
)

In [None]:
#| exports
def nc_to_dfs(
    fname: str # Path to NetCDF file
    ) -> dict: # Dictionary with group names as keys and pandas DataFrames as values
    "Load each group of a NetCDF file into a dataframe, keyed by the upper-cased group name."
    frames = {}

    with Dataset(fname, 'r') as ds:
        for name, grp in ds.groups.items():
            # Keep data variables only: a variable that shares its name
            # with one of the group's dimensions (like 'id') is left out.
            columns = {
                key: var[:]
                for key, var in grp.variables.items()
                if key not in grp.dimensions
            }
            frame = pd.DataFrame(columns)

            # A 'time' column is interpreted as seconds since the Unix epoch
            # and replaced by the corresponding datetimes.
            if 'time' in frame.columns:
                frame['time'] = pd.to_datetime(frame['time'], unit='s')

            frames[name.upper()] = frame

    return frames

Example usage:

In [None]:
#| eval: false
# Alternative sample files (swap in as needed):
# fname = Path('../files/nc/encoding-test.nc')
# fname = Path('../../_data/output/dump/100-HELCOM-MORS-2018.nc')
fname = Path('../../_data/output/190-geotraces-2021.nc')

dfs = nc_to_dfs(fname)

# Print a short summary (name, shape, first rows) of every decoded group
for grp in dfs:
    df = dfs[grp]
    print('group:', grp)
    print(f'shape: {df.shape}')
    print(df.head(), '\n')

group: SEAWATER
shape: (19139, 11)
          lon        lat   smp_depth  tot_depth                time  smp_id  \
0  170.337921  38.327099   17.799999     2827.0 2010-10-17 00:13:29  842525   
1  170.337921  38.327099   34.700001     2827.0 2010-10-17 00:13:29  842528   
2  170.337921  38.327099   67.500000     2827.0 2010-10-17 00:13:29  842531   
3  170.337921  38.327099   91.900002     2827.0 2010-10-17 00:13:29  842534   
4  170.337921  38.327099  136.600006     2827.0 2010-10-17 00:13:29  842540   

   nuclide  value  unit  filt  samp_met  
0        1  0.733     7     1         1  
1        1  0.696     7     1         1  
2        1  0.718     7     1         1  
3        1  0.709     7     1         1  
4        1  0.692     7     1         1   

group: SUSPENDED_MATTER
shape: (7606, 11)
          lon        lat   smp_depth  tot_depth                time  smp_id  \
0 -171.006165 -42.341301   24.799999     4569.0 2008-02-13 21:05:05  740882   
1 -171.006165 -42.341301  100.199997