In [None]:
#| default_exp inout

# Input/Output
> File readers and writers.

In [None]:
#| export
import tomli_w
import tomli
from typing import Dict, Any


In [None]:
#| exports
def write_toml(fname: str, cfg: Dict[str, Any]):
    """Write a TOML file from a dictionary.

    Keys whose value is `None` (at any nesting depth of dictionaries) are
    reported as dotted paths before writing. TOML has no null value, so
    serialization is expected to fail for such configurations.

    The configuration is serialized in memory *before* the target file is
    opened, so a serialization error never truncates or creates `fname`.

    Args:
        fname: Path of the TOML file to create (overwritten if it exists).
        cfg: Possibly nested dictionary to serialize.

    Raises:
        Whatever `tomli_w.dumps` raises when `cfg` cannot be serialized,
        and `OSError` if the file cannot be written.
    """
    none_keys = [k for k, v in flatten_dict(cfg).items() if v is None]
    if none_keys:
        # `map(str, ...)` keeps the warning itself from failing on non-string keys.
        print(f"Warning: The following config keys have None values: {', '.join(map(str, none_keys))}")

    # Serialize first: opening with "wb" truncates the file immediately, so a
    # failure afterwards would leave an empty/partial file behind.
    payload = tomli_w.dumps(cfg).encode("utf-8")

    print(f'Creating {fname}')
    with open(fname, "wb") as f:
        f.write(payload)

In [None]:
#| exports

def flatten_dict(d: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]:
    """Collapse a nested dictionary into a single level.

    Each leaf value is stored under the path of keys leading to it, joined
    with `sep` and prefixed by `parent_key` when one is given. Nested
    dictionaries are expanded recursively; an empty nested dictionary
    contributes no entries. Non-dict values (lists included) are kept as-is.
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the current path as the prefix for the children.
        flat.update(flatten_dict(value, path, sep=sep))
    return flat

In [None]:
#| exports
def read_toml(fname):
    "Parse the TOML file at `fname` and return its content as a dictionary."
    # The file is opened in binary mode and handed to `tomli.load` as-is.
    with open(fname, "rb") as fh:
        return tomli.load(fh)

In [None]:
from marisco.configs import get_time_units

In [None]:
from netCDF4 import Dataset
import pandas as pd
import numpy as np

def nc_to_dfs(
    fname: str # Path to NetCDF file
    ) -> dict: # Dictionary with group names as keys and pandas DataFrames as values
    "Read each group of a NetCDF file into its own DataFrame, keyed by the upper-cased group name."
    frames = {}

    with Dataset(fname, 'r') as nc:
        for name, grp in nc.groups.items():
            # Keep data variables only: a variable named after a dimension (like 'id') is skipped
            columns = {
                var_name: var[:]
                for var_name, var in grp.variables.items()
                if var_name not in grp.dimensions
            }
            frame = pd.DataFrame(columns)

            # A 'time' column, when present, is read as seconds since the epoch
            if 'time' in frame.columns:
                frame['time'] = pd.to_datetime(frame['time'], unit='s')

            frames[name.upper()] = frame

    return frames

In [None]:
from pathlib import Path
# Example usage: read a NetCDF file into a dictionary of DataFrames
# (one per group, keyed by the upper-cased group name).
fname = Path('../../_data/output/dump/100-HELCOM-MORS-2018.nc')
dfs = nc_to_dfs(fname)

In [None]:
# Group names found in the file (upper-cased by `nc_to_dfs`).
dfs.keys()

dict_keys(['BIOTA', 'SEAWATER', 'SEDIMENT'])

In [None]:
# (rows, columns) of the DataFrame built from the BIOTA group.
dfs['BIOTA'].shape

(61535, 17)