# Bailly du Bois et al., IRSN, 2012


> NetCDF handler for ["Bailly du Bois, Pascal; Dumas, F; Solier, L; Voiseux, C (2011): DISPRO datasets for validation of coastal hydrodynamic models. PANGAEA"](https://doi.org/10.1594/PANGAEA.762253).

## Packages import

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np

from netCDF4 import Dataset
from datetime import datetime, timedelta
from cftime import num2date, date2num

#from marisco.utils import has_valid_varname
from marisco.serializers import to_netcdf
from datetime import datetime
#import re

## Load `tab` file

The data is provided as a tab-separated file.

In [None]:
# Relative path to the DISPRO tritium `.tab` file that is loaded below.
path = '../../_data/Bailly-du-Bois_2011-dispro/datasets/dispro-tritium.tab'

In [None]:
# Skip the first 23 lines of the file before parsing the tab-separated table
# (presumably the PANGAEA metadata header -- confirm against the raw file).
df = pd.read_csv(path, sep='\t', skiprows=23)
df.head()

## Data preparation

In [None]:
# Source columns kept for the NetCDF export (names exactly as in the file).
cols_of_interest = [
    'Depth water [m]',
    'Date/Time',
    'Longitude',
    'Latitude',
    '3H [Bq/l]',
    '3H std dev [±]',
]
# Take an explicit copy: a column-subset selection is flagged by pandas as a
# potential copy of the parent frame, so assigning to its columns afterwards
# can raise SettingWithCopyWarning.
df = df[cols_of_interest].copy()

In [None]:
# Replace the column labels with short names. The assignment is positional,
# so the order must match the order of the columns currently held by `df`.
df.columns = [
    'depth',
    'time',
    'lon',
    'lat',
    'h3',
    'h3_unc',
]

In [None]:
# Convert time to datetime type.
# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
# the default behaviour there) and only triggers a warning, so it is omitted.
# Bracket assignment is used instead of attribute assignment so the target is
# unambiguously the 'time' column.
df['time'] = pd.to_datetime(df['time'])

In [None]:
# Encode each timestamp as a number of seconds since 1970-01-01 00:00:00.0
def format_time(x):
    """Return `x` converted by `date2num` to seconds since 1970-01-01 00:00:00.0."""
    return date2num(x, units="seconds since 1970-01-01 00:00:00.0")


df['time'] = df['time'].apply(format_time)

In [None]:
# Preview the first rows of the prepared table.
df.head()

## Encode as NetCDF

In [None]:
# Configuration passed to `to_netcdf` below; only global attributes are set.
# NOTE(review): 'license' still holds the placeholder 'tbd' -- confirm before release.
CONFIGS = dict(
    global_attr=dict(
        description='Bailly du Bois, IRSN dataset ...',
        keyword='3H seawater radionuclides ...',
        license='tbd',
    ),
)

In [None]:
def units_fn(grp_name,
             rdn_name):
    """Return the unit label for variable `rdn_name` in group `grp_name`.

    Only the 'seawater' group is handled: a name containing '_unc' yields
    '%' (to be confirmed), any other name yields 'Bq/l'. For every other
    group the function returns None.
    """
    if grp_name != 'seawater':
        return None
    # '%' for uncertainty columns is still to be confirmed
    return '%' if '_unc' in rdn_name else 'Bq/l'

In [None]:
# Input and output locations for the serializer.
# NOTE(review): `fname_cdl` is presumably the NetCDF template generated from
# the MARIS CDL -- confirm against `marisco.serializers.to_netcdf`.
fname_cdl = '../../_data/output/maris-cdl.nc'
fname_output = '../../_data/output/bailly-du-bois-2012.nc'

# One dataframe per group name; this dataset only provides seawater samples.
dfs = dict(seawater=df)

to_netcdf(dfs, fname_cdl, fname_output, CONFIGS, units_fn)