# Bailly du Bois et al., IRSN, 2012


> NetCDF handler for ["Bailly du Bois, Pascal; Dumas, F; Solier, L; Voiseux, C (2011): DISPRO datasets for validation of coastal hydrodynamic models. PANGAEA"](https://doi.org/10.1594/PANGAEA.762253).

## Packages import

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np

from netCDF4 import Dataset
from datetime import datetime, timedelta
from cftime import num2date, date2num

#from marisco.utils import has_valid_varname
from marisco.serializers import to_netcdf
from datetime import datetime
#import re

## Load `tab` file

The data is provided as a tab-separated file.

In [None]:
# Location of the tab-separated DISPRO tritium file, relative to this notebook
path = '../../_data/Bailly-du-Bois_2011-dispro/datasets/dispro-tritium.tab'

In [None]:
# The first 23 lines of the file precede the column header row and are skipped
df = pd.read_csv(path, sep='\t', skiprows=23)
df.head()

Unnamed: 0,Depth water [m],Sample ID,Date/Time,Longitude,Latitude,3H [Bq/l],3H std dev [±],Expedition,ID (Release ID),ID (Transect number),ID (Crossing plume (1/0))
0,2.0,1,2002-08-27T10:24:00,-1.949,49.750917,3.17,4.31,Dsp08_0001,0,0,0
1,2.0,2,2002-08-27T10:30:00,-1.9494,49.751333,2.54,4.26,Dsp08_0002,0,0,0
2,2.0,3,2002-08-27T10:40:00,-1.949267,49.751267,0.85,4.16,Dsp08_0003,0,0,0
3,2.0,4,2002-08-27T10:50:00,-1.95055,49.751417,1.06,4.16,Dsp08_0004,0,0,0
4,2.0,5,2002-08-27T11:01:00,-1.96095,49.748967,0.0,2.84,Dsp08_0005,0,0,0


## Data preparation

In [None]:
# Columns kept for the export: depth, timestamp, position, tritium activity
# and its reported standard deviation
cols_of_interest = ['Depth water [m]', 'Date/Time', 'Longitude', 'Latitude', '3H [Bq/l]', '3H std dev [±]']
# Take an explicit copy: the following cells assign to columns of `df`, and
# doing so on a bare column subset raises pandas' SettingWithCopyWarning
df = df[cols_of_interest].copy()

In [None]:
# Rename columns with an explicit old -> new mapping so that each short name
# stays attached to its source column regardless of the column order
df = df.rename(columns={
    'Depth water [m]': 'depth',
    'Date/Time': 'time',
    'Longitude': 'lon',
    'Latitude': 'lat',
    '3H [Bq/l]': 'h3',
    '3H std dev [±]': 'h3_unc',
})

In [None]:
# Convert the ISO 8601 time strings (e.g. 2002-08-27T10:24:00) to datetime.
# `infer_datetime_format` is deprecated since pandas 2.0 and only emits a
# warning there, so it is dropped; bracket assignment is used because
# attribute-style assignment is easy to confuse with setting a plain attribute.
df['time'] = pd.to_datetime(df['time'])

In [None]:
def format_time(x):
    """Encode a datetime as a number of seconds since 1970-01-01 00:00:00.0."""
    return date2num(x, units="seconds since 1970-01-01 00:00:00.0")


# Replace every timestamp of the `time` column by its numeric encoding
df['time'] = df['time'].apply(format_time)

In [None]:
# Preview the first rows of the prepared frame
df.head()

Unnamed: 0,depth,time,lon,lat,h3,h3_unc
0,2.0,1030443840,-1.949,49.750917,3.17,4.31
1,2.0,1030444200,-1.9494,49.751333,2.54,4.26
2,2.0,1030444800,-1.949267,49.751267,0.85,4.16
3,2.0,1030445400,-1.95055,49.751417,1.06,4.16
4,2.0,1030446060,-1.96095,49.748967,0.0,2.84


## Encode as NetCDF

In [None]:
# Configuration passed to `to_netcdf`. The `global_attr` entries are still
# placeholders ('...' / 'tbd') and need to be completed.
# NOTE(review): presumably written as global attributes of the NetCDF file — confirm.
CONFIGS = dict(
    global_attr=dict(
        description='Bailly du Bois, IRSN dataset ...',
        keyword='3H seawater radionuclides ...',
        license='tbd',
    )
)

In [None]:
def units_fn(grp_name,
             rdn_name):
    """Return the unit string for a variable of a given group.

    Args:
        grp_name: Name of the group; only 'seawater' is handled.
        rdn_name: Variable name; names containing '_unc' are treated as
            uncertainty variables.

    Returns:
        '%' for uncertainty variables and 'Bq/l' otherwise when `grp_name`
        is 'seawater'; None for any other group.
    """
    if grp_name != 'seawater':
        return None
    # '%' is to be confirmed.
    # NOTE(review): the source column is labelled '3H std dev [±]'; check
    # whether it is a relative (%) or an absolute (Bq/l) uncertainty.
    return '%' if '_unc' in rdn_name else 'Bq/l'

In [None]:
# File names passed to `to_netcdf`
fname_cdl = '../../_data/output/maris-cdl.nc'
# Plain string literal: the original f-string had no placeholders
fname_output = "../../_data/output/bailly-du-bois-2012.nc"
# One data frame per group name; this dataset only has seawater samples
dfs = {'seawater': df}

to_netcdf(dfs, fname_cdl, fname_output, CONFIGS, units_fn)

% of discarded data for grp seawater: 0.0
