# De Bailly
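> Quick postprocessing: download the PANGAEA archive (doi:10.1594/PANGAEA.762253) and convert selected tab-delimited files to CSV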

In [None]:
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
from fastcore.xtras import L
from pathlib import Path

import pandas as pd

In [None]:
# Download the zip archive fully into memory, then unpack it under ./irsn.
zipurl = 'https://doi.pangaea.de/10.1594/PANGAEA.762253?format=zip'
with urlopen(zipurl) as zipresp, ZipFile(BytesIO(zipresp.read())) as zfile:
    zfile.extractall('./irsn')

In [None]:
# Directory (relative to the working directory) that is listed and
# filtered for the .tab files in the cells below.
src_name = Path('irsn/datasets')

In [None]:
# Print every entry of the dataset directory, one path per line.
for tab_path in src_name.ls():
    print(tab_path)

irsn/datasets/20030808_StMartin_ADCP.tab
irsn/datasets/20030717_StMartin_ADCP.tab
irsn/datasets/20030614_Omonville_ADCP.tab
irsn/datasets/StMartin_tide-gauge.tab
irsn/datasets/20030809_Vauville_ADCP.tab
irsn/datasets/dispro-drifter_72975.tab
irsn/datasets/20050410_Vauville_ADCP.tab
irsn/datasets/20030810_Flamanville_ADCP.tab
irsn/datasets/20050412_Omonville_ADCP.tab
irsn/datasets/20050411_Flamanville_ADCP.tab
irsn/datasets/dispro-bathymetry.tab
irsn/datasets/20050403_StMartin_ADCP.tab
irsn/datasets/Herqueville_tide-gauge.tab
irsn/datasets/Areva-NC_tritium-release.tab
irsn/datasets/Gaury_tide-gauge.tab
irsn/datasets/20030613_Ecalgrain_ADCP.tab
irsn/datasets/dispro-tritium.tab
irsn/datasets/20050404_Ecalgrain_ADCP.tab
irsn/datasets/dispro-drifter_72878.tab


## Areva Releases

In [None]:
# Load the tritium-release table: tab-separated, skipping the first
# 12 lines of the file before parsing.
df_release = pd.read_csv(
    'irsn/datasets/Areva-NC_tritium-release.tab',
    sep='\t',
    skiprows=12,
)

In [None]:
# Preview the first five rows of the release table.
df_release.head(n=5)

Unnamed: 0,Date/time start,Date/time end,Sample ID,A [Bq]
0,1982-01-02T16:00,1982-01-03T02:17,4318V,2900000000000.0
1,1982-01-05T18:50,1982-01-06T08:00,4321V,550000000000.0
2,1982-01-07T18:50,1982-01-08T07:30,4323V,99000000000.0
3,1982-01-12T00:50,1982-01-12T14:00,4325V,4400000000000.0
4,1982-01-15T18:50,1982-01-16T08:00,4332V,9500000000000.0


In [None]:
# Inspect the 'A [Bq]' column on its own.
df_release.loc[:, 'A [Bq]']

0       2.900000e+12
1       5.500000e+11
2       9.900000e+10
3       4.400000e+12
4       9.500000e+12
            ...     
7835    3.800000e+13
7836    6.300000e+13
7837    3.500000e+10
7838    3.500000e+13
7839    6.900000e+11
Name: A [Bq], Length: 7840, dtype: float64

In [None]:
# Write the release table to CSV without the DataFrame index.
df_release.to_csv(path_or_buf='./de-bailly-release.csv', index=False)

## ADCPs

In [None]:
# Keep only the directory entries whose file name contains 'ADCP'.
paths_adcp = [path for path in src_name.ls() if 'ADCP' in path.name]
paths_adcp

[Path('irsn/datasets/20030808_StMartin_ADCP.tab'),
 Path('irsn/datasets/20030717_StMartin_ADCP.tab'),
 Path('irsn/datasets/20030614_Omonville_ADCP.tab'),
 Path('irsn/datasets/20030809_Vauville_ADCP.tab'),
 Path('irsn/datasets/20050410_Vauville_ADCP.tab'),
 Path('irsn/datasets/20030810_Flamanville_ADCP.tab'),
 Path('irsn/datasets/20050412_Omonville_ADCP.tab'),
 Path('irsn/datasets/20050411_Flamanville_ADCP.tab'),
 Path('irsn/datasets/20050403_StMartin_ADCP.tab'),
 Path('irsn/datasets/20030613_Ecalgrain_ADCP.tab'),
 Path('irsn/datasets/20050404_Ecalgrain_ADCP.tab')]

In [None]:
# Parse each ADCP file with the same settings (tab-separated, first 14
# lines skipped), then stack the resulting frames into a single one.
df_adcps = [pd.read_csv(path, sep='\t', skiprows=14) for path in paths_adcp]
df_adcp = pd.concat(df_adcps)

In [None]:
# Write the combined ADCP table to CSV without the DataFrame index.
df_adcp.to_csv(path_or_buf='./adcps.csv', index=False)

## Bathymetry

In [None]:
# Convert the bathymetry file (tab-separated, first 13 lines skipped)
# to CSV without the DataFrame index.
df_bathy = pd.read_csv('irsn/datasets/dispro-bathymetry.tab', sep='\t', skiprows=13)
df_bathy.to_csv('./bathy.csv', index=False)

## Tritium

In [None]:
# Convert the tritium file (tab-separated, first 23 lines skipped)
# to CSV without the DataFrame index.
df_tritium = pd.read_csv('irsn/datasets/dispro-tritium.tab', sep='\t', skiprows=23)
df_tritium.to_csv('./tritium.csv', index=False)