# HELCOM dataset
> Importing, cleaning and transforming HELCOM data ([source](https://helcom.fi/about-us))

## Packages import

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from netCDF4 import Dataset
from datetime import datetime
from cftime import num2date, date2num
import re
import glob
from pathlib import Path

## Data reading

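The data is provided as a Microsoft Access `.accdb` database file. This notebook works from CSV files stored in `src_dir`, such as `NUCLIDE.csv`, `SED01.csv` and `SED02.csv`.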

In [None]:
# Directory containing the CSV files read throughout this notebook.
src_dir = '../../_data/accdb/mors/csv'

### Database overview

In [None]:
# Print a quick overview (column names, dtypes, first rows) of every CSV file in `src_dir`.
# Files are visited in sorted order so the output is stable from one run to the next.
for file in sorted(glob.glob(src_dir + '/*.csv')):
    print(80*'-')
    print(f'{Path(file).name}')
    print(80*'-', '\n')
    df = pd.read_csv(file)
    print(f'Columns: {df.columns.values}\n')
    # `DataFrame.info()` prints its report itself and returns None, so it is called
    # directly; wrapping it in `print` would emit a stray "None" line.
    df.info()
    print()
    print(df.head(), '\n')

--------------------------------------------------------------------------------
NUCLIDE.csv
-------------------------------------------------------------------------------- 

Columns: ['NUCLIDE' 'NUCLIDE_NAME' 'CLASSIFICATION' 'WEIGHT']

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68 entries, 0 to 67
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   NUCLIDE         68 non-null     object 
 1   NUCLIDE_NAME    68 non-null     object 
 2   CLASSIFICATION  10 non-null     object 
 3   WEIGHT          0 non-null      float64
dtypes: float64(1), object(3)
memory usage: 2.2+ KB
None 

  NUCLIDE   NUCLIDE_NAME CLASSIFICATION  WEIGHT
0   AC228   Actinium-228            NaN     NaN
1  AG108M     Silver-108            NaN     NaN
2  AG110M     Silver-110            NaN     NaN
3   AM241  Americium-241            NaN     NaN
4   BA140     Barium-140            NaN     NaN 

----------------------------------------

In [None]:
# SED01 is the sample table and SED02 the measurement table; both carry a `KEY` column.
sed01 = pd.read_csv(Path(src_dir)/'SED01.csv')
sed02 = pd.read_csv(Path(src_dir)/'SED02.csv')

In [None]:
# SED01: one row per sample, with its station, position and sampling date.
# `KEY` is unique in this table.
sed01

Unnamed: 0,KEY,COUNTRY,LABORATORY,SEQUENCE,DATE,YEAR,MONTH,DAY,STATION,LATITUDE (ddmmmm),...,LOWSLI,AREA,SEDI,OXIC,DW%,LOI%,MORS_SUBBASIN,HELCOM_SUBBASIN,SUM_LINK,DATE_OF_ENTRY
0,SCLOR1984001,67.0,CLOR,1984001,06/03/84 00:00:00,1984,6,3.0,B12,54.200,...,5.0,0.00500,-99.0,,,,2,6,a,
1,SCLOR1984002,67.0,CLOR,1984002,06/03/84 00:00:00,1984,6,3.0,B12,54.200,...,10.0,0.00500,-99.0,,,,2,6,a,
2,SCLOR1984003,67.0,CLOR,1984003,06/03/84 00:00:00,1984,6,3.0,B12,54.200,...,15.0,0.00500,-99.0,,,,2,6,a,
3,SCLOR1984004,67.0,CLOR,1984004,06/03/84 00:00:00,1984,6,3.0,B12,54.200,...,20.0,0.00500,-99.0,,,,2,6,a,
4,SCLOR1984005,67.0,CLOR,1984005,06/03/84 00:00:00,1984,6,3.0,B12,54.200,...,25.0,0.00500,-99.0,,,,2,6,a,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12747,SCLOR2018068,67.0,CLOR,2018068,06/06/18 00:00:00,2018,6,6.0,P39,54.445,...,11.0,0.01178,34.0,A,23.1,,6,6,,03/30/20 00:00:00
12748,SCLOR2018069,67.0,CLOR,2018069,06/06/18 00:00:00,2018,6,6.0,P39,54.445,...,13.0,0.01178,34.0,A,24.2,,6,6,,03/30/20 00:00:00
12749,SCLOR2018070,67.0,CLOR,2018070,06/06/18 00:00:00,2018,6,6.0,P39,54.445,...,15.0,0.01178,34.0,A,25.3,,6,6,,03/30/20 00:00:00
12750,SCLOR2018071,67.0,CLOR,2018071,06/06/18 00:00:00,2018,6,6.0,P39,54.445,...,17.0,0.01178,34.0,A,25.3,,6,6,,03/30/20 00:00:00


In [None]:
# Latitude as stored in the `ddmmmm` column. Judging from the values, the digits after
# the decimal point look like minutes rather than a decimal fraction: 54.200 here
# corresponds to 54.3333 in the `dddddd` column (54 deg 20.0 min = 54.3333 deg) - TODO confirm.
sed01['LATITUDE (ddmmmm)']

0        54.200
1        54.200
2        54.200
3        54.200
4        54.200
          ...  
12747    54.445
12748    54.445
12749    54.445
12750    54.445
12751    54.445
Name: LATITUDE (ddmmmm), Length: 12752, dtype: float64

In [None]:
# The same latitudes from the `dddddd` column; this is the latitude column selected
# later in `cols`.
sed01['LATITUDE (dddddd)']

0        54.3333
1        54.3333
2        54.3333
3        54.3333
4        54.3333
          ...   
12747    54.7417
12748    54.7417
12749    54.7417
12750    54.7417
12751    54.7417
Name: LATITUDE (dddddd), Length: 12752, dtype: float64

In [None]:
sed01.LABORATORY.unique()

array(['CLOR', 'DHIG', 'EBRS', 'ERPC', 'KRIL', 'LEPA', 'LVDC', 'LREB',
       'LVEA', 'NCRS', 'RISO', 'SAAS', 'SSSI', 'SSSM', 'STUK'],
      dtype=object)

In [None]:
# Number of distinct sample keys in SED01.
len(sed01.KEY.unique())

12752

In [None]:
len(sed02.KEY.unique())

12730

In [None]:
sed02[sed02.duplicated(subset='KEY')]

Unnamed: 0,KEY,NUCLIDE,METHOD,< VALUE_Bq/kg,VALUE_Bq/kg,ERROR%_kg,< VALUE_Bq/m²,VALUE_Bq/m²,ERROR%_m²,DATE_OF_ENTRY
186,SKRIL2012048,CS137,,,3.00,33.0,,,,08/20/14 00:00:00
187,SKRIL2012049,CS137,,<,1.00,,,,,08/20/14 00:00:00
188,SKRIL2012050,CS137,,,270.00,21.0,,,,08/20/14 00:00:00
189,SKRIL2012051,CS137,,,420.00,20.0,,,,08/20/14 00:00:00
190,SKRIL2012052,CS137,,,440.00,20.0,,,,08/20/14 00:00:00
...,...,...,...,...,...,...,...,...,...,...
37342,SSTUK2016044,CS137,STUK01,,1.20,12.0,,8.916443,15.0,
37343,SSTUK2016045,CS137,STUK01,,0.79,20.0,,5.992930,23.0,
37344,SSTUK2016050,CS137,STUK01,,512.00,11.0,,2164.945699,14.0,
37345,SSTUK2016051,CS137,STUK01,,527.00,6.3,,2523.279045,9.3,


In [None]:
sed02[sed02.KEY == 'SKRIL2012048']

Unnamed: 0,KEY,NUCLIDE,METHOD,< VALUE_Bq/kg,VALUE_Bq/kg,ERROR%_kg,< VALUE_Bq/m²,VALUE_Bq/m²,ERROR%_m²,DATE_OF_ENTRY
0,SKRIL2012048,RA226,,,35.0,26.0,,,,08/20/14 00:00:00
186,SKRIL2012048,CS137,,,3.0,33.0,,,,08/20/14 00:00:00
562,SKRIL2012048,RA228,,,60.0,20.0,,,,08/20/14 00:00:00
825,SKRIL2012048,K40,,,980.0,20.0,,,,08/20/14 00:00:00


## Load sediments

In [None]:
# Attach the sample metadata (SED01) to every measurement (SED02).
# `how='left'` keeps every measurement row, even when its KEY has no match in SED01.
# `validate='many_to_one'` makes the merge fail loudly if KEY is ever duplicated in
# SED01, which would otherwise silently multiply measurement rows.
sediments = pd.merge(pd.read_csv(Path(src_dir)/'SED02.csv'), # measurements
                     pd.read_csv(Path(src_dir)/'SED01.csv'), # sample
                     on='KEY', how='left', validate='many_to_one')

In [None]:
sediments.head()

Unnamed: 0,KEY,NUCLIDE,METHOD,< VALUE_Bq/kg,VALUE_Bq/kg,ERROR%_kg,< VALUE_Bq/m²,VALUE_Bq/m²,ERROR%_m²,DATE_OF_ENTRY_x,...,LOWSLI,AREA,SEDI,OXIC,DW%,LOI%,MORS_SUBBASIN,HELCOM_SUBBASIN,SUM_LINK,DATE_OF_ENTRY_y
0,SKRIL2012048,RA226,,,35.0,26.0,,,,08/20/14 00:00:00,...,20.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
1,SKRIL2012049,RA226,,,36.0,22.0,,,,08/20/14 00:00:00,...,27.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
2,SKRIL2012050,RA226,,,38.0,24.0,,,,08/20/14 00:00:00,...,2.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
3,SKRIL2012051,RA226,,,36.0,25.0,,,,08/20/14 00:00:00,...,4.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00
4,SKRIL2012052,RA226,,,30.0,23.0,,,,08/20/14 00:00:00,...,6.0,0.006,,,,,11.0,11.0,,08/20/14 00:00:00


In [None]:
sediments.columns

Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/kg', 'VALUE_Bq/kg', 'ERROR%_kg',
       '< VALUE_Bq/m²', 'VALUE_Bq/m²', 'ERROR%_m²', 'DATE_OF_ENTRY_x',
       'COUNTRY', 'LABORATORY', 'SEQUENCE', 'DATE', 'YEAR', 'MONTH', 'DAY',
       'STATION', 'LATITUDE (ddmmmm)', 'LATITUDE (dddddd)',
       'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)', 'DEVICE', 'TDEPTH',
       'UPPSLI', 'LOWSLI', 'AREA', 'SEDI', 'OXIC', 'DW%', 'LOI%',
       'MORS_SUBBASIN', 'HELCOM_SUBBASIN', 'SUM_LINK', 'DATE_OF_ENTRY_y'],
      dtype='object')

### Select cols of interest

In [None]:
cols = ['NUCLIDE', 'VALUE_Bq/kg', 'ERROR%_kg', 'DATE',
        'TDEPTH', 'LATITUDE (dddddd)', 'LONGITUDE (dddddd)']

In [None]:
# Keep only the columns of interest. `.copy()` yields an independent frame, so the
# column assignments and in-place edits in later cells act on `sediments` itself
# rather than on a slice of the merged frame (avoids SettingWithCopyWarning).
sediments = sediments[cols].copy(); sediments

Unnamed: 0,NUCLIDE,VALUE_Bq/kg,ERROR%_kg,DATE,TDEPTH,LATITUDE (dddddd),LONGITUDE (dddddd)
0,RA226,35.00,26.0,06/17/12 00:00:00,71.0,59.6667,24.0000
1,RA226,36.00,22.0,06/17/12 00:00:00,71.0,59.6667,24.0000
2,RA226,38.00,24.0,08/10/12 00:00:00,23.0,59.8600,28.8433
3,RA226,36.00,25.0,08/10/12 00:00:00,23.0,59.8600,28.8433
4,RA226,30.00,23.0,08/10/12 00:00:00,23.0,59.8600,28.8433
...,...,...,...,...,...,...,...
37342,CS137,1.20,12.0,06/09/16 00:00:00,171.0,59.0360,21.0830
37343,CS137,0.79,20.0,06/09/16 00:00:00,171.0,59.0360,21.0830
37344,CS137,512.00,11.0,05/29/16 00:00:00,131.0,61.0667,19.7297
37345,CS137,527.00,6.3,05/29/16 00:00:00,131.0,61.0667,19.7297


### Parse time

In [None]:
# Preview the parsed dates. The DATE strings look like '06/17/12 00:00:00'
# (month/day/two-digit year), so the format is spelled out explicitly rather than
# relying on `infer_datetime_format`, which is deprecated since pandas 2.0.
# A value that does not follow this layout raises instead of being guessed.
pd.to_datetime(sediments.DATE, format='%m/%d/%y %H:%M:%S')

0       2012-06-17
1       2012-06-17
2       2012-08-10
3       2012-08-10
4       2012-08-10
           ...    
37342   2016-06-09
37343   2016-06-09
37344   2016-05-29
37345   2016-05-29
37346   2016-05-29
Name: DATE, Length: 37347, dtype: datetime64[ns]

In [None]:
# Parse DATE into a datetime column named `time`. The DATE strings look like
# '06/17/12 00:00:00' (month/day/two-digit year), so the format is spelled out
# explicitly rather than relying on `infer_datetime_format`, which is deprecated
# since pandas 2.0. A value that does not follow this layout raises instead of
# being guessed.
sediments['time'] = pd.to_datetime(sediments.DATE, format='%m/%d/%y %H:%M:%S')

In [None]:
sediments

Unnamed: 0,NUCLIDE,VALUE_Bq/kg,ERROR%_kg,DATE,TDEPTH,LATITUDE (dddddd),LONGITUDE (dddddd),time
0,RA226,35.00,26.0,06/17/12 00:00:00,71.0,59.6667,24.0000,2012-06-17
1,RA226,36.00,22.0,06/17/12 00:00:00,71.0,59.6667,24.0000,2012-06-17
2,RA226,38.00,24.0,08/10/12 00:00:00,23.0,59.8600,28.8433,2012-08-10
3,RA226,36.00,25.0,08/10/12 00:00:00,23.0,59.8600,28.8433,2012-08-10
4,RA226,30.00,23.0,08/10/12 00:00:00,23.0,59.8600,28.8433,2012-08-10
...,...,...,...,...,...,...,...,...
37342,CS137,1.20,12.0,06/09/16 00:00:00,171.0,59.0360,21.0830,2016-06-09
37343,CS137,0.79,20.0,06/09/16 00:00:00,171.0,59.0360,21.0830,2016-06-09
37344,CS137,512.00,11.0,05/29/16 00:00:00,131.0,61.0667,19.7297,2016-05-29
37345,CS137,527.00,6.3,05/29/16 00:00:00,131.0,61.0667,19.7297,2016-05-29


In [None]:
# DATE is no longer needed once `time` exists. Reassigning (instead of `inplace=True`)
# together with `errors='ignore'` keeps this cell safe to re-run after the column is gone.
sediments = sediments.drop(columns=['DATE'], errors='ignore')

### Clean radionuclide types name

In [None]:
'240Pu/239Pu'

'240Pu/239Pu'

In [None]:
# NOTE(review): this looks like a scratch mock-up of a wide layout (one column per
# nuclide plus a matching `<name>_err` column). The frame is only displayed; it is not
# assigned to a name.
pd.DataFrame({'240Pu/239Pu': [1,2,3], '240Pu/239Pu_err': [1,2,3],
              '239,240Pu III,IV': [11, 12, 13], '239,240Pu III,IV_err': [11, 12, 13]})

Unnamed: 0,240Pu/239Pu,240Pu/239Pu_err,"239,240Pu III,IV","239,240Pu III,IV_err"
0,1,1,11,11
1,2,2,12,12
2,3,3,13,13


In [None]:
sediments.NUCLIDE.unique()

array(['RA226', 'CS137', 'RA228', 'K40', 'SR90', 'CS134137', 'CS134',
       'PU239240', 'PU238', 'CO60', 'RU103', 'RU106', 'SB125', 'AG110M',
       'CE144', 'AM241', 'BE7', 'TH228', 'PB210', 'CO58', 'MN54', 'ZR95',
       'BA140', 'PO210', 'RA224', 'NB95', 'PU238240', 'PU241', 'PU239',
       'EU155', 'IR192', 'Cs137', 'TH232', 'Pu239240', 'CD109', 'SB124',
       'ZN65', 'TH234', 'Pu238', 'Ra226', 'Sr90', 'TL208', 'PB212',
       'PB214', 'BI214', 'AC228', 'RA223', 'U235', 'BI212', 'CS137 '],
      dtype=object)

In [None]:
# 1. lower-case every nuclide code so that spellings such as 'Cs137' and 'CS137' coincide
sediments['NUCLIDE'] = sediments['NUCLIDE'].str.lower()

In [None]:
sediments.NUCLIDE.unique()

array(['ra226', 'cs137', 'ra228', 'k40', 'sr90', 'cs134137', 'cs134',
       'pu239240', 'pu238', 'co60', 'ru103', 'ru106', 'sb125', 'ag110m',
       'ce144', 'am241', 'be7', 'th228', 'pb210', 'co58', 'mn54', 'zr95',
       'ba140', 'po210', 'ra224', 'nb95', 'pu238240', 'pu241', 'pu239',
       'eu155', 'ir192', 'th232', 'cd109', 'sb124', 'zn65', 'th234',
       'tl208', 'pb212', 'pb214', 'bi214', 'ac228', 'ra223', 'u235',
       'bi212', 'cs137 '], dtype=object)

In [None]:
# 2. trim surrounding whitespace ('cs137' also occurs as 'cs137 ' with a trailing blank)
sediments['NUCLIDE'] = sediments['NUCLIDE'].str.strip()

In [None]:
sediments.NUCLIDE.unique()

array(['ra226', 'cs137', 'ra228', 'k40', 'sr90', 'cs134137', 'cs134',
       'pu239240', 'pu238', 'co60', 'ru103', 'ru106', 'sb125', 'ag110m',
       'ce144', 'am241', 'be7', 'th228', 'pb210', 'co58', 'mn54', 'zr95',
       'ba140', 'po210', 'ra224', 'nb95', 'pu238240', 'pu241', 'pu239',
       'eu155', 'ir192', 'th232', 'cd109', 'sb124', 'zn65', 'th234',
       'tl208', 'pb212', 'pb214', 'bi214', 'ac228', 'ra223', 'u235',
       'bi212'], dtype=object)

In [None]:
# 3. normalizing radionuclide aggregates (ratio, total) - renaming to be confirmed
# The mapping is applied to the NUCLIDE column only; calling `DataFrame.replace` on the
# whole frame would also scan (and could rewrite) every other column.
# NOTE(review): the '_tot' / '_ratio' suffixes are unverified guesses at what the
# combined codes stand for - confirm before publishing.
aggregate_names = {'cs134137': 'cs134_137_tot',
                   'pu238240': 'pu238_240_ratio',
                   'pu239240': 'pu239_240_ratio'}
sediments = sediments.assign(NUCLIDE=sediments['NUCLIDE'].replace(aggregate_names))

In [None]:
sediments.NUCLIDE.unique()

array(['ra226', 'cs137', 'ra228', 'k40', 'sr90', 'cs134_137_tot', 'cs134',
       'pu239_240_ratio', 'pu238', 'co60', 'ru103', 'ru106', 'sb125',
       'ag110m', 'ce144', 'am241', 'be7', 'th228', 'pb210', 'co58',
       'mn54', 'zr95', 'ba140', 'po210', 'ra224', 'nb95',
       'pu238_240_ratio', 'pu241', 'pu239', 'eu155', 'ir192', 'th232',
       'cd109', 'sb124', 'zn65', 'th234', 'tl208', 'pb212', 'pb214',
       'bi214', 'ac228', 'ra223', 'u235', 'bi212'], dtype=object)

### Columns renaming

In [None]:
# Short output names for the source columns (source name -> output name).
rules = dict([
    ('NUCLIDE', 'nuclide'),
    ('VALUE_Bq/kg', 'value'),
    ('ERROR%_kg', 'err'),
    ('TDEPTH', 'depth'),
    ('LATITUDE (dddddd)', 'latitude'),
    ('LONGITUDE (dddddd)', 'longitude'),
])

sediments = sediments.rename(columns=rules)

In [None]:
sediments

Unnamed: 0,nuclide,value,err,depth,latitude,longitude,time
0,ra226,35.00,26.0,71.0,59.6667,24.0000,2012-06-17
1,ra226,36.00,22.0,71.0,59.6667,24.0000,2012-06-17
2,ra226,38.00,24.0,23.0,59.8600,28.8433,2012-08-10
3,ra226,36.00,25.0,23.0,59.8600,28.8433,2012-08-10
4,ra226,30.00,23.0,23.0,59.8600,28.8433,2012-08-10
...,...,...,...,...,...,...,...
37342,cs137,1.20,12.0,171.0,59.0360,21.0830,2016-06-09
37343,cs137,0.79,20.0,171.0,59.0360,21.0830,2016-06-09
37344,cs137,512.00,11.0,131.0,61.0667,19.7297,2016-05-29
37345,cs137,527.00,6.3,131.0,61.0667,19.7297,2016-05-29


In [None]:
# Reshape from long (one row per measurement) to wide: one row per
# (time, latitude, longitude, depth) combination, with a `value` and an `err` column
# for each nuclide. `reset_index()` turns the four index levels back into columns.
# NOTE(review): no `aggfunc` is given, so `pivot_table` uses its default (mean); several
# measurements of one nuclide that share the same index tuple end up averaged into a
# single cell, and their `err` values are averaged too. The sample table also has slice
# columns (UPPSLI/LOWSLI) that are not part of this index - confirm that averaging
# across them is intended.
sediments = sediments.pivot_table(index=['time', 'latitude', 'longitude', 'depth'], 
                                  columns='nuclide', values=['value', 'err']).reset_index(); sediments

Unnamed: 0_level_0,time,latitude,longitude,depth,err,err,err,err,err,err,...,value,value,value,value,value,value,value,value,value,value
nuclide,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,ac228,ag110m,am241,ba140,be7,bi212,...,sb124,sb125,sr90,th228,th232,th234,tl208,u235,zn65,zr95
0,1984-06-03,54.3333,14.4183,63.0,,,,,,,...,,,,,,,,,,
1,1984-06-03,55.2167,17.0667,92.0,,,,,,,...,,,,,,,,,,
2,1984-06-03,55.2500,15.9833,91.0,,,,,,,...,,,,,,,,,,
3,1984-06-03,55.2500,19.9833,91.0,,,,,,,...,,,,,,,,,,
4,1984-06-05,54.8333,19.3333,109.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727,2018-09-17,61.0833,19.5797,125.0,,,,,,,...,,,,,,,,,,
1728,2018-10-10,55.7272,12.8437,20.0,,,,,,,...,,,,,,,,,,
1729,2018-10-18,55.6333,20.8000,45.0,,,,,,,...,,,9.17,,,,,,,
1730,2018-10-18,55.5583,21.0783,13.0,,,,,,,...,,,3.42,,,,,,,


In [None]:
def rename_cols(cols):
    """Flatten two-level column labels into single names.

    `cols` is an iterable of `(outer, inner)` pairs. Each pair is translated as:

    * empty `inner`      -> `outer` is kept as is
    * `outer == 'err'`   -> `'<inner>_err'`
    * `outer == 'value'` -> `inner`

    A pair with a non-empty `inner` and any other `outer` contributes nothing, so the
    returned list can be shorter than `cols`.
    """
    flat_names = []
    for outer, inner in cols:
        if not inner:
            # Only one level is populated: keep its label unchanged.
            flat_names.append(outer)
            continue
        if outer == 'err':
            flat_names.append('_'.join((inner, outer)))
        elif outer == 'value':
            flat_names.append(inner)
    return flat_names

In [None]:
sediments.columns = rename_cols(sediments.columns)

In [None]:
sediments

Unnamed: 0,time,latitude,longitude,depth,ac228_err,ag110m_err,am241_err,ba140_err,be7_err,bi212_err,...,sb124,sb125,sr90,th228,th232,th234,tl208,u235,zn65,zr95
0,1984-06-03,54.3333,14.4183,63.0,,,,,,,...,,,,,,,,,,
1,1984-06-03,55.2167,17.0667,92.0,,,,,,,...,,,,,,,,,,
2,1984-06-03,55.2500,15.9833,91.0,,,,,,,...,,,,,,,,,,
3,1984-06-03,55.2500,19.9833,91.0,,,,,,,...,,,,,,,,,,
4,1984-06-05,54.8333,19.3333,109.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727,2018-09-17,61.0833,19.5797,125.0,,,,,,,...,,,,,,,,,,
1728,2018-10-10,55.7272,12.8437,20.0,,,,,,,...,,,,,,,,,,
1729,2018-10-18,55.6333,20.8000,45.0,,,,,,,...,,,9.17,,,,,,,
1730,2018-10-18,55.5583,21.0783,13.0,,,,,,,...,,,3.42,,,,,,,


### Renaming index (dim) & encoding time

In [None]:
sediments.index.name = 'sample'

In [None]:
# Encode `time` as a number of seconds counted from 1970-01-01 00:00:00.
def format_time(x):
    """Convert one datetime `x` with `date2num`, in seconds since 1970-01-01 00:00:00."""
    return date2num(x, units="seconds since 1970-01-01 00:00:00.0")

sediments['time'] = sediments['time'].apply(format_time)

In [None]:
sediments

Unnamed: 0_level_0,time,latitude,longitude,depth,ac228_err,ag110m_err,am241_err,ba140_err,be7_err,bi212_err,...,sb124,sb125,sr90,th228,th232,th234,tl208,u235,zn65,zr95
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,455068800,54.3333,14.4183,63.0,,,,,,,...,,,,,,,,,,
1,455068800,55.2167,17.0667,92.0,,,,,,,...,,,,,,,,,,
2,455068800,55.2500,15.9833,91.0,,,,,,,...,,,,,,,,,,
3,455068800,55.2500,19.9833,91.0,,,,,,,...,,,,,,,,,,
4,455241600,54.8333,19.3333,109.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727,1537142400,61.0833,19.5797,125.0,,,,,,,...,,,,,,,,,,
1728,1539129600,55.7272,12.8437,20.0,,,,,,,...,,,,,,,,,,
1729,1539820800,55.6333,20.8000,45.0,,,,,,,...,,,9.17,,,,,,,
1730,1539820800,55.5583,21.0783,13.0,,,,,,,...,,,3.42,,,,,,,


### As NetCDF3 (classic)

In [None]:
# Write the wide table to a NetCDF3-classic file. The MARIS template supplies the global
# attributes, the dimensions, and - for every template variable that also exists as a
# column of `sediments` - the datatype, dimensions and attributes of the output variable.
out_path = Path('output/helcom-sed-v3.nc')
# Create the target folder up front so opening the file for writing cannot fail on a
# missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)

# Turn the `sample` index into a column once, instead of on every loop iteration.
sediments_out = sediments.reset_index()

with Dataset('maris-template-v3.nc') as src, Dataset(str(out_path), 'w', format='NETCDF3_CLASSIC') as dst:
    # copy global attributes all at once via dictionary
    dst.setncatts(src.__dict__)

    # copy dimensions (unlimited dimensions stay unlimited)
    for name, dimension in src.dimensions.items():
        dst.createDimension(
            name, (len(dimension) if not dimension.isunlimited() else None))

    # copy all variables of interest and fill them
    for name_var_src, var_src in src.variables.items():
        if name_var_src not in sediments_out.columns:
            continue
        # No compression keywords are passed: netCDF4 silently ignores them for
        # NETCDF3 formats, so they had no effect on this file.
        var_dst = dst.createVariable(name_var_src, var_src.datatype, var_src.dimensions)
        # fill the variable first, then copy its attributes all at once via dictionary
        var_dst[:] = sediments_out[name_var_src].values
        var_dst.setncatts(var_src.__dict__)