# Fendt Site
## Create metadata

In [1]:
import pandas as pd
from io import StringIO
import glob
from datetime import datetime as dt

In [2]:
# list every CSV export found in the local data directory (order is whatever glob returns)
flist = glob.glob('./data/*.csv')
print(flist)

['./data/Gr_P_ET_DRAIN_2012-2014.csv', './data/FE1_P_ET_DRAIN_2012-2014.csv', './data/FE3_P_ET_DRAIN_2012-2014.csv', './data/FE2_P_ET_DRAIN_2012-2014.csv']


In [3]:
fname = './data/FE3_P_ET_DRAIN_2012-2014.csv'

# Load the complete file into an in-memory buffer so that the header comments
# and the table can be consumed from the same stream.
with open(fname, 'r') as handle:
    buffer = StringIO(handle.read())

# Leading lines starting with '#' carry the metadata description.
desc = []
header = buffer.readline()
while header.startswith('#'):
    desc.append(header)
    header = buffer.readline()

# The first line without '#' is the column header; everything after it is data.
df = pd.read_csv(buffer, header=None)
# NOTE: the header is split as-is, so the last column name keeps its trailing
# line break ('DRAIN_mm\n'); the upload cell further down relies on that name.
df.columns = header.split(',')
df

Unnamed: 0,DOY,date,day,month,year,station,num_lys,ID_lys,origin_lys,man,station_man,EVAP_mm,PREC_mm,DRAIN_mm\n
0,1,2012-01-01,1,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,4.43,0.00
1,2,2012-01-02,2,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,0.78,0.00
2,3,2012-01-03,3,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,0.44,0.00
3,4,2012-01-04,4,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,10.20,0.00
4,5,2012-01-05,5,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,11.28,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6571,361,2014-12-27,27,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.00,0.01,0.36
6572,362,2014-12-28,28,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.16,0.42,0.15
6573,363,2014-12-29,29,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.00,0.01,0.24
6574,364,2014-12-30,30,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.00,0.02,0.29


In [4]:
# Glue the collected header lines back together and show only the part that
# precedes the license text.
description = ''.join(desc)
before_license, _, _ = description.partition('CC BY NC')
print(before_license)

#Project 
##TERENO_2010-2018
#Summary
##Daily sums of precipitation, evapotranspiration, and drainage calculated from Lysimeter mass change measurements (minute data) of the TERENO preAlpine Observatory 
#Creator
##Katrin Schneider
#Creation Date (of data file)
##2019-10-01
#Site
##Fendt3
#Location 
##47.83243 lat., 11.06111 lon. WGS 84
#Elevation
##595
#Quantity keyword
##precipitation, evapotranspiration, drainage, lysimeter 
#UsageRights 
##


## connect

In [5]:
# Run-time switches for this notebook.
# NOTE(review): UPLOAD is not referenced by any of the cells visible here - confirm it is still needed.
UPLOAD = True
# connection name handed to api.connect_database in the next cell
CONNECTION = 'test_iso'
#CONNECTION = 'default'

In [7]:
from metacatalog import api
# open a database session for the selected CONNECTION and report the engine it is bound to
session = api.connect_database(CONNECTION)
print('Using: %s' % session.bind)

Using: Engine(postgresql://postgres:***@localhost:5432/test_iso)


### add person

In [8]:
# Creator named in the file header; reuse the database record when it is
# already there, otherwise register the person first.
creator = {'first_name': 'Katrin', 'last_name': 'Schneider'}

author = api.find_person(session, return_iterator=True, **creator).first()

if author is None:
    author = api.add_person(
        session,
        affiliation='Karlsruhe Institute of Technology (KIT)',
        **creator
    )

author

<metacatalog.models.person.Person at 0x7fedc2f2f0a0>

### license

In [9]:
# Take the first license matching the short title.
# NOTE(review): presumably fails with an IndexError when no license matches - confirm.
# The name `license` shadows the helper of the same name that interactive Python
# sessions provide; it is kept because the entry-creation cell passes `license=license`.
license = api.find_license(session, short_title='CC BY-NC 4.0')[0]

print(license.id, license.title)

8 Creative Commons Attribution-NonCommerical 4.0 International


### Keywords

> #Quantity keyword
> ##precipitation, evapotranspiration, drainage, lysimeter 

In [10]:
# Print uuid and full path of every keyword matching the quantity keywords
# named in the file header, one search term after the other.
for search_value in ('PRECIPITATION', 'EVAPOTRANSPIRATION', 'DRAINAGE', 'LYSIMETER'):
    for k in api.find_keyword(session, value=search_value):
        print(k.uuid, k.full_path)

1532e590-a62d-46e3-8d03-2351bc48166a EARTH SCIENCE > ATMOSPHERE > PRECIPITATION
26fc4850-7ba9-44d8-a156-5c623e17b72f EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WATER VAPOR > WATER VAPOR PROCESSES > EVAPOTRANSPIRATION
6a2107ab-38ab-42dc-beb0-8ba5f65e8022 EARTH SCIENCE > TERRESTRIAL HYDROSPHERE > GROUND WATER > GROUND WATER PROCESSES/MEASUREMENTS > DRAINAGE
269c7277-fa8f-4c1c-bd8b-ab772c1df4e5 EARTH SCIENCE > TERRESTRIAL HYDROSPHERE > SURFACE WATER > SURFACE WATER PROCESSES/MEASUREMENTS > DRAINAGE


In [11]:
# UUIDs copied from the keyword listing printed by the previous cell,
# given in the same order as the variable names they belong to.
keyword_uuids = ('1532e590-a62d-46e3-8d03-2351bc48166a', '26fc4850-7ba9-44d8-a156-5c623e17b72f', '269c7277-fa8f-4c1c-bd8b-ab772c1df4e5')
variables = ['daily rainfall sum', 'evapotranspiration', 'drainage']

# variable name -> database id of the keyword; .one() insists on exactly one match per uuid
keywords = {
    var: api.find_keyword(session, uuid=uuid, return_iterator=True).one().id
    for uuid, var in zip(keyword_uuids, variables)
}

keywords

{'daily rainfall sum': 115, 'evapotranspiration': 6319, 'drainage': 7328}

### location, abstract and title

In [12]:
# site location as an EWKT point (SRID 4326), written longitude first, then latitude
location = 'SRID=4326;POINT (11.06111 47.83243)'
# title of the label group that bundles all entries of this station
group_title = 'Fendt 3 TERENO preAlpine Observatory / SUSALPS'
# title template for entries and composites; '%s' is filled with the lysimeter's external id
title = 'Fendt 3 lysimeter %s' 

# Abstract passed to api.add_entry for every entry created below.
# NOTE(review): the last sentence of step 4 ends abruptly with "(d" - the text looks truncated.
abstract = """
Summary
-------
Daily sums of precipitation, evapotranspiration, and drainage calculated from Lysimeter mass change measurements
(minute data) of the TERENO preAlpine Observatory

Lineage statement 
-----------------
data represents weight measurements of six large lysimsters (area 1m2, depth 1.4 m); evapotranspiration, precipitation and drainage at 1.4 m was calculated from these measurements
the lysimeters are part of the TERENO preAlpine Observatory; two agricultural management systems are applied: extensive and intensive (refers to frequency and amount of slurry applications and cutting frequency)

Processing of the ra data involves several steps:

1. processing of .DBD files (rawdata files) into .TDM files with the National Instruments (NI) DIAdem software (version 2017 was used for the presented data): 
   NI DIAdem software reads in *.DBD files

2. processing of the data using the Matlab script of Jin Fu.
   Open the .TDM files in Excel and convert them into .xlsx files; Add-In “TDM” is required; the .TDM have to be opened with the TDM add-in, not with the standard Excel open or import menu! 

3. Apply the AWAT filter to the data to correct the data
   the AWAT software is decribed in Peters et al 2017, http://dx.doi.org/10.1016/j.jhydrol.2017.04.015
   data from 2018 has been preprocessed with the updated Version AWAT 3 -> use this version for new data
   The settings can be adapted in the file “input.dat” (boundaries, moving window); also the file (rawdata1.dat, rawdata2.dat,…) has to be specified since each stands for one lysimeter1!
   Run AWAT3.exe (formerly AWATexe); you will need one run per rawdataXX.dat file (i.e. each of the 6 rawdata files have to be specified in the ini file. 

4. Final check
   data check for huge differences between the lysimeters of one hexagon: sometimes, weight measurements of single lysimeters seem to be erroneous (e.g. freezing, lysimeter sticks,…)
   Snow correction: Weight measurements are erroneous under snow / frost conditions: 
   data are corrected according to snow cover (albedo > 0.5) as measured by nearby EC tower + visual check using the camara picture
   lysimeter precipitation is replaced by pluvio / composite data when albedo is > 0.5 (using filter routine for albedo > 0.5 at 12:00 am)
   for snow cover periods, ET data have been calculated with Penman-Monteith for 2012 -2014 and replaces with literature values for data from 2015 on
   gap filling / data correction under snow free conditions: in case daily sums of ET and P values of one lysimeter differ much from other lysimeters (d

Stations
--------
stations/sites of the lysimeter network: Graswang = Gr; Rottenbuch = Rb; Fendt1 = Fe1; Fendt2 = Fe2, Fendt3 = Fe3
 
cloumn "origin_lys" indicates, where the soil columns come from; in the TERENO set-up, soil columns were transplanted from higher elevations to lower elevations to simulate the effects of higher temperatures on hydro-biogeochemical processes;
lysimeters labelled "GrX" were transplanted from Graswang, those labelled with "RbX" were transplanted from Rottenbuch, and "FeX" indicate control lysimeters from Fendt; 
lysimeters at the Fendt site are equipped with soil columns from Graswang, Rottenbuch and Fendt (==control lysimeters); lysimeters at the Graswang site are only equipped with soils from Graswang (from three sites along the valley bottom); and lysimeters in Rottenbuch are euquipped with soil columns transplanted from Graswang and with soil columns from the Rottenbuch area (=control lysimeters)

Data from 2012-2014 was prepared by Jin Fu; data correction and/or replacement of missing values for these data is unclear
published in: Fu, J., Gasche, R., Wang, N., Lu, H., Butterbach-Bahl, K., Kiese, R., 2017. Impacts of climate and management on water balance and nitrogen leaching from montane grassland soils of S-Germany. Environ. Pollut. 229, 119–131. https://doi.org/10.1016/j.envpol.2017.05.071


data correction and flags: under snow and/ or freezing conditions, the lysimeter weight measurements and the calculated evapotranspiration and precipitations values are prone to errors;
therefore, calculated evapotranspiration was replaced with literature data and precipitations was replaced with composite data (from rain gauage or EC data)
more details on data flags and replacement of measurements are available from the author

information on lysimeter labels and origins of the soil columns are available from the author

Variables (by column)
---------------------

.. note::
    Not all columns are available for all sites

ID_lys=lysimeter label as defined by UMS,
DOY=day of year, date=YYYY-MM-DD, day, month, year,
station=experimental site, num_lys=number of lysimeter,
origin_lys=origin of lysimeter (either from other experimental site or from actual site),
man=agricultural management (extensive or intensive),
station_man= combined information on station and management,
EVAP_mm=daily sum of evapotranspiration [mm],
PREC_mm=daily sum of precipitation [mm],
DRAIN_mm=daily sum of drainage [mm] (measured at 140 cm), 
cum_evap=cumulative evapotranspiration [mm],
cum_prec=cumulative precipitation [mm],
cum_drain=cumulative drainage [mm],
exp_unit=experimental unit
"""

## derive the lysimeter IDs of the station

In [13]:
# every distinct lysimeter label in the table, in order of first appearance
external_ids = df['ID_lys'].drop_duplicates().tolist()
# variable names for which an entry is created per lysimeter
variables = ['daily rainfall sum', 'evapotranspiration', 'drainage']

print(external_ids)

['Fe_3-1', 'Fe_3-2', 'Fe_3-3', 'Fe_3-4', 'Fe_3-5', 'Fe_3-6']


Adding the entries is a bit nested:

- Entry for each variable
- Composite for one Lysimeter
- Label Group for Fendt 3
- Project for TERENO

In [14]:
# One entry per lysimeter and variable, plus one composite group per lysimeter.
# Anything that already exists in the database is reused instead of re-created.
entries = []
composites = []

for ext_id in external_ids:
    lysimeter_title = title % ext_id
    lysimeter_entries = []

    for var_name in variables:
        entry = api.find_entry(session, external_id=ext_id, variable=var_name, return_iterator=True).first()
        variable = api.find_variable(session, name=var_name)[0]

        if entry is None:
            # not in the database yet: create the entry and tag it with its keyword
            entry = api.add_entry(
                session,
                title=lysimeter_title,
                author=author.id,
                location=location,
                variable=variable.id,
                abstract=abstract,
                external_id=ext_id,
                license=license,
                embargo=True
            )
            api.add_keywords_to_entries(session, [entry], [keywords[var_name]])

        lysimeter_entries.append(entry)

    entries.extend(lysimeter_entries)

    # the composite ties together all variables recorded by one lysimeter
    composite = api.find_group(session, title=lysimeter_title, return_iterator=True).first()
    if composite is None:
        composite = api.add_group(
            session,
            group_type='Composite',
            entry_ids=[member.id for member in lysimeter_entries],
            title=lysimeter_title,
            description=f'Full Lysimeter Record ID {ext_id}'
        )
    composites.append(composite)

# summary of what is now available
for item in entries:
    print(item.external_id, item.variable.name)

Fe_3-1 daily rainfall sum
Fe_3-1 evapotranspiration
Fe_3-1 drainage
Fe_3-2 daily rainfall sum
Fe_3-2 evapotranspiration
Fe_3-2 drainage
Fe_3-3 daily rainfall sum
Fe_3-3 evapotranspiration
Fe_3-3 drainage
Fe_3-4 daily rainfall sum
Fe_3-4 evapotranspiration
Fe_3-4 drainage
Fe_3-5 daily rainfall sum
Fe_3-5 evapotranspiration
Fe_3-5 drainage
Fe_3-6 daily rainfall sum
Fe_3-6 evapotranspiration
Fe_3-6 drainage


In [15]:
# Look up the label group bundling all entries of this station; create it when missing.
group = api.find_group(session, title=group_title, return_iterator=True).first()

if group is None:
    group = api.add_group(
        session,
        group_type='Label',
        entry_ids=[e.id for e in entries],
        title=group_title,
        # this notebook handles the Fendt 3 site (see group_title); the description
        # previously named Fendt-1, a leftover from another station's notebook
        description='All data from Fendt-3 of the TERENO preAlpine Observatory'
    )

group

<metacatalog.models.entrygroup.EntryGroup at 0x7fedc2f2eaa0>

## Data

In [16]:
# show the parsed table once more before its values are uploaded
df

Unnamed: 0,DOY,date,day,month,year,station,num_lys,ID_lys,origin_lys,man,station_man,EVAP_mm,PREC_mm,DRAIN_mm\n
0,1,2012-01-01,1,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,4.43,0.00
1,2,2012-01-02,2,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,0.78,0.00
2,3,2012-01-03,3,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,0.44,0.00
3,4,2012-01-04,4,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,10.20,0.00
4,5,2012-01-05,5,1,2012,Fe3,L1,Fe_3-1,Gr15,int,Fe_int,0.00,11.28,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6571,361,2014-12-27,27,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.00,0.01,0.36
6572,362,2014-12-28,28,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.16,0.42,0.15
6573,363,2014-12-29,29,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.00,0.01,0.24
6574,364,2014-12-30,30,12,2014,Fe3,L6,Fe_3-6,Fe6,int,Fe_int,0.00,0.02,0.29


In [17]:
# Variable name in the database -> clean name of the CSV column holding its values.
MAP = {
    'daily rainfall sum': 'PREC_mm',
    'evapotranspiration': 'EVAP_mm',
    'drainage': 'DRAIN_mm'
}

# The header line was split on ',' without stripping, so the last column can still
# be called 'DRAIN_mm\n'. Resolve the real column by its stripped name: this works
# whether or not the header was cleaned, and keeps the line break out of the
# column name that is handed to the importer.
source_columns = {str(col).strip(): col for col in df.columns}

for label, grp in df.groupby('ID_lys'):
    # one upload per lysimeter and variable
    for var_name, col_name in MAP.items():
        # exactly one entry must exist for this lysimeter/variable pair
        entry = api.find_entry(session, external_id=label, variable=var_name, return_iterator=True).one()

        # never upload twice
        if entry.datasource:
            print(f'Skipping Entry ID={entry.id} ({entry.variable.name}): has data.')
            continue

        source_col = source_columns[col_name]
        data = (
            grp[['date', source_col]]
            .rename(columns={source_col: col_name})
            .dropna()
        )
        # build a time-indexed frame without mutating the group slice in place
        data = (
            data
            .assign(tstamp=pd.to_datetime(data['date'], format='%Y-%m-%d'))
            .drop(columns='date')
            .set_index('tstamp')
        )

        # nothing to upload for this variable
        if data.empty:
            continue

        # create the datasource
        entry.create_datasource(path='timeseries', type=1, datatype='timeseries', commit=True)

        # do the import
        entry.import_data(data)

        # create scale: daily resolution spanning the first to the last time stamp
        entry.datasource.create_scale(
            resolution='1d',
            extent=[data.index.min(), data.index.max()],
            support=1.0,
            scale_dimension='temporal'
        )
        print(f'Entry ID {entry.id} - {entry.variable.name}: uploaded: {len(data)} points.')

    

Entry ID 44 - daily rainfall sum: uploaded: 1093 points.
Entry ID 45 - evapotranspiration: uploaded: 1093 points.
Entry ID 46 - drainage: uploaded: 1093 points.
Entry ID 47 - daily rainfall sum: uploaded: 1093 points.
Entry ID 48 - evapotranspiration: uploaded: 1093 points.
Entry ID 49 - drainage: uploaded: 1093 points.
Entry ID 50 - daily rainfall sum: uploaded: 1093 points.
Entry ID 51 - evapotranspiration: uploaded: 1093 points.
Entry ID 52 - drainage: uploaded: 1093 points.
Entry ID 53 - daily rainfall sum: uploaded: 1093 points.
Entry ID 54 - evapotranspiration: uploaded: 1093 points.
Entry ID 55 - drainage: uploaded: 1093 points.
Entry ID 56 - daily rainfall sum: uploaded: 1093 points.
Entry ID 57 - evapotranspiration: uploaded: 1093 points.
Entry ID 58 - drainage: uploaded: 1093 points.
Entry ID 59 - daily rainfall sum: uploaded: 1093 points.
Entry ID 60 - evapotranspiration: uploaded: 1093 points.
Entry ID 61 - drainage: uploaded: 1093 points.
