# Playground

In [1]:
from datetime import datetime as dt
from pathlib import Path

import xarray

from metacatalog import api, ext
from metacatalog.util.results import ImmutableResultSet

In [2]:
# Open a database session through metacatalog's API helper (called without arguments here).
session = api.connect_database()
# Trailing expression: display what the session is bound to (an Engine in the captured output).
session.bind

Engine(postgresql://metacatalog:***@localhost:5432/metacatalog)

In [3]:
# Look up entries with id=13 and keep the first match; every later cell works on this `entry`.
# NOTE(review): presumably `find_entry` returns a list, in which case `[0]` raises IndexError
# when the connected database has no entry with this id -- confirm.
entry = api.find_entry(session, id=13)[0]
# Trailing expression: display the object that was found.
entry

<metacatalog.models.entry.Entry at 0x7fbb2ff1d850>

In [4]:
# Call the entry's own export method with fmt='pickle' and flat=True; the result is displayed
# (not stored), so this cell only serves for visual inspection.
entry.export(fmt='pickle', flat=True)

{'uuid': ['0aa2c8e7-f310-4e3c-96f6-4131b49cc8f3',
  'a5a96b19-eb5a-4b8d-8ed9-fb6b8c0f0ae0',
  '58364ded-5c7d-4538-a26a-312a287acfbb',
  '66068e92-f1d7-45a4-b30f-0835887b5027'],
 'external_id': 'Gr_1-4',
 'title': 'Grasswang lysimeter Gr_1-4',
 'authors': [{'id': 1,
   'uuid': '3d5c0ebd-319f-4aaa-9a29-261e8c147413',
   'first_name': 'Katrin',
   'last_name': 'Schneider',
   'affiliation': 'Karlsruhe Institute of Technology (KIT)'}],
 'abstract': '\nSummary\n-------\nDaily sums of precipitation, evapotranspiration, and drainage calculated from Lysimeter mass change measurements\n(minute data) of the TERENO preAlpine Observatory\n\nLineage statement \n-----------------\ndata represents weight measurements of six large lysimsters (area 1m2, depth 1.4 m); evapotranspiration, precipitation and drainage at 1.4 m was calculated from these measurements\nthe lysimeters are part of the TERENO preAlpine Observatory; two agricultural management systems are applied: extensive and intensive (refers t

## Develop Export functions

In [None]:
# Wrap the entry in an ImmutableResultSet.
# NOTE(review): `r` is not referenced by any later cell visible here (to_dict builds its own
# result set) and this cell carries no execution count -- possibly a leftover.
r = ImmutableResultSet(entry)

In [25]:
# Attribute names that to_dict reads from the result set, in export order.
ENTRY_KEYS = (
    'uuid', 'external_id', 'title', 'authors', 'abstract',
    'citation', 'location_shape', 'variable', 'license', 'datasource',
    'details', 'embargo', 'embargo_end', 'version', 'latest_version',
    'plain_keyword_dict', 'publication', 'lastUpdate', 'comment', 'associated_groups',
)


def clear_output(data: dict) -> dict:
    """
    Return a copy of ``data`` without its ``'id'`` key and with every
    remaining value passed through ``_stringify``.

    Parameters
    ----------
    data : dict
        Mapping to clean. It is not modified.

    Returns
    -------
    dict
        New dictionary holding the converted values, in the original key order.
    """
    cleaned = {}
    for name, item in data.items():
        # the 'id' key is never part of the exported output
        if name == 'id':
            continue
        cleaned[name] = _stringify(item)
    return cleaned

def _stringify(val):
    if isinstance(val, (float, int, str)):
        return val
    elif isinstance(val, (list, float)):
        return [_stringify(v) for v in val]
    elif isinstance(val, dict):
        return clear_output(val)
    elif isinstance(val, dt):
        return val.isoformat()
    else:
        return str(val)

    
def to_dict(entry, stringify=True):
    """
    Collect the attributes listed in ``ENTRY_KEYS`` for ``entry`` into a dictionary.

    The entry is wrapped in an ``ImmutableResultSet`` and every key is read
    through its ``get`` method.

    Parameters
    ----------
    entry
        Object handed to ``ImmutableResultSet``.
    stringify : bool
        If True (default) each value is passed through ``_stringify``;
        if False the values are stored as returned by the result set.

    Returns
    -------
    dict
        One item per key of ``ENTRY_KEYS`` whose value is not None, in the
        order of ``ENTRY_KEYS``.
    """
    result = ImmutableResultSet(entry)

    out = dict()

    for key in ENTRY_KEYS:
        # Skip the 'id' *key*, like clear_output does. The previous check compared
        # the *value* to 'id', which dropped any attribute whose value is 'id'.
        if key == 'id':
            continue

        val = result.get(key)
        if val is None:
            continue

        out[key] = _stringify(val) if stringify else val

    return out

def flat_keys(data: dict, delimiter: str = '.', **kwargs) -> dict:
    """
    Flatten a nested dictionary into a single level.

    Nested dictionaries and lists are expanded recursively. The key of each
    leaf is the path to it, joined by ``delimiter``; list elements contribute
    their position as a path component.

    Parameters
    ----------
    data : dict
        Possibly nested mapping. It is not modified.
    delimiter : str
        String placed between path components. Defaults to ``'.'``.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    dict
        Flat mapping of joined path to leaf value. Empty dicts and empty
        lists contain no leaf and therefore produce no key.

    Examples
    --------
    >>> flat_keys({'a': {'b': 1}, 'c': [2, 3]})
    {'a.b': 1, 'c.0': 2, 'c.1': 3}
    """
    out = dict()

    # expand every top-level key; scalars come back as a one-item dict
    for key, value in data.items():
        out.update(_flatten(key, value, delimiter, ''))

    return out


def _flatten(key, value, delimiter, prefix):
    """
    Return the flat ``{path: leaf}`` mapping for ``value`` stored under ``key``.

    ``prefix`` is the already joined parent path including its trailing
    delimiter (an empty string at the top level).
    """
    full_key = f'{prefix}{key}'

    if isinstance(value, dict):
        children = value.items()
    elif isinstance(value, list):
        # list positions act as keys
        children = enumerate(value)
    else:
        return {full_key: value}

    child_prefix = f'{full_key}{delimiter}'
    flat = dict()
    for child_key, child_value in children:
        flat.update(_flatten(child_key, child_value, delimiter, child_prefix))
    return flat

def flatten(data: dict, prefix=False, delimiter='.'):
    """
    Collapse a nested dictionary into one level with path-like keys.

    Parameters
    ----------
    data : dict
        Possibly nested mapping of dicts, lists and scalar values.
    prefix : str or False
        Path that is put in front of every key, separated by ``delimiter``.
        A falsy value (the default) adds nothing.
    delimiter : str
        Separator between path components. Defaults to ``'.'``.

    Returns
    -------
    dict
        Flat mapping. Dict values are expanded recursively; list elements are
        expanded under their position (``'authors.0.uuid'``).
    """
    flat = {}

    for name, item in data.items():
        # path of this item below the given prefix
        path = f'{prefix}{delimiter}{name}' if prefix else name

        if isinstance(item, dict):
            flat.update(flatten(item, path, delimiter))
        elif isinstance(item, list):
            # each element is flattened as a one-item dict keyed by its position
            for position, element in enumerate(item):
                flat.update(flatten({str(position): element}, path, delimiter))
        else:
            flat[path] = item

    return flat

In [26]:
# Try out the helpers defined above: collect the entry's metadata (values stringified by
# default), then flatten the nested result into path-like keys.
metadata = to_dict(entry)
flat_metadata = flatten(metadata)

In [27]:
# Inspect the flattened result: list positions show up as key components,
# e.g. 'uuid.0' or 'authors.0.uuid' in the captured output.
flat_metadata

{'uuid.0': '0aa2c8e7-f310-4e3c-96f6-4131b49cc8f3',
 'uuid.1': 'a5a96b19-eb5a-4b8d-8ed9-fb6b8c0f0ae0',
 'uuid.2': '58364ded-5c7d-4538-a26a-312a287acfbb',
 'uuid.3': '66068e92-f1d7-45a4-b30f-0835887b5027',
 'external_id': 'Gr_1-4',
 'title': 'Grasswang lysimeter Gr_1-4',
 'authors.0.uuid': '3d5c0ebd-319f-4aaa-9a29-261e8c147413',
 'authors.0.first_name': 'Katrin',
 'authors.0.last_name': 'Schneider',
 'authors.0.affiliation': 'Karlsruhe Institute of Technology (KIT)',
 'abstract': '\nSummary\n-------\nDaily sums of precipitation, evapotranspiration, and drainage calculated from Lysimeter mass change measurements\n(minute data) of the TERENO preAlpine Observatory\n\nLineage statement \n-----------------\ndata represents weight measurements of six large lysimsters (area 1m2, depth 1.4 m); evapotranspiration, precipitation and drainage at 1.4 m was calculated from these measurements\nthe lysimeters are part of the TERENO preAlpine Observatory; two agricultural management systems are applied:

## Handle Data

In [5]:
# Load the 'export' extension and run the same steps with its implementation.
Export = ext.extension('export')
# Metadata only: no_data=True is passed, the data is fetched separately below.
metadata = Export.to_dict(entry, no_data=True)
# NOTE: this rebinds `metadata`; `flat_metadata` from the local helpers above stays untouched.
metadata = Export.flat_keys(metadata)
# serialize=False -- the later merge cell iterates data.items() as (uuid, frame) pairs and
# reads `.columns`, so this presumably returns a dict of pandas DataFrames -- confirm.
data = Export.get_data(entry, serialize=False)

In [6]:
# Inspect the metadata produced by the export extension.
# NOTE(review): the output captured below still shows nested lists under 'uuid' and 'authors'
# although Export.flat_keys was applied -- confirm whether the extension is meant to expand
# lists, or whether this output is stale.
metadata

{'uuid': ['0aa2c8e7-f310-4e3c-96f6-4131b49cc8f3',
  'a5a96b19-eb5a-4b8d-8ed9-fb6b8c0f0ae0',
  '58364ded-5c7d-4538-a26a-312a287acfbb',
  '66068e92-f1d7-45a4-b30f-0835887b5027'],
 'external_id': 'Gr_1-4',
 'title': 'Grasswang lysimeter Gr_1-4',
 'authors': [{'uuid': '3d5c0ebd-319f-4aaa-9a29-261e8c147413',
   'first_name': 'Katrin',
   'last_name': 'Schneider',
   'affiliation': 'Karlsruhe Institute of Technology (KIT)'}],
 'abstract': '\nSummary\n-------\nDaily sums of precipitation, evapotranspiration, and drainage calculated from Lysimeter mass change measurements\n(minute data) of the TERENO preAlpine Observatory\n\nLineage statement \n-----------------\ndata represents weight measurements of six large lysimsters (area 1m2, depth 1.4 m); evapotranspiration, precipitation and drainage at 1.4 m was calculated from these measurements\nthe lysimeters are part of the TERENO preAlpine Observatory; two agricultural management systems are applied: extensive and intensive (refers to frequency 

In [42]:
import numpy as np
import pandas as pd

# Join all data frames on their index into one wide frame and collect,
# per column, the metadata that is attached to the dataset variables below.
merged = pd.DataFrame()
uuids = []
col_meta = {}

for uuid, df in data.items():
    # outer join: keep every index value found in any of the frames
    merged = merged.merge(df, left_index=True, right_index=True, how='outer')
    names = df.columns
    print(names)
    for name in names:
        # metadata keys of this uuid's variable / datasource, with the first two
        # key components (section and uuid) stripped
        col_meta[name] = {
            '.'.join(k.split('.')[2:]): v
            for k, v in flat_metadata.items()
            if k.startswith((f'variable.{uuid}', f'datasource.{uuid}'))
        }
        col_meta[name]['uuid'] = uuid

# build the dataset; everything that is not variable / datasource metadata
# becomes a dataset-level attribute
xr = xarray.Dataset.from_dataframe(merged)
xr.attrs = {
    k: v
    for k, v in flat_metadata.items()
    if not k.startswith(('variable', 'datasource'))
}

# attach the per-column metadata to the matching data variables
for var_name, meta in col_meta.items():
    xr[var_name].attrs = meta

xr

Index(['evapotranspiration'], dtype='object')
Index(['drainage'], dtype='object')
Index(['daily_rainfall_sum'], dtype='object')


In [40]:
# Write the dataset to a NetCDF file. The target is built from the current user's home
# directory instead of an absolute path tied to one developer's account; for that
# account it resolves to the same file as before.
NETCDF_PATH = Path.home() / 'Schreibtisch' / 'test.nc'
xr.to_netcdf(NETCDF_PATH)