In [None]:
#| default_exp metadata

# Metadata
> Various utilities to populate NetCDF global attributes as well as ISO 19115 metadata.

In [None]:
#| export
import pandas as pd
from fastcore.xtras import load_pickle

from marisco.utils import read_toml, get_bbox
from marisco.configs import BASE_PATH, get_cfgs

from cftime import num2date
from pyzotero import zotero
import json

In [None]:
#| export
class Metadata:
    """Populate the NetCDF global attributes of a dataset.

    The attribute template is read from the `global_attrs` table of `cdl.toml`;
    the `fill_*` methods then overwrite individual entries in `self.attrs` with
    values derived from the data frames or from a bibliographic item.
    """

    def __init__(self, dfs):
        """Keep a reference to the data and load the attribute template.

        Args:
            dfs: the data frames, in any container accepted by `pd.concat`.
        """
        self.dfs = dfs
        self.attrs = read_toml(BASE_PATH / 'cdl.toml')['global_attrs']

    def __repr__(self):
        return json.dumps(self.attrs, indent=4)

    def _concat(self):
        "Stack every data frame of `self.dfs` into a single one."
        return pd.concat(self.dfs)

    @staticmethod
    def _negated(depth):
        "Return `-depth` as a string, writing zero as '0' instead of '-0.0'."
        return '0' if depth == 0 else str(-depth)

    def fill_geo(self):
        """Set the `geospatial_lat/lon_min/max` and `geospatial_bounds` attributes.

        Returns:
            The updated attributes dictionary.
        """
        bbox = get_bbox(self._concat())
        # NOTE(review): assumes `get_bbox` returns a shapely geometry built from
        # (lon, lat) points — confirm. Shapely orders `bounds` as
        # (minx, miny, maxx, maxy), i.e. (lon_min, lat_min, lon_max, lat_max).
        lon_min, lat_min, lon_max, lat_max = [str(bound) for bound in bbox.bounds]
        self.attrs['geospatial_lat_min'] = lat_min
        self.attrs['geospatial_lat_max'] = lat_max
        self.attrs['geospatial_lon_min'] = lon_min
        self.attrs['geospatial_lon_max'] = lon_max
        self.attrs['geospatial_bounds'] = bbox.wkt
        return self.attrs

    def fill_z(self):
        """Set the `geospatial_vertical_min/max` attributes from the `depth` column.

        Depths are negated before being stored, so the smallest depth becomes
        the vertical maximum and the largest depth the vertical minimum.

        Returns:
            The updated attributes dictionary.
        """
        depth = self._concat().depth
        self.attrs['geospatial_vertical_max'] = self._negated(depth.min())
        self.attrs['geospatial_vertical_min'] = self._negated(depth.max())
        return self.attrs

    def fill_time(self):
        """Set `time_coverage_start` / `time_coverage_end` as ISO 8601 strings.

        The smallest and largest values of the `time` column are decoded with
        the time units found in the `units` configuration.

        Returns:
            The updated attributes dictionary.
        """
        time = self._concat()['time']
        # Look the units up once; they are the same for both bounds.
        units = get_cfgs('units')['time']
        start, end = [num2date(t, units=units).isoformat() for t in (time.min(), time.max())]
        self.attrs['time_coverage_start'] = start
        self.attrs['time_coverage_end'] = end
        return self.attrs

    def fill_zotero(self, zoteroItem):
        """Copy `title`, `summary` and `creator_name` from a bibliographic item.

        Args:
            zoteroItem: object exposing `title()`, `summary()` and
                `creator_name()` methods, each called without arguments.

        Returns:
            The updated attributes dictionary.
        """
        for attr in ['title', 'summary', 'creator_name']:
            self.attrs[attr] = getattr(zoteroItem, attr)()
        return self.attrs

    def fill(self, zoteroItem):
        """Run every `fill_*` step and return the completed attributes.

        Args:
            zoteroItem: forwarded to `fill_zotero`.
        """
        self.fill_geo()
        self.fill_z()
        self.fill_time()
        self.fill_zotero(zoteroItem)
        return self.attrs

In [None]:
#| export
class ZoteroItem:
    """Accessor for a single item of a Zotero group library.

    The item is fetched once, at construction time, and kept in `self.item`.
    """

    def __init__(self, item_id, cfgs):
        """Fetch the item.

        Args:
            item_id: key of the item to retrieve.
            cfgs: mapping providing the `lib_id` and `api_key` entries.
        """
        self.cfgs = cfgs
        self.item = self.getItem(item_id)

    def getItem(self, item_id):
        "Retrieve `item_id` from the group library identified by `self.cfgs`."
        zot = zotero.Zotero(self.cfgs['lib_id'], 'group', self.cfgs['api_key'])
        return zot.item(item_id)

    def title(self):
        "Return the item's `title` field."
        return self.item['data']['title']

    def summary(self):
        "Return the item's `abstractNote` field."
        return self.item['data']['abstractNote']

    @staticmethod
    def _display_name(creator):
        """Return a printable name for one creator entry.

        Zotero stores a creator either in single-field mode (`name`) or in
        two-field mode (`firstName` / `lastName`); both are supported.
        """
        if 'name' in creator:
            return creator['name']
        parts = (creator.get('firstName'), creator.get('lastName'))
        return ' '.join(part for part in parts if part)

    def creator_name(self):
        "Return all creators as '<creatorType>: <name>' entries joined by '; '."
        creators = [f'{c["creatorType"]}: {self._display_name(c)}'
                    for c in self.item['data']['creators']]
        return '; '.join(creators)

    def __repr__(self):
        return json.dumps(self.item, indent=4)

## How to use

In [None]:
# Load the pickled test fixture that is handed to `Metadata` in the next cell.
# NOTE(review): presumably a collection of DataFrames, since `Metadata` passes
# it straight to `pd.concat` — confirm.
# Unpickling can execute arbitrary code: only load files from a trusted source.
dfs = load_pickle('../files/pkl/dfs_test.pkl')

In [None]:
# Start from the attribute template loaded by `Metadata`, then fill in the
# geographic, depth and time extents computed from `dfs`, plus the title,
# summary and creators fetched from the Zotero item identified by `item_id`.
# The 'zotero' configuration must provide the `lib_id` and `api_key` entries
# that `ZoteroItem` reads.
global_attrs = Metadata(dfs)
item_id = '26VMZZ2Q'
global_attrs.fill(ZoteroItem(item_id, get_cfgs('zotero')))

{'id': '',
 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'keywords': '',
 'keywords_vocabulary': 'GCMD Science Keywords',
 'keywords_vocabulary_url': 'https://gcmd.earthdata.nasa.gov/static/kms/',
 'record': '',
 'featureType': '',
 'cdm_data_type': '',
 'Conventions': 'CF-1.10 ACDD-1.3',
 'publisher_name': 'Paul McGinnity',
 'publisher_em