# Create InvenioRDM package from Astropedia

- https://astrogeology.usgs.gov/search/map/Moon/Geology/Unified_Geologic_Map_of_the_Moon_GIS_v2.xml
- https://inveniordm.docs.cern.ch/install/run/


In [1]:
# Code block

import json
import requests
from dataclasses import dataclass, asdict

class InvenioResults:
    """
    Forward declaration only: makes the name available to code defined
    before the full InvenioResults class, which appears later in this file
    and replaces this empty placeholder.
    """

class InvenioAPI:
    """
    Client for Invenio(RDM) API

    URLs are built as ``<scheme>://<hostname>/api<path>`` and requests are
    sent through the ``requests`` library.
    """
    _scheme = 'https'
    _hostname = 'localhost' #'127.0.0.1:5000'
    _path_api = '/api'
    _token = None
    # TLS certificate verification is off by default (as before), which suits
    # instances running with a self-signed certificate.
    _verify = False

    def __init__(self, hostname:str, token:str=None, verify:bool=False):
        """
        Args:
            hostname: Host (optionally 'host:port') of the Invenio instance.
            token: Access token sent as a Bearer token on POST requests;
                may be omitted when only reading.
            verify: Forwarded to ``requests`` as its ``verify`` argument.
                Defaults to False, the value previously hard-coded; pass
                True to verify the server's TLS certificate.
        """
        self._hostname = hostname
        self._token = token
        self._verify = verify

    def read_records(self) -> "InvenioResults":
        """
        Read all records from server, return InvenioResults object

        Raises:
            requests.HTTPError: if the server answers with an error status
                (an error body has no 'hits' to wrap).
        """
        res = self._get('/records')
        res.raise_for_status()
        return InvenioResults(res.json())

    def create_draft(self, payload) -> dict:
        """
        Create draft (see publish_draft() for publishing it)

        Args:
            payload: InvenioAstropedia instance; its create_record() output
                is serialized to JSON and POSTed to '/records'.

        Returns:
            The server's JSON reply, decoded to a dict. The HTTP status is
            not checked here, so an error reply is returned as-is.

        Raises:
            TypeError: if 'payload' is not an InvenioAstropedia.
        """
        # Explicit check instead of 'assert', which is skipped under 'python -O'
        if not isinstance(payload, InvenioAstropedia):
            raise TypeError(
                "payload must be an InvenioAstropedia instance, "
                f"got {type(payload).__name__}"
            )
        body = json.dumps(payload.create_record())
        res = self._post('/records', body)
        return res.json()

    def publish_draft(self):
        """
        Publish a previously created draft (see create_draft())

        Raises:
            NotImplementedError: always; publishing is not implemented yet.
        """
        raise NotImplementedError("publish_draft() is not implemented yet")

    def _url(self, path_ext=''):
        """Return the full API URL for 'path_ext' (e.g. '/records')."""
        path = self._path_api + path_ext
        return f"{self._scheme}://{self._hostname}{path}"

    def _headers(self):
        """
        Return the request headers: JSON content type, plus the Bearer
        token when one was given.
        """
        hdr = {'Content-Type': 'application/json'}
        # Without a token, omit the header rather than send "Bearer None"
        if self._token:
            hdr['Authorization'] = f"Bearer {self._token}"
        return hdr

    def _get(self, path_ext, params=None):
        """GET 'path_ext' (no authentication headers are sent)."""
        base_url = self._url(path_ext)
        return requests.get(base_url, params=params, verify=self._verify)

    def _post(self, path_ext, payload=None):
        """POST 'payload' (an already-serialized body) to 'path_ext'."""
        base_url = self._url(path_ext)
        return requests.post(base_url, data=payload,
                             headers=self._headers(), verify=self._verify)
    


class InvenioResults:
    """
    Wrapper around the JSON document returned by a GET on records
    """
    def __init__(self, records_json):
        # Keep the whole reply, plus a shortcut to the list of hits
        self._js = records_json
        self._hits = records_json['hits']['hits']

    def __len__(self):
        # Count reported under hits.total, not the size of the hits list
        total = self._js['hits']['total']
        return int(total)

    # count() is an alias of len(obj)
    count = __len__

    def __str__(self):
        pretty = json.dumps(self._js, indent=2)
        return pretty

    @property
    def records(self):
        """List stored under hits.hits of the reply."""
        return self._hits

    @property
    def links(self):
        """The 'links' entry of the reply."""
        js = self._js
        return js['links']

    @property
    def aggregations(self):
        """The 'aggregations' entry of the reply."""
        js = self._js
        return js['aggregations']



@dataclass
class InvenioAstropedia:
    """
    Formatter from our/astropedia metadata to invenio-rdm records

    The fields hold "our" metadata as loaded from JSON; create_record()
    turns them into the dict that is sent to Invenio.
    """

    # Skeleton of every record. Not a dataclass field (no annotation).
    # create_record() works on a deep copy, so this is never modified.
    _RECORD_TEMPLATE = {
      "access": {
        "record": "public",
        "files": "public"
      },
      "files": {
        "enabled": True
      },
      "metadata": {
      }
    }

    title: str
    date_pub: str
    origin: str
    url: str
    description: str
    # Iterated name by name in create_record(); each personal name is
    # written "Family, Given".
    authors: list
    document_url: str
    status: str
    bounding_box: dict
    scope: str
    browse: str
    product_url: str

    def asdict(self):
        """Return the fields as a plain dict."""
        return asdict(self)

    @staticmethod
    def _creators(authors:list, person_or_org:list=None):
        """
        Define list of creators (authors)

        Args:
            authors: Names; personal names are written "Family, Given".
            person_or_org: Parallel list of 'person' / 'org' flags.
                When omitted or empty, every author is a person.

        Returns:
            List of {'person_or_org': {...}} dicts.

        Raises:
            ValueError: if a flag is neither 'person' nor 'org'.
        """
        kinds = person_or_org if person_or_org else ['person'] * len(authors)
        out = []
        for name, kind in zip(authors, kinds):
            if kind == 'org':
                crt = {'name': f"{name}",
                       'type': 'organizational'}
            elif kind == 'person':
                # Split on the first comma only and trim both parts, so
                # "Fortezzo, C.M." gives given name "C.M." (no leading blank)
                # and a name with no (or several) commas does not crash.
                family, _, given = name.partition(',')
                crt = {'family_name': family.strip(),
                       'type': 'personal'}
                given = given.strip()
                if given:
                    crt['given_name'] = given
            else:
                raise ValueError(
                    f"person_or_org entries must be 'person' or 'org', got {kind!r}"
                )
            out.append({'person_or_org': crt})
        return out

    @staticmethod
    def _publication_date(date_string:str):
        """Return the date part of an ISO-8601 string as 'YYYY-MM-DD'."""
        from dateutil.parser import isoparse
        return isoparse(date_string).date().isoformat()

    @staticmethod
    def _description(description, **kwargs):
        """
        Append an HTML "Extra info" list to 'description'.

        Each keyword argument becomes one list item. 'bounding_box' (a
        dict) is rendered as "key = value" pairs, string values starting
        with 'http' become links, anything else is shown via str().
        Values are inserted as-is (no HTML escaping).
        """
        items = []
        for key, value in kwargs.items():
            if key == 'bounding_box':
                label = "Bounding-Box:"
                body = ', '.join(str(k_) + ' = ' + str(v_)
                                 for k_, v_ in value.items())
            else:
                label = f"{key.title()}:"
                if isinstance(value, str) and value.startswith('http'):
                    body = f"<a href='{str(value)}'>{str(value)}</a>"
                else:
                    body = str(value)
            items.append(f"<li>{label}<ul><li>{body}</li></ul></li>")
        sup_info = "\n<b>Extra info:</b>\n" + "<ul>" + ''.join(items) + "</ul>"
        return description + sup_info

    def create_record(self):
        """
        Format our meta/data into Invenio's

        Returns:
            A new dict (independent of the class template) with the
            'access', 'files' and 'metadata' sections of the record.
        """
        from copy import deepcopy

        # Deep copy: a shallow copy would share the nested 'access' and
        # 'files' dicts with the class-level template, so a caller editing
        # the returned payload would alter every later record.
        payload = deepcopy(self._RECORD_TEMPLATE)
        payload['metadata'] = {
            'creators': self._creators(self.authors),
            'publisher': self.origin,
            'publication_date': self._publication_date(self.date_pub),
            'resource_type': {'id': 'dataset'},
            'title': self.title,
            'description': self._description(description=self.description,
                                             bounding_box=self.bounding_box,
                                             link=self.url),
            # NOTE(review): kept under 'metadata' as in the original payload;
            # confirm against the InvenioRDM record schema.
            'access': {'status': 'metadata-only'},
        }
        return payload


In [2]:
# Create a client for our instance.
# Reading public records needs no token; creating/updating records does.
# To get a token, go to your (Invenio) user profile, section "Applications",
# and create one there.
# See https://inveniordm.docs.cern.ch/reference/rest_api_index/ for further details.
#
# The token is a secret, so it is read from the INVENIO_TOKEN environment
# variable instead of being written in the notebook. Any token that was ever
# stored in this file should be revoked and replaced.

import os

# pub = InvenioAPI('data.europlanet-gmap.eu')
pub = InvenioAPI('10.72.2.28',
                 token=os.environ.get('INVENIO_TOKEN'))

In [3]:
# Let's just read all the records for a basic check/view
# (read_records() does a GET on '/records' and wraps the JSON reply
# in an InvenioResults object)

res = pub.read_records()
# print(res)
res.records[0]  # first hit of the reply, shown as the cell output



{'id': 'hb6va-6c834',
 'revision_id': 3,
 'is_published': True,
 'versions': {'index': 1, 'is_latest': True},
 'access': {'status': 'metadata-only',
  'files': 'public',
  'record': 'public',
  'embargo': {'active': False, 'reason': None}},
 'parent': {'id': '4h3r5-63x63'},
 'files': {'enabled': False, 'order': []},
 'metadata': {'creators': [{'person_or_org': {'given_name': 'C.M.',
     'type': 'personal',
     'name': 'Fortezzo, C.M.',
     'family_name': 'Fortezzo'}},
   {'person_or_org': {'given_name': 'P. D.',
     'type': 'personal',
     'name': 'Spudis, P. D.',
     'family_name': 'Spudis'}},
   {'person_or_org': {'given_name': 'S. L.',
     'type': 'personal',
     'name': 'Harrel, S. L.',
     'family_name': 'Harrel'}}],
  'description': 'This new work represents a seamless, globally consistent, 1:5,000,000-scale geologic map derived from \n the six digitally renovated geologic maps (see Source Online Linkage below). The goal of this project was to \ncreate a digital resource

In [4]:
# Load "our" metadata (extracted from Astropedia in notebook "astropedia_product_parse"),
# then create an Invenio payload/package from it.

import json

# Explicit encoding: without it open() uses the platform's locale encoding,
# which is not guaranteed to be UTF-8.
with open('Unified_Geologic_Map_of_the_Moon_GIS_v2_OurMeta.json', encoding='utf-8') as fp:
    js = json.load(fp)

# The JSON keys are passed as keyword arguments, so they must match the
# InvenioAstropedia field names.
data = InvenioAstropedia(**js)

data.asdict()

{'title': 'Unified Geologic Map of the Moon, 1:5M, 2020',
 'date_pub': '20200303',
 'origin': 'USGS Astrogeology Science Center',
 'url': 'https://astrogeology.usgs.gov/search/map/Moon/Geology/Unified_Geologic_Map_of_the_Moon_GIS_v2',
 'description': 'This new work represents a seamless, globally consistent, 1:5,000,000-scale geologic map derived from \n the six digitally renovated geologic maps (see Source Online Linkage below). The goal of this project was to \ncreate a digital resource for science research and analysis, future geologic mapping efforts, be it local-, regional-, \nor global-scale products, and as a resource\n for the educators and the public interested in lunar geology. Here we present the completed mapping \n project as unit contacts, geologic unit polygons, linear features, and unit and feature nomenclature \n annotation. The product overlies shaded-relief products derived from SELENE Kaguya terrain camera stereo \n (equatorial, ~60 m/pix) and LOLA altimetry (north 

In [5]:
# Create record (draft)
# create_draft() POSTs the JSON built by data.create_record() to '/records'
# and returns the server's reply as a dict.
# Note: this rebinds `res`, which until now held the result of read_records().

res = pub.create_draft(data)

res



{'id': '7413q-fjq04',
 'revision_id': 4,
 'expires_at': '2022-04-20 00:10:39.156924',
 'is_published': False,
 'versions': {'index': 1, 'is_latest_draft': True, 'is_latest': False},
 'access': {'status': 'metadata-only',
  'files': 'public',
  'record': 'public',
  'embargo': {'active': False, 'reason': None}},
 'parent': {'id': 'q02dh-9a396',
  'access': {'links': [], 'owned_by': [{'user': 2}]}},
 'files': {'enabled': True, 'order': []},
 'metadata': {'creators': [{'person_or_org': {'given_name': 'C.M.',
     'type': 'personal',
     'name': 'Fortezzo, C.M.',
     'family_name': 'Fortezzo'}},
   {'person_or_org': {'given_name': 'P. D.',
     'type': 'personal',
     'name': 'Spudis, P. D.',
     'family_name': 'Spudis'}},
   {'person_or_org': {'given_name': 'S. L.',
     'type': 'personal',
     'name': 'Harrel, S. L.',
     'family_name': 'Harrel'}}],
  'description': 'This new work represents a seamless, globally consistent, 1:5,000,000-scale geologic map derived from \n the six dig

In [6]:
# Open a browser tab at just-created record
# (`res` is the dict returned by create_draft(); its 'links' -> 'self_html'
# entry is used as the page address)

import webbrowser

url = res['links']['self_html']
webbrowser.open(url)  # the returned bool is shown as the cell output

True