# Create InvenioRDM package from Astropedia

- https://astrogeology.usgs.gov/search/map/Moon/Geology/Unified_Geologic_Map_of_the_Moon_GIS_v2.xml
- https://inveniordm.docs.cern.ch/install/run/


In [47]:
# Code block

import json
import requests
from dataclasses import dataclass, asdict

class InvenioResults:
    """
    Empty placeholder: lets the name be used in annotations before the
    full class of the same name is defined further down in this cell.
    """

class InvenioAPI:
    """
    Client for Invenio(RDM) API

    Wraps the REST calls needed to list records and to create, publish and
    delete drafts. URLs are built as `<scheme>://<hostname>/api<path>`.

    Parameters
    ----------
    hostname : host (optionally with port) of the Invenio instance
    token : bearer token; when omitted no Authorization header is sent
    verify : passed to `requests` as `verify=`; defaults to False (TLS
        certificate checking disabled), which is what this client always did
    """
    _scheme = 'https'
    _hostname = 'localhost' #'127.0.0.1:5000'
    _path_api = '/api'
    _token = None
    _verify = False

    def __init__(self, hostname:str, token:str=None, verify=False):
        self._hostname = hostname
        self._token = token
        self._verify = verify

    def read_records(self) -> "InvenioResults":
        """
        Read all records from server, return InvenioResults object
        """
        res = self._get('/records')
        return InvenioResults(res.json())

    def create_draft(self, payload) -> dict:
        """
        Create draft (see publish_draft() for publishing it)

        `payload` must be an InvenioAstropedia instance. The record is
        created first, then every file it declares is registered, uploaded
        and committed. Returns the JSON of the create-draft response.

        Raises TypeError if `payload` is not an InvenioAstropedia.
        """
        if not isinstance(payload, InvenioAstropedia):
            raise TypeError(
                f"payload must be an InvenioAstropedia, got {type(payload).__name__}"
            )

        # Create draft
        data_record = payload.create_record_payload()
        resp_record = self._post('/records', json.dumps(data_record))
        draft = resp_record.json()  # parse the response body only once
        print("Create draft:", draft)

        # Upload files
        draft_id = draft['id']

        # 1) initialize file key(s)
        #
        path_ext = f'/records/{draft_id}/draft/files'
        data_files = payload.create_files_payload()

        if data_files:
            resp_files = self._post(path_ext, json.dumps(data_files))
            print("Declare files:", resp_files.json())

            # 2) push files data
            #
            for obj in data_files:
                key = obj['key']
                # push data
                # NOTE(review): read_file() returns None when the download
                # fails; the PUT below is then sent without a body - confirm
                # that this is the intended outcome.
                path_ext = f"/records/{draft_id}/draft/files/{key}/content"
                data = payload.read_file(key)
                resp_file = self._put(path_ext, data)
                print(f"Pushed file {key}", resp_file.json())
                # commit
                path_ext = f"/records/{draft_id}/draft/files/{key}/commit"
                resp_commit = self._post(path_ext, None)
                print(f"Commit file {key}", resp_commit.json())
                # drop the reference to the file content before the next download
                data = None

        # End) let's read what's in there now
        # path_ext = f"/records/{draft_id}/draft"
        # res = self._get(path_ext)
        return draft

    def publish_draft(self, draft_id):
        """
        Publish a previously created draft (see create_draft())

        Returns the JSON body of the publish response.
        """
        path_ext = f"/records/{draft_id}/draft/actions/publish"
        res = self._post(path_ext)
        return res.json()

    def delete_draft(self, draft_id):
        """
        Delete draft

        Returns the JSON body of the response, or an empty dict when the
        server answers without a body (a successful delete has no content).
        """
        path_ext = f"/records/{draft_id}/draft"
        res = self._delete(path_ext)
        if not res.content:
            return {}
        return res.json()

    def _url(self, path_ext=''):
        """Return the absolute API URL for `path_ext` (e.g. '/records')."""
        path = self._path_api + path_ext
        return f"{self._scheme}://{self._hostname}{path}"

    def _headers(self, content_type:str='application/json'):
        """
        Return request headers; Authorization is added only if a token is set
        """
        hdr = {'Content-Type': content_type}
        if self._token:
            hdr['Authorization'] = f"Bearer {self._token}"
        return hdr

    def _get(self, path_ext, params=None):
        """GET `path_ext`; sent without headers (no token needed to read)."""
        base_url = self._url(path_ext)
        return requests.get(base_url, params=params, verify=self._verify)

    def _post(self, path_ext, payload=None):
        """POST `payload` (already serialized) to `path_ext` as JSON."""
        base_url = self._url(path_ext)
        return requests.post(base_url, data=payload,
                             headers=self._headers(), verify=self._verify)

    def _put(self, path_ext, payload=None):
        """PUT raw bytes to `path_ext` as application/octet-stream."""
        base_url = self._url(path_ext)
        return requests.put(base_url, data=payload,
                            headers=self._headers('application/octet-stream'),
                            verify=self._verify)

    def _delete(self, path_ext):
        """DELETE `path_ext`, with the same headers/verify as the other write verbs."""
        base_url = self._url(path_ext)
        return requests.delete(base_url, headers=self._headers(),
                               verify=self._verify)
    


class InvenioResults:
    """
    Wrapper around the JSON document returned by a GET on records
    """
    def __init__(self, records_json):
        hits_section = records_json['hits']
        self._js = records_json
        self._hits = hits_section['hits']

    def __len__(self):
        # Size is the total reported by the server, not the number of
        # records held in this response.
        total = self._js['hits']['total']
        return int(total)
    count = __len__

    def __str__(self):
        pretty = json.dumps(self._js, indent=2)
        return pretty

    @property
    def records(self):
        """List stored under `hits.hits` of the response."""
        return self._hits

    @property
    def links(self):
        """Content of the top-level `links` key."""
        document = self._js
        return document['links']

    @property
    def aggregations(self):
        """Content of the top-level `aggregations` key."""
        document = self._js
        return document['aggregations']



@dataclass
class InvenioAstropedia:
    """
    Formatter from our/astropedia metadata to invenio-rdm records

    The fields hold the metadata as loaded from "our" JSON. After
    initialization `_files` maps each upload key (the file name taken from
    `document_url` and `browse`) to the URL it is downloaded from.
    """
    title: str
    date_pub: str
    origin: str
    url: str
    description: str
    authors: list  # sequence of "Family, Given" strings (a lone string is accepted too)
    document_url: str
    status: str
    bounding_box: dict
    scope: str
    browse: str
    product_url: str

    # Skeleton of a record draft; deep-copied for every payload, never mutated.
    _RECORD_TEMPLATE = {
      "access": {
        "record": "public",
        "files": "public"
      },
      "files": {
        "enabled": True
      },
      "metadata": {
      }
    }

    def __post_init__(self):
        from os import path
        # upload key (file name) -> source URL; empty/None URLs are skipped
        self._files = {
            path.basename(f): f
            for f in (self.document_url, self.browse)
            if f
        }

    def asdict(self):
        """Return the dataclass fields as a dict."""
        return asdict(self)

    to_dict = asdict

    def read_file(self, key):
        """
        Download and return the content (bytes) of the file registered as `key`

        Returns None, after printing the reason, when `key` is unknown or
        the download fails.
        """
        url = self._files.get(key)
        if not url:
            print(f"No file registered under key '{key}'")
            return None
        try:
            resp = requests.get(url)
            resp.raise_for_status()
        except requests.RequestException as err:
            # `resp` may not exist here (e.g. connection error), so report
            # the exception itself rather than a status code.
            print(f"Request for '{url}' failed: {err}")
            return None

        return resp.content

    def create_files_payload(self):
        """
        Return array of `{'key':<filename>}` objects
        (See https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records
        """
        return [{'key': key} for key in self._files]

    @staticmethod
    def _creators(authors, person_or_org=None):
        """
        Define list of creators (authors)

        `person_or_org` optionally gives 'person' or 'org' for each author;
        by default every author is a person. A person name is split at its
        first comma into family and given name, both stripped of surrounding
        whitespace; `given_name` is left out when there is none.

        Raises ValueError for any other kind.
        """
        if isinstance(authors, str):
            # a lone name would otherwise be iterated character by character
            authors = [authors]
        kinds = person_or_org if person_or_org else ['person'] * len(authors)

        out = []
        for name, kind in zip(authors, kinds):
            if kind == 'org':
                entity = {'name': f"{name}", 'type': 'organizational'}
            elif kind == 'person':
                family, _, given = str(name).partition(',')
                entity = {'family_name': family.strip()}
                if given.strip():
                    entity['given_name'] = given.strip()
                entity['type'] = 'personal'
            else:
                raise ValueError(f"Unknown creator kind: {kind!r}")
            out.append({'person_or_org': entity})
        return out

    @staticmethod
    def _publication_date(date_string:str):
        """Return the date part of an ISO date/datetime string as YYYY-MM-DD."""
        from dateutil.parser import isoparse
        return isoparse(date_string).date().isoformat()

    @staticmethod
    def _description(description, **kwargs):
        """
        Return `description` as HTML, with `kwargs` appended as an "Extra" list

        Each keyword becomes one list item; `bounding_box` (a dict) is
        rendered as "key = value" pairs and string values starting with
        'http' become links. Values are HTML-escaped. Finally double
        newlines become `<br>` and remaining newlines are removed.
        """
        from html import escape

        if kwargs:
            items = []
            for k, v in kwargs.items():
                if k == 'bounding_box':
                    label = "Bounding-Box:"
                    body = escape(', '.join(f"{k_} = {v_}" for k_, v_ in v.items()))
                else:
                    label = f"{k.title().replace('_',' ')}:"
                    text = escape(str(v))  # also escapes quotes, safe inside href='...'
                    if isinstance(v, str) and v.startswith('http'):
                        body = f"<a href='{text}'>{text}</a>"
                    else:
                        body = text
                items.append(f"<li>{label}<ul><li>{body}</li></ul></li>")
            description += "\n<b>Extra:</b>\n<ul>" + ''.join(items) + "</ul>"

        return (description.replace('<b>', '<p/><b>')
                           .replace('\n\n', '<br>')
                           .replace('\n', ''))

    @staticmethod
    def _identifiers(url):
        """Return the identifiers list holding `url` with scheme 'url'."""
        return [{
            'identifier': url,
            'scheme': 'url'
        }]

    def create_record_payload(self) -> dict:
        """
        Return json data for InvenioRDM record draft
        (See https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records
        """
        from copy import deepcopy

        # deep copy: the nested 'access' dict must not be shared with the template
        payload = deepcopy(self._RECORD_TEMPLATE)
        description = self._description(
            description=self.description,
            bounding_box=self.bounding_box,
            product_page=self.url
        )

        payload.update({
            'metadata': {
                'creators': self._creators(self.authors),
                'publisher': self.origin,
                'publication_date': self._publication_date(self.date_pub),
                'resource_type': {'id': 'dataset'},
                'title': self.title,
                'description': description,
                'identifiers': self._identifiers(url=self.url)
            },
            # files are enabled only when there is something to upload
            'files': {'enabled': bool(self._files)}
        })

        return payload


In [48]:
# Set the target host and the user token in the environment; the next cell reads them.
# NOTE(review): these look like real API tokens saved in the notebook (and echoed in the
# cell output) - confirm, revoke them if so, and set them outside the notebook.
%env IRDM_HOSTNAME=10.72.2.28
%env IRDM_USER_TOKEN=I1c9WD9pPW3lHUW2HsBhIHhTvgQse7DZNxmXIrgLs0ijdQTUxWwKR1xGuncN
# %env IRDM_HOSTNAME=data.europlanet-gmap.eu
# %env IRDM_USER_TOKEN=RFpFvUl8m2PWnjhqdzd9tPkw4tZruNrgC032wyrhmf2G8iOxSC2y8S7WiZ4g

env: IRDM_HOSTNAME=10.72.2.28
env: IRDM_USER_TOKEN=I1c9WD9pPW3lHUW2HsBhIHhTvgQse7DZNxmXIrgLs0ijdQTUxWwKR1xGuncN


In [49]:
# Create a client for our instance.
# Reading records needs no token; creating or updating records does.
# To get a token, go to your (Invenio) user profile, open "Application", and create one.
# See https://inveniordm.docs.cern.ch/reference/rest_api_index/ for further details.

# %env IRDM_HOSTNAME=10.72.2.28
# %env IRDM_USER_TOKEN=I1c9WD9pPW3lHUW2HsBhIHhTvgQse7DZNxmXIrgLs0ijdQTUxWwKR1xGuncN

import os
# Both values are None when the corresponding environment variable is not set.
hostname = os.environ.get('IRDM_HOSTNAME')
token = os.environ.get('IRDM_USER_TOKEN')

#print(hostname, token)

# pub = InvenioAPI('data.europlanet-gmap.eu')
pub = InvenioAPI(hostname, token)

In [50]:
# Read all the records as a basic check/view; `res` is an InvenioResults object

res = pub.read_records()
# print(res)
# res.records[0]



In [51]:
# Load "our" metadata (extracted from Astropedia in the notebook "astropedia_product_parse"),
# then create an Invenio payload/package from it.

import json

# Name the encoding explicitly so the platform default is not used to decode the JSON.
with open('Unified_Geologic_Map_of_the_Moon_GIS_v2_OurMeta.json', encoding='utf-8') as fp:
    js = json.load(fp)

# The JSON keys are passed as keyword arguments, so they must match the
# InvenioAstropedia field names.
data = InvenioAstropedia(**js)

# data.asdict()

In [52]:
# Create record (draft) from the payload object, uploading the files it declares;
# `res` is the JSON of the create-draft response.

res = pub.create_draft(data)

# res



Create draft: {'id': 'tj9ak-4aj61', 'revision_id': 4, 'expires_at': '2022-04-20 21:27:31.875749', 'is_published': False, 'versions': {'index': 1, 'is_latest_draft': True, 'is_latest': False}, 'access': {'status': 'metadata-only', 'files': 'public', 'record': 'public', 'embargo': {'active': False, 'reason': None}}, 'parent': {'id': 'jxzpc-7sv76', 'access': {'links': [], 'owned_by': [{'user': 2}]}}, 'files': {'enabled': True, 'order': []}, 'metadata': {'creators': [{'person_or_org': {'given_name': 'C.M.', 'type': 'personal', 'name': 'Fortezzo, C.M.', 'family_name': 'Fortezzo'}}, {'person_or_org': {'given_name': 'P. D.', 'type': 'personal', 'name': 'Spudis, P. D.', 'family_name': 'Spudis'}}, {'person_or_org': {'given_name': 'S. L.', 'type': 'personal', 'name': 'Harrel, S. L.', 'family_name': 'Harrel'}}], 'description': 'This new work represents a seamless, globally consistent, 1:5,000,000-scale geologic map derived from  the six digitally renovated geologic maps (see Source Online Linkage



Declare files: {'enabled': True, 'links': {'self': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files'}, 'entries': [{'key': '2760.pdf', 'status': 'pending', 'metadata': None, 'links': {'self': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf', 'content': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf/content', 'commit': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf/commit'}, 'updated': '2022-04-20T21:27:32.646936+00:00', 'created': '2022-04-20T21:27:32.644529+00:00'}, {'key': 'merc_Unified_Geologic_Map_of_The_Moon_1024.jpg', 'status': 'pending', 'metadata': None, 'links': {'self': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/merc_Unified_Geologic_Map_of_The_Moon_1024.jpg', 'content': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/merc_Unified_Geologic_Map_of_The_Moon_1024.jpg/content', 'commit': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/merc_Unified_Geologic_Map_of_The_Moon_1024.jpg/commit'}, '



Pushed file 2760.pdf {'key': '2760.pdf', 'status': 'pending', 'metadata': None, 'links': {'self': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf', 'content': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf/content', 'commit': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf/commit'}, 'updated': '2022-04-20T21:27:32.646936+00:00', 'created': '2022-04-20T21:27:32.644529+00:00'}




Commit file 2760.pdf {'key': '2760.pdf', 'status': 'completed', 'file_id': 'dd561cc6-bf1a-4d39-b4ae-0974cea5b68d', 'checksum': 'md5:57ea5bdc1733472994f3b03ea81f1121', 'storage_class': 'S', 'mimetype': 'application/pdf', 'size': 926350.0, 'version_id': 'bd1fb483-03b1-45a3-a8c3-cdfc73d8380e', 'bucket_id': '5bfbbb08-30b1-4259-a29d-13ae7ddc9cd1', 'metadata': None, 'links': {'self': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf', 'content': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf/content', 'commit': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/2760.pdf/commit'}, 'updated': '2022-04-20T21:28:10.082077+00:00', 'created': '2022-04-20T21:27:32.644529+00:00'}




Pushed file merc_Unified_Geologic_Map_of_The_Moon_1024.jpg {'key': 'merc_Unified_Geologic_Map_of_The_Moon_1024.jpg', 'status': 'pending', 'metadata': None, 'links': {'self': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/merc_Unified_Geologic_Map_of_The_Moon_1024.jpg', 'content': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/merc_Unified_Geologic_Map_of_The_Moon_1024.jpg/content', 'commit': 'https://10.72.2.28/api/records/tj9ak-4aj61/draft/files/merc_Unified_Geologic_Map_of_The_Moon_1024.jpg/commit'}, 'updated': '2022-04-20T21:27:32.652609+00:00', 'created': '2022-04-20T21:27:32.650553+00:00'}
Commit file merc_Unified_Geologic_Map_of_The_Moon_1024.jpg {'key': 'merc_Unified_Geologic_Map_of_The_Moon_1024.jpg', 'status': 'completed', 'file_id': 'f0a64aa4-ee0c-4c18-bfe4-ba80c7807536', 'checksum': 'md5:0602a41849b0a4ee33a4b3bc07ff2535', 'storage_class': 'S', 'mimetype': 'image/jpeg', 'size': 1103510.0, 'version_id': '9cccbf17-b518-4bee-9447-c0a1be327780', 'bucket_id': '5b



{'id': 'tj9ak-4aj61',
 'revision_id': 4,
 'expires_at': '2022-04-20 21:27:31.875749',
 'is_published': False,
 'versions': {'index': 1, 'is_latest_draft': True, 'is_latest': False},
 'access': {'status': 'metadata-only',
  'files': 'public',
  'record': 'public',
  'embargo': {'active': False, 'reason': None}},
 'parent': {'id': 'jxzpc-7sv76',
  'access': {'links': [], 'owned_by': [{'user': 2}]}},
 'files': {'enabled': True, 'order': []},
 'metadata': {'creators': [{'person_or_org': {'given_name': 'C.M.',
     'type': 'personal',
     'name': 'Fortezzo, C.M.',
     'family_name': 'Fortezzo'}},
   {'person_or_org': {'given_name': 'P. D.',
     'type': 'personal',
     'name': 'Spudis, P. D.',
     'family_name': 'Spudis'}},
   {'person_or_org': {'given_name': 'S. L.',
     'type': 'personal',
     'name': 'Harrel, S. L.',
     'family_name': 'Harrel'}}],
  'description': 'This new work represents a seamless, globally consistent, 1:5,000,000-scale geologic map derived from  the six digit

In [53]:
# Open a browser tab at the just-created record,
# using the `self_html` link found in the response held in `res`

import webbrowser

url = res['links']['self_html']
webbrowser.open(url)

True

In [45]:
# r = requests.get('https://10.72.2.28/api/vocabularies/resourcetypes', verify=False)
# # r = requests.get('https://10.72.2.28/api/subjects', verify=False)

# r.json()

In [55]:
# Publish the draft whose id is in `res` (the response kept from create_draft())
pub.publish_draft(res['id'])



{'id': 'tj9ak-4aj61',
 'revision_id': 3,
 'is_published': True,
 'versions': {'index': 1, 'is_latest_draft': True, 'is_latest': True},
 'access': {'status': 'open',
  'files': 'public',
  'record': 'public',
  'embargo': {'active': False, 'reason': None}},
 'parent': {'id': 'jxzpc-7sv76',
  'access': {'links': [], 'owned_by': [{'user': 2}]}},
 'files': {'enabled': True, 'order': []},
 'metadata': {'creators': [{'person_or_org': {'given_name': 'C.M.',
     'type': 'personal',
     'name': 'Fortezzo, C.M.',
     'family_name': 'Fortezzo'}},
   {'person_or_org': {'given_name': 'P. D.',
     'type': 'personal',
     'name': 'Spudis, P. D.',
     'family_name': 'Spudis'}},
   {'person_or_org': {'given_name': 'S. L.',
     'type': 'personal',
     'name': 'Harrel, S. L.',
     'family_name': 'Harrel'}}],
  'description': 'This new work represents a seamless, globally consistent, 1:5,000,000-scale geologic map derived from  the six digitally renovated geologic maps (see Source Online Linkage 