Skip to content

Commit

Permalink
loaders: handle non-OAI-PMH XML dumps
Browse files: browse the repository at this point in the history
  • Loading branch information
slint committed Jul 30, 2018
1 parent a9a06b0 commit f991adc
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
1 change: 1 addition & 0 deletions invenio_openaire/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
}

# Mapping of XML namespace prefix -> namespace URI.
# The prefixes here match the ones used in the prefixed paths of
# invenio_openaire/loaders.py (e.g. 'oai:record', 'oaf:project',
# 'dri:objIdentifier').
# NOTE(review): presumably this dict is what the loader passes as the
# namespace map when evaluating those paths -- confirm against the loader.
OPENAIRE_OAIPMH_NAMESPACES = {
# 'dri': prefix of the header fields read from non-OAI-PMH XML dumps
# (dri:objIdentifier, dri:dateOfTransformation).
'dri': 'http://www.driver-repository.eu/namespace/dri',
# 'oai': prefix of the OAI-PMH envelope elements (oai:record, oai:header,
# oai:metadata, oai:identifier, oai:datestamp).
'oai': 'http://www.openarchives.org/OAI/2.0/',
# 'oaf': prefix of the OpenAIRE payload elements (oaf:entity, oaf:project).
'oaf': 'http://namespace.openaire.eu/oaf',
}
Expand Down
25 changes: 14 additions & 11 deletions invenio_openaire/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,20 @@ def fundertree2json(self, tree, oai_id):
def grantxml2json(self, grant_xml):
"""Convert OpenAIRE grant XML into JSON."""
tree = etree.fromstring(grant_xml)
ptree = self.get_subtree(
tree, '/oai:record/oai:metadata/oaf:entity/oaf:project')[0]
# XML harvested from OAI-PMH has a different format/structure
if tree.prefix == 'oai':
ptree = self.get_subtree(
tree, '/oai:record/oai:metadata/oaf:entity/oaf:project')[0]
header = self.get_subtree(tree, '/oai:record/oai:header')[0]
oai_id = self.get_text_node(header, 'oai:identifier')
modified = self.get_text_node(header, 'oai:datestamp')
else:
ptree = self.get_subtree(
tree, '/record/result/metadata/oaf:entity/oaf:project')[0]
header = self.get_subtree(tree, '/record/result/header')[0]
oai_id = self.get_text_node(header, 'dri:objIdentifier')
modified = self.get_text_node(header, 'dri:dateOfTransformation')

oai_id = self.get_text_node(
tree, '/oai:record/oai:header/oai:identifier')
modified = self.get_text_node(
tree, '/oai:record/oai:header/oai:datestamp')
url = self.get_text_node(ptree, 'websiteurl')
code = self.get_text_node(ptree, 'code')
title = self.get_text_node(ptree, 'title')
Expand Down Expand Up @@ -271,14 +278,10 @@ def _count(self):
def iter_grants(self, as_json=True):
"""Fetch records from the SQLite database."""
self._connect()
n_grants, = self.db_connection.cursor().execute(
"SELECT COUNT(1) from grants").fetchone()
n_grants = self._count()
result = self.db_connection.cursor().execute(
"SELECT data, format FROM grants"
)
for _ in range(n_grants):
data, data_format = result.fetchone()
for data, data_format in result:
if (not as_json) and data_format == 'json':
raise Exception("Cannot convert JSON source to XML output.")
elif as_json and data_format == 'xml':
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
from flask.cli import ScriptInfo
from flask_login import LoginManager
from invenio_celery import InvenioCelery
from invenio_db import db as db_
from invenio_db import InvenioDB
from invenio_db import db as db_
from invenio_indexer import InvenioIndexer
from invenio_indexer.api import RecordIndexer
from invenio_jsonschemas import InvenioJSONSchemas
Expand Down

0 comments on commit f991adc

Please sign in to comment.