-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
issue: #10 Mint DOI on record publication [more]
- Serialize record metadata for DataCite
- Fixed hidden bug: RequestError not imported
- Mint DOI externally via asynchronous Celery task
  * Set PIDStatus to REGISTERED
  * Set retrieved DOI in PersistentIdentifier
- Display DOI on Record Page
- Load different environment variables in test and live
- Set Flask recognized configuration variables
- Make invenio_{records,deposit}_ui.recid url generation be request independent
- Closes: #10 ; Closes: #269 ; Closes: #277
- Loading branch information
Showing
28 changed files
with
1,011 additions
and
341 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Digital Object Identifier module.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2018 NU,FSM,GHSL. | ||
# | ||
# This is free software; you can redistribute it and/or modify it | ||
# under the terms of the MIT License; see LICENSE file for more details. | ||
|
||
"""Flask extension for DOI.""" | ||
|
||
|
||
class DigitalObjectIdentifier(object):
    """Flask extension object for Digital Object Identifier support."""

    def __init__(self, app=None):
        """Create the extension, binding it right away when ``app`` is given.

        :param app: Optional Flask application. When truthy it is handed to
            :meth:`init_app`; otherwise nothing is registered yet.
        """
        if not app:
            return
        self.init_app(app)

    def init_app(self, app):
        """Register this instance on the Flask application.

        :param app: Flask application whose ``extensions`` mapping receives
            this instance under the ``'cd2h-doi'`` key.
        """
        registry = app.extensions
        registry['cd2h-doi'] = self
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""DOI links.""" | ||
|
||
|
||
def doi_url_for(doi_value):
    """Build the doi.org resolver URL for a DOI.

    :param doi_value: DOI value; it is converted with ``str`` first.
    :returns: ``'https://doi.org/'`` followed by the DOI with any leading
        and trailing slashes removed.
    """
    doi = str(doi_value).strip('/')
    return 'https://doi.org/{}'.format(doi)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""Local DOI minter.""" | ||
|
||
from flask import current_app | ||
from invenio_pidstore.models import PersistentIdentifier, PIDStatus | ||
|
||
|
||
def mint_record_doi(record_uuid, data):
    """Mint doi PersistentIdentifier in an initial New state.

    Because DOIs are minted by an external service, we create a PID for
    tracking purposes, but do not mark it as Registered NOR do we specify the
    final DOI value upfront. A unique and temporary doi value is put in the DB
    until the real (final) DOI value is minted and provided to us by the
    external service. For that reason, we don't pass that DOI value back in
    `data`; `data['doi']` is set to an empty string instead.

    An asynchronous task will update this PID with results from the external
    service.

    A doi PersistentIdentifier can only be minted if the Record has an
    associated recid PersistentIdentifier and it has not been doi minted
    before.

    :param record_uuid: Record object uuid
    :param data: Record object as dict (or dict-like). Mutated in place:
                 its 'doi' key is set to ''.
    :returns: doi PersistentIdentifier
    :raises AssertionError: if `data` lacks the configured recid field or
                            already contains a 'doi' key.
    """
    recid_field = current_app.config['PIDSTORE_RECID_FIELD']

    # Explicit raises rather than ``assert`` statements: asserts are removed
    # when Python runs with -O, which would let a second doi PID be created.
    # AssertionError is kept so existing callers see the same exception type.
    if recid_field not in data:
        raise AssertionError(
            'Cannot mint a DOI: record has no {!r} field.'.format(recid_field)
        )
    if 'doi' in data:
        raise AssertionError(
            'Cannot mint a DOI: record already has a doi field.'
        )

    # NOTE(review): the guard above checks PIDSTORE_RECID_FIELD while the
    # temporary value is read from the literal 'id' key -- confirm that the
    # two always name the same field.
    pid = PersistentIdentifier.create(
        'doi',
        data['id'],  # This is a purposefully unique but temporary value
        pid_provider='datacite',
        object_type='rec',
        object_uuid=record_uuid,
        status=PIDStatus.NEW,
    )
    data['doi'] = ''
    return pid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
"""JSON Schemas.""" | ||
from datetime import date | ||
|
||
from flask import current_app | ||
from marshmallow import Schema, fields | ||
|
||
|
||
class DataCiteResourceTypeSchemaV4(Schema):
    """Schema producing the DataCite ``resourceType`` element."""

    resourceTypeGeneral = fields.Method('get_general_resource_type')
    resourceType = fields.Method('get_specific_resource_type')

    def get_general_resource_type(self, resource_type):
        """Return the general resource type.

        Reads the ``'general'`` key of ``resource_type`` and falls back to
        ``'Dataset'`` when the key is absent.

        TODO: Settle on general resource types and use those.
        """
        general = resource_type.get('general', 'Dataset')
        return general

    def get_specific_resource_type(self, resource_type):
        """Return the specific resource type.

        Reads the ``'specific'`` key of ``resource_type`` and falls back to
        ``'Dataset'`` when the key is absent.

        TODO: Settle on specific resource types (if any) and use those.
        """
        specific = resource_type.get('specific', 'Dataset')
        return specific
|
||
|
||
class DataCiteTitleSchemaV4(Schema):
    """Schema for a single DataCite title entry."""

    # Serialized as a plain string under the 'title' key.
    title = fields.String()
|
||
|
||
class DataCiteCreatorSchemaV4(Schema):
    """Creator schema."""

    # Note: Marshmallow doesn't try to automatically extract a field
    #       corresponding to a fields.Method.
    creatorName = fields.Method('get_creator_name')
    # TODO optional:
    # givenName
    # familyName

    def get_creator_name(self, author):
        """Extract creator name.

        :param author: Author name as a whitespace-separated string
            (``None`` is treated like an empty string).
        :returns: ``"<last token>, <first token>"`` when the name has two or
            more tokens (tokens in between are ignored), the token itself
            when the name has exactly one token, and ``''`` when the name is
            empty or only whitespace.
        """
        name_parts = (author or '').strip().split()
        if not name_parts:
            return ''
        if len(name_parts) == 1:
            # Single-token names (organisations, mononyms) are kept as-is so
            # the creatorName sent to DataCite is not an empty string.
            return name_parts[0]
        return "{last_name}, {first_name}".format(
            last_name=name_parts[-1], first_name=name_parts[0])
|
||
|
||
class DataCiteSchemaV4(Schema):
    """Schema for DataCite Metadata.

    Only the minimum required fields are implemented for now; optional
    fields may be added in the future.

    Fields and subfields are based on
    schema.datacite.org/meta/kernel-4.1/doc/DataCite-MetadataKernel_v4.1.pdf
    """

    identifier = fields.Method(
        'get_identifier',
        attribute='metadata.doi',
        dump_only=True,
    )
    creators = fields.List(
        fields.Nested(DataCiteCreatorSchemaV4),
        attribute='metadata.author',
        dump_only=True,
    )
    titles = fields.List(
        fields.Nested(DataCiteTitleSchemaV4),
        attribute='metadata',
        dump_only=True,
    )
    publisher = fields.Method('get_publisher', dump_only=True)
    publicationYear = fields.Method('get_year', dump_only=True)
    resourceType = fields.Nested(
        DataCiteResourceTypeSchemaV4,
        attribute='metadata',  # TODO: 'metadata.resource_type' when added
        dump_only=True,
    )

    def get_identifier(self, obj):
        """Build the DataCite identifier element for the record.

        :param obj: Object being serialized; its ``'metadata'`` entry is
            read.
        :returns: dict holding the record DOI (empty string when no
            ``'doi'`` key exists) under ``'identifier'`` and the constant
            ``'DOI'`` under ``'identifierType'``.
        """
        metadata = obj['metadata']
        doi = metadata.get('doi', '')
        return {'identifier': doi, 'identifierType': 'DOI'}

    def get_publisher(self, data):
        """Return the publisher from the ``DOI_PUBLISHER`` app config."""
        config = current_app.config
        return config['DOI_PUBLISHER']

    def get_year(self, data):
        """Return the publication year.

        This is the current year for now.

        TODO: Revisit when dealing with embargo.
        """
        today = date.today()
        return today.year
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"""DOI serializer for external service.""" | ||
|
||
from invenio_records_rest.serializers.datacite import DataCite41Serializer | ||
|
||
from .schemas import DataCiteSchemaV4 | ||
|
||
# DataCite v4.1 format serializer: a module-level DataCite41Serializer
# instance built on DataCiteSchemaV4 and constructed with replace_refs=True.
datacite_v41 = DataCite41Serializer(DataCiteSchemaV4, replace_refs=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
"""CD2H Celery tasks.""" | ||
import re | ||
|
||
from celery import shared_task | ||
from datacite.client import DataCiteMDSClient | ||
from datacite.errors import DataCiteError, HttpError | ||
from elasticsearch.exceptions import RequestError | ||
from flask import current_app, url_for | ||
from invenio_db import db | ||
from invenio_indexer.api import RecordIndexer | ||
from invenio_pidstore.models import PersistentIdentifier, PIDStatus | ||
from invenio_records_files.api import Record | ||
|
||
from cd2h_repo_project.modules.doi.serializers import datacite_v41 | ||
from cd2h_repo_project.modules.records.links import ( | ||
url_for_record_ui_recid_external | ||
) | ||
from cd2h_repo_project.modules.records.resolvers import record_resolver | ||
|
||
|
||
def extract_doi(status_str):
    """Extract minted DOI from response.

    The DOI is taken from the parenthesised token that ends the string,
    e.g. ``"OK (10.5072/abcd-1234)"`` yields ``"10.5072/abcd-1234"``. Only
    letters, digits, ``-``, ``.`` and ``/`` are accepted inside the
    parentheses.

    Exceptions percolate.

    :param status_str: Response text returned by the external service.
    :returns: The DOI found between the final parentheses.
    :raises AttributeError: if ``status_str`` does not end with such a
        parenthesised token (``re.search`` returns ``None``).
    """
    # Raw string: the pattern is unchanged, but the backslashes are no longer
    # parsed as (invalid) string escape sequences.
    return re.search(r"\(([-.a-zA-Z0-9\/]+)\)$", status_str).group(1)
|
||
|
||
@shared_task(ignore_result=True, max_retries=6, default_retry_delay=10 * 60,
             rate_limit='100/m')
def register_doi(recid_pid_value):
    """External DOI registration task.

    This asynchronous task mints a DOI with the external service and
    stores it in the local doi PID. It will retry a `max_retries` number of
    times if the service is down. It will not retry for other errors
    (internal ones).

    `default_retry_delay` is in seconds.

    Steps, in order: resolve the record, look up its doi PID, post the
    serialized metadata to DataCite and, when the doi PID is still NEW,
    post the landing page URL, store the minted DOI on the PID and on the
    record, commit and re-index.

    :param recid_pid_value: pid_value of recid PID for the target record.
                            Note that this pid_value is also the pid_value of
                            the doi PID associated with the target record if
                            it has not been DOI minted yet.
    """
    try:
        # Only `record` is used below; the resolved recid PID is not.
        pid, record = record_resolver.resolve(str(recid_pid_value))

        # Use the record's own DOI when it has one; otherwise fall back to
        # the recid value (see the note on `recid_pid_value` above).
        doi_pid_value = record.get('doi') or recid_pid_value
        doi_pid = PersistentIdentifier.get(
            pid_type='doi', pid_value=doi_pid_value)

        # DataCite client configured entirely from the application config.
        client = DataCiteMDSClient(
            username=current_app.config['PIDSTORE_DATACITE_USERNAME'],
            password=current_app.config['PIDSTORE_DATACITE_PASSWORD'],
            prefix=current_app.config['PIDSTORE_DATACITE_DOI_PREFIX'],
            test_mode=current_app.config['PIDSTORE_DATACITE_TESTMODE'],
            url=current_app.config['PIDSTORE_DATACITE_URL']
        )

        # Mint DOI
        serialized_record = datacite_v41.serialize(doi_pid, record)
        result = client.metadata_post(serialized_record)

        # Associate landing page to DOI on DataCite if new
        if doi_pid.status == PIDStatus.NEW:
            # The minted DOI is parsed out of the metadata_post response.
            minted_doi = extract_doi(result)
            landing_url = url_for_record_ui_recid_external(recid_pid_value)
            result = client.doi_post(minted_doi, landing_url)

            # Update doi_pid
            # Replace the temporary pid_value with the minted DOI, then
            # register the PID and mirror the DOI onto the record.
            doi_pid.pid_value = minted_doi
            doi_pid.register()
            record['doi'] = minted_doi
            record.commit()
            # Necessary but unclear why: (TODO: Investigate)
            # - call to record.commit() is done above and
            # - tests don't need it
            db.session.commit()

            # Re-index record
            RecordIndexer().index(record)

    except (HttpError, DataCiteError) as e:
        # Errors from the DataCite client: ask Celery to retry the task.
        register_doi.retry(exc=e)
    except RequestError:
        # Elasticsearch request failure: logged with traceback, no retry.
        current_app.logger.exception('Could not index {}.'.format(record))
Oops, something went wrong.