Skip to content

Commit

Permalink
providers: provide DataCite-like DOI locally
Browse files Browse the repository at this point in the history
- Generate a random, configurable length,
  base32, URI-friendly, hyphen-separated,
  optionally checksummed DOI
  • Loading branch information
fenekku committed Nov 4, 2019
1 parent 8060c1b commit 058dc1c
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 81 deletions.
21 changes: 15 additions & 6 deletions invenio_pidstore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,20 +210,29 @@
Providers
---------
Providers add extra functionality to persistent identifiers. Use cases for this
include automatically creating the persistent identifier or retrieving the
persistent identifier from an external service.
Providers wrap the creation of persistent identifiers with extra functionality.
Use cases for this include automatically creating the persistent identifier or
retrieving the persistent identifier from an external service.
PIDStore comes by default with two providers:
:py:class:`invenio_pidstore.providers.recordid.RecordIdProvider` which
creates Invenio legacy integer record identifiers and
:py:class:`invenio_pidstore.providers.datacite.DataCiteProvider` which
creates checksummed random alphanumeric 10-character strings:
PIDStore comes by default with a
:py:class:`invenio_pidstore.providers.recordid.RecordIdProvider` which will
create Invenio legacy integer record identifiers:
>>> from invenio_pidstore.providers.recordid import RecordIdProvider
>>> provider = RecordIdProvider.create()
>>> provider.pid.pid_type
'recid'
>>> provider.pid.pid_value
'1'
>>> from invenio_pidstore.providers.datacite import DataCiteProvider
>>> provider = DataCiteProvider.create()
>>> provider.pid.pid_type
'doi'
>>> provider.pid.pid_value
'3sbk2-5j060'
Creating your own provider
~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
7 changes: 7 additions & 0 deletions invenio_pidstore/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,10 @@
This name will be used by the fetcher, to retrieve the record ID value from the
JSON, and by the minter, to store it inside the JSON.
"""


# Default options used by ``DataCiteProvider.generate_doi`` when it builds the
# random DOI suffix. Each value is forwarded to ``base32.generate``.
PIDSTORE_DATACITE_DOI_OPTIONS = dict(
    # Forwarded as ``length``.
    suffix_length=10,
    # Forwarded as ``split_every``; presumably the hyphen-separated group
    # size -- confirm against base32-lib.
    split_every=5,
    # Forwarded as ``checksum``.
    checksum=True,
)
55 changes: 53 additions & 2 deletions invenio_pidstore/providers/datacite.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015-2018 CERN.
# Copyright (C) 2015-2019 CERN.
# Copyright (C) 2019 Northwestern University.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand All @@ -10,6 +11,11 @@

from __future__ import absolute_import

import os
import random
import re

from base32_lib import base32
from datacite import DataCiteMDSClient
from datacite.errors import DataCiteError, DataCiteGoneError, \
DataCiteNoContentError, DataCiteNotFoundError, HttpError
Expand All @@ -31,20 +37,65 @@ class DataCiteProvider(BaseProvider):
default_status = PIDStatus.NEW
"""Default status for newly created PIDs by this provider."""

# Pattern for a DOI prefix: the literal "10." followed by one or more
# dot-separated groups of ASCII digits (e.g. "10.1234" or "10.12.34").
# WHY: ``\Z`` is used instead of ``$`` because ``$`` also matches just before
# a trailing newline, which would accept "10.1234\n". ``[0-9]`` is used
# instead of ``\d`` because ``\d`` matches non-ASCII Unicode digits on
# Python 3.
doi_prefix_regexp = re.compile(
    r"10\.[0-9]+(\.[0-9]+)*\Z"
)

@classmethod
def create(cls, pid_value, **kwargs):
def valid_doi_prefix(cls, prefix):
    """Match ``prefix`` against the DOI prefix pattern.

    NOTE: This was done here to prevent relying on idutils.

    :param prefix: Candidate DOI prefix, e.g. ``"10.1234"``.
    :returns: The (truthy) match object when ``prefix`` matches
        ``cls.doi_prefix_regexp``; ``None`` when it does not match or is
        not a string at all.
    """
    try:
        return cls.doi_prefix_regexp.match(prefix)
    except TypeError:
        # WHY: a non-string prefix (typically ``None`` because no prefix
        # was configured) is simply "not valid"; reporting it that way lets
        # callers raise their own error instead of leaking a ``TypeError``
        # from the ``re`` module.
        return None

@classmethod
def generate_doi(cls, options=None):
    """Build a DOI from a validated prefix and a random base32 suffix.

    Settings come from the ``PIDSTORE_DATACITE_DOI_OPTIONS`` application
    config and are overridden key by key by ``options``. When neither
    supplies a ``"prefix"``, ``PIDSTORE_DATACITE_DOI_PREFIX`` is used.

    :param options: Optional ``dict`` with keys ``"prefix"``,
        ``"suffix_length"``, ``"split_every"`` and ``"checksum"``.
    :returns: The DOI as ``"<prefix>/<suffix>"``.
    :raises ValueError: If the resolved prefix is rejected by
        :meth:`valid_doi_prefix`.
    """
    config = current_app.config
    # WHY: copy the configured defaults so that neither the application
    # config nor the caller's dict is mutated by the merge below.
    settings = dict(config.get('PIDSTORE_DATACITE_DOI_OPTIONS', {}))
    settings.update(options or {})

    prefix = settings.get(
        'prefix', config.get('PIDSTORE_DATACITE_DOI_PREFIX'))

    if not cls.valid_doi_prefix(prefix):
        raise ValueError("Invalid DOI prefix: {}".format(prefix))

    suffix = base32.generate(
        length=settings.get('suffix_length'),
        split_every=settings.get('split_every'),
        checksum=settings.get('checksum'),
    )
    return prefix + "/" + suffix

@classmethod
def create(cls, pid_value=None, doi_options=None, **kwargs):
    """Create a new record identifier, generating a DOI when none is given.

    For more information about parameters,
    see :meth:`invenio_pidstore.providers.BaseProvider.create`.

    :param pid_value: Persistent identifier value. When ``None``, a DOI is
        generated with :meth:`generate_doi`.
    :param doi_options: ``dict`` with optional keys:
        ``"prefix", "suffix_length", "split_every", "checksum"``. Only
        used when ``pid_value`` is ``None``.
    :param **kwargs: See
        :meth:`invenio_pidstore.providers.base.BaseProvider.create` extra
        parameters.
    :returns: A :class:`invenio_pidstore.providers.DataCiteProvider`
        instance.
    """
    doi = cls.generate_doi(doi_options) if pid_value is None else pid_value
    parent = super(DataCiteProvider, cls)
    return parent.create(pid_value=doi, **kwargs)

Expand Down
4 changes: 3 additions & 1 deletion run-tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env sh
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015-2018 CERN.
# Copyright (C) 2015-2019 CERN.
# Copyright (C) 2019 Northwestern University.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015-2018 CERN.
# Copyright (C) 2015-2019 CERN.
# Copyright (C) 2019 Northwestern University.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand All @@ -25,7 +26,7 @@
'Flask-Menu>=0.5.1',
'invenio-access>=1.0.0',
'invenio-accounts>=1.0.0',
'mock>=1.3.0',
'mock>=3.0.0',
'pydocstyle>=1.0.0',
'pytest-cov>=1.8.0',
'pytest-pep8>=1.0.6',
Expand Down Expand Up @@ -70,6 +71,8 @@
install_requires = [
'Flask-BabelEx>=0.9.3',
'Flask>=0.11.1',
'six>=1.12.0',
'base32-lib>=1.0.0a1'
]

packages = find_packages()
Expand Down
69 changes: 0 additions & 69 deletions tests/test_examples_app.py

This file was deleted.

71 changes: 70 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015-2018 CERN.
# Copyright (C) 2015-2019 CERN.
# Copyright (C) 2019 Northwestern University.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand Down Expand Up @@ -241,3 +242,71 @@ def test_datacite_sync(logger, app, db):
assert provider.pid.status == PIDStatus.NEW
assert logger.exception.call_args[0][0] == \
"Failed to sync status from DataCite"


def test_datacite_valid_doi_prefix():
    """Check which strings are accepted and rejected as DOI prefixes."""
    accepted = ('10.1234', '10.12.34')
    rejected = ('101234', '101234.', '10.1234/', '100.1234', '10.12E45')

    for candidate in accepted:
        assert DataCiteProvider.valid_doi_prefix(candidate)

    for candidate in rejected:
        assert not DataCiteProvider.valid_doi_prefix(candidate)


@patch('invenio_pidstore.providers.datacite.base32')
def test_datacite_generate_doi_calls_base32_generate(patched_base32, app):
    """Check that configured DOI options are forwarded to base32.generate."""
    overrides = {
        'PIDSTORE_DATACITE_DOI_PREFIX': '10.4321',
        'PIDSTORE_DATACITE_DOI_OPTIONS': {
            'suffix_length': 8,
            'split_every': 4,
            'checksum': False
        },
    }

    # WHY: patch.dict restores the config even when the assertion fails and
    # removes keys that did not exist before, so no state leaks into other
    # tests.
    with patch.dict(app.config, overrides):
        DataCiteProvider.generate_doi()

        patched_base32.generate.assert_called_with(
            length=8,
            split_every=4,
            checksum=False
        )


def test_datacite_generate_doi_prefix(app):
    """Check prefix resolution: explicit option first, then configuration."""
    doi_options = {
        'suffix_length': 8,
        'split_every': 4,
        'checksum': False
    }

    # WHY: patch.dict restores the config even when an assertion fails and
    # removes keys that did not exist before, so no state leaks into other
    # tests.
    with patch.dict(
            app.config, {'PIDSTORE_DATACITE_DOI_OPTIONS': doi_options}):
        # explicit prefix
        doi = DataCiteProvider.generate_doi(options={'prefix': '10.1234'})

        assert doi.startswith('10.1234/')

        # configuration prefix
        with patch.dict(
                app.config, {'PIDSTORE_DATACITE_DOI_PREFIX': '10.4321'}):
            doi = DataCiteProvider.generate_doi()

            assert doi.startswith('10.4321/')


def test_datacite_provider_create_calls_generate(app, db):
    """Check that create() without a pid_value generates a DOI."""
    with app.app_context():
        pid = DataCiteProvider.create(doi_options={'prefix': '10.1234'}).pid
        assert pid.status == PIDStatus.NEW
        assert pid.pid_provider == 'datacite'
        assert pid.pid_value.startswith('10.1234')

        # An invalid prefix must be rejected before any PID is created.
        with pytest.raises(ValueError):
            DataCiteProvider.create(doi_options={'prefix': '10.123fd4'})

0 comments on commit 058dc1c

Please sign in to comment.