Skip to content

Commit

Permalink
Merge 95c55d4 into 60fca5e
Browse files Browse the repository at this point in the history
  • Loading branch information
tmorrell committed Aug 16, 2019
2 parents 60fca5e + 95c55d4 commit 6b31aed
Show file tree
Hide file tree
Showing 34 changed files with 2,495 additions and 1,484 deletions.
14 changes: 14 additions & 0 deletions CHANGES.rst
@@ -1,5 +1,19 @@
Changes
=======
Version v1.1.0 (TBD):

- Adds full support for DataCite Metadata Schema v4.2 XML generation.
- Uses the official DataCite JSON Schema, which has the following notable
changes from the previous schema:

- Uses "identifiers" which is a combination of the XML "identifier" and
"alternativeIdentifiers" elements
- "creatorName" is now "name"
- "contributorName" is now "name"
- "affiliations" is now "affiliation" (is still an array)
- There is no longer a separate funder identifier object (the funder
identifier and its type are now plain elements of the funding reference)

Version v1.0.1 (released 2018-03-08):

- Fixes schema location url for DataCite v4.1
Expand Down
8 changes: 8 additions & 0 deletions README.rst
Expand Up @@ -53,3 +53,11 @@ Running the test suite is as simple as: ::

pip install -e .[all]
./run-tests.sh

Some tests require a DataCite test account.
Set the environment variables
$DATACITE_USER, $DATACITE_PW and $DATACITE_PREFIX
to your account information for doi.test.datacite.org, then
run: ::

./run-tests-pw.sh
3 changes: 2 additions & 1 deletion datacite/__init__.py
Expand Up @@ -14,6 +14,7 @@
from __future__ import absolute_import, print_function

from .client import DataCiteMDSClient
from .rest_client import DataCiteRESTClient
from .version import __version__

# Public API of the package: both client wrappers plus the version string.
__all__ = ('DataCiteMDSClient', 'DataCiteRESTClient', '__version__')
5 changes: 5 additions & 0 deletions datacite/request.py
Expand Up @@ -13,6 +13,7 @@

from __future__ import absolute_import, print_function

import json
import ssl

import requests
Expand Down Expand Up @@ -89,6 +90,10 @@ def request(self, url, method='GET', body=None, params=None, headers=None):
self.data = res.content
if not isinstance(body, string_types):
self.data = self.data.decode('utf8')
try:
self.json = json.loads(self.data)
except ValueError as e:
self.json = None
except RequestException as e:
raise HttpError(e)
except ssl.SSLError as e:
Expand Down
252 changes: 252 additions & 0 deletions datacite/rest_client.py
@@ -0,0 +1,252 @@
# -*- coding: utf-8 -*-
#
# This file is part of DataCite.
#
# Copyright (C) 2015 CERN.
#
# DataCite is free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

"""Python API client wrapper for the DataCite Metadata Store API.
API documentation is available on https://mds.datacite.org/static/apidoc.
"""

from __future__ import absolute_import, print_function

from .errors import DataCiteError
from .request import DataCiteRequest


class DataCiteRESTClient(object):
    """DataCite REST API client wrapper."""

    def __init__(self, username=None, password=None, url=None, prefix=None,
                 test_mode=False, api_ver="2", timeout=None):
        """Initialize the REST client wrapper.

        :param username: DataCite username.
        :param password: DataCite password.
        :param url: DataCite API base URL. Defaults to
            https://doi.datacite.org/. Ignored when ``test_mode`` is true.
        :param prefix: DOI prefix used when building or checking DOIs.
        :param test_mode: Set to True to use
            https://doi.test.datacite.org/ as base URL. Defaults to False.
        :param api_ver: DataCite API version. Currently has no effect.
            Defaults to "2".
        :param timeout: Connect and read timeout in seconds. Specify a tuple
            (connect, read) to specify each timeout individually.
        """
        self.username = username
        self.password = password
        self.prefix = prefix
        self.api_ver = api_ver  # Currently not used

        # NOTE(review): DataCite documents api.datacite.org and
        # api.test.datacite.org as the REST API hosts -- confirm that the
        # doi.* hosts below are the intended endpoints.
        if test_mode:
            self.api_url = 'https://doi.test.datacite.org/'
        else:
            self.api_url = url or 'https://doi.datacite.org/'
        # Relative paths such as "dois/..." are appended directly to the
        # base URL, so it must end with a slash.
        if not self.api_url.endswith('/'):
            self.api_url += '/'

        self.timeout = timeout

    def __repr__(self):
        """Create string representation of object."""
        return '<DataCiteRESTClient: {0}>'.format(self.username)

    def _request_factory(self):
        """Create a new Request object bound to this client's settings."""
        return DataCiteRequest(
            base_url=self.api_url,
            username=self.username,
            password=self.password,
            default_params={},
            timeout=self.timeout,
        )

    def _build_attributes(self, metadata, event, doi=None):
        """Return a new attributes dict for a DOI creation request.

        The caller's ``metadata`` is copied (shallowly) so that adding the
        ``prefix``, ``event`` and ``doi`` keys does not modify it.

        :param metadata: Metadata dictionary supplied by the caller.
        :param event: Value for the ``event`` attribute.
        :param doi: Optional DOI or DOI suffix; validated with
            :meth:`check_doi` when given.
        """
        attributes = dict(metadata)
        attributes["prefix"] = self.prefix
        attributes["event"] = event
        if doi:
            attributes["doi"] = self.check_doi(doi)
        return attributes

    def doi_get(self, doi):
        """Get the URL where the resource pointed by the DOI is located.

        :param doi: DOI name of the resource.
        :returns: The ``url`` attribute of the DOI record.
        :raises DataCiteError: If the response status is not 200.
        """
        r = self._request_factory()
        r.get("dois/" + doi)
        if r.code == 200:
            return r.json['data']['attributes']['url']
        raise DataCiteError.factory(r.code, r.data)

    def check_doi(self, doi, prefix=None):
        """Check the DOI structure and return the full DOI.

        A bare suffix (no ``/``) is completed with the prefix; a full DOI
        such as ``10.12345/123`` must start with the expected prefix.

        :param doi: Full DOI or DOI suffix.
        :param prefix: Expected prefix. Defaults to the prefix the client
            was created with.
        :returns: The DOI including its prefix.
        :raises ValueError: If the DOI carries a different prefix, or if a
            suffix is given and no prefix is available.
        """
        if prefix is None:
            prefix = self.prefix

        if '/' in doi:
            found = doi.split('/', 1)[0]
            if found != prefix:
                raise ValueError(
                    'DOI {0} has prefix {1}, expected {2}'.format(
                        doi, found, prefix))
            return doi

        if prefix is None:
            raise ValueError(
                'Cannot complete DOI suffix {0}: no prefix set'.format(doi))
        return '{0}/{1}'.format(prefix, doi)

    def post_doi(self, data):
        """Post a JSON payload to DataCite.

        :param data: Dictionary sent as the ``data`` member of the request
            document.
        :returns: The response body on success.
        :raises DataCiteError: If the response status is not 201.
        """
        import json

        headers = {'Content-Type': 'application/vnd.api+json'}
        # Serialize explicitly: a dict body would not be sent as JSON.
        body = json.dumps({"data": data})

        r = self._request_factory()
        r.post("dois", body=body, headers=headers)

        if r.code == 201:
            return r.data
        raise DataCiteError.factory(r.code, r.data)

    def draft_doi(self, doi=None):
        """Create a draft DOI.

        A draft DOI can be deleted. If ``doi`` is not provided, only the
        client's prefix is sent and DataCite is expected to generate the
        suffix.

        :param doi: Optional DOI or DOI suffix to reserve.
        :returns: The response body returned by :meth:`post_doi`.
        """
        if doi:
            attributes = {"doi": self.check_doi(doi)}
        else:
            attributes = {"prefix": self.prefix}
        return self.post_doi({"attributes": attributes})

    def publish_doi(self, metadata, doi=None):
        """Publish a DOI.

        This DOI will be public and cannot be deleted. If ``doi`` is not
        provided, DataCite will automatically create a DOI with a random,
        recommended DOI suffix.

        Metadata should follow the DataCite Metadata Schema:
        http://schema.datacite.org/

        :param metadata: JSON format of the metadata (not modified).
        :param doi: Optional DOI or DOI suffix.
        :returns: The response body returned by :meth:`post_doi`.
        """
        attributes = self._build_attributes(metadata, "publish", doi)
        return self.post_doi({"attributes": attributes})

    def private_doi(self, metadata, doi=None):
        """Publish a DOI in a registered state.

        A DOI generated by this method will not be found in DataCite
        Search. This DOI cannot be deleted. If ``doi`` is not provided,
        DataCite will automatically create a DOI with a random,
        recommended DOI suffix.

        Metadata should follow the DataCite Metadata Schema:
        http://schema.datacite.org/

        :param metadata: JSON format of the metadata (not modified).
        :param doi: Optional DOI or DOI suffix.
        :returns: The response body returned by :meth:`post_doi`.
        """
        attributes = self._build_attributes(metadata, "register", doi)
        return self.post_doi({"attributes": attributes})

    def hide_doi(self, doi):
        """Hide a previously registered DOI.

        This DOI will no longer be found in DataCite Search.

        :param doi: DOI to hide e.g. 10.12345/1.
        :returns: The response body returned by :meth:`post_doi`.
        """
        # NOTE(review): this sends a POST to "dois" like the creation
        # methods; confirm whether an existing DOI must instead be updated
        # through its own resource URL.
        attributes = {"event": "hide"}
        if doi:
            attributes["doi"] = self.check_doi(doi)
        return self.post_doi({"attributes": attributes})

    # TODO: the methods below still need to be reviewed for the REST API.
    def metadata_get(self, doi):
        """Get the metadata associated to a DOI name.

        The request asks for ``application/xml``.

        :param doi: DOI name of the resource.
        :returns: The response body.
        :raises DataCiteError: If the response status is not 200.
        """
        headers = {'Accept': 'application/xml',
                   'Accept-Encoding': 'UTF-8'}

        r = self._request_factory()
        r.get("metadata/" + doi, headers=headers)

        if r.code == 200:
            return r.data
        raise DataCiteError.factory(r.code, r.data)

    def media_get(self, doi):
        """Get list of pairs of media type and URLs associated with a DOI.

        Each line of the response is split once on ``=`` into a media type
        and a URL.

        :param doi: DOI name of the resource.
        :returns: Dictionary mapping media type to URL.
        :raises DataCiteError: If the response status is not 200.
        """
        r = self._request_factory()
        r.get("media/" + doi)

        if r.code == 200:
            return dict(
                line.split("=", 1) for line in r.data.splitlines()
            )
        raise DataCiteError.factory(r.code, r.data)

    def media_post(self, doi, media):
        """Add/update media type/urls pairs to a DOI.

        Standard domain restrictions check will be performed.

        :param doi: DOI name of the resource.
        :param media: Dictionary of (mime-type, URL) key/value pairs.
        :returns: The response body on success.
        :raises DataCiteError: If the response status is not 200.
        """
        headers = {'Content-Type': 'text/plain;charset=UTF-8'}

        # Use \r\n for HTTP client data.
        body = "\r\n".join(
            "{0}={1}".format(k, v) for k, v in media.items()
        )

        r = self._request_factory()
        r.post("media/" + doi, body=body, headers=headers)

        if r.code == 200:
            return r.data
        raise DataCiteError.factory(r.code, r.data)
56 changes: 34 additions & 22 deletions datacite/schema42.py
Expand Up @@ -56,13 +56,40 @@ def validate(data):
return validator.is_valid(data)


@rules.rule('identifier')
def identifier(path, value):
    """Build the ``identifier`` element from its dictionary form.

    :param path: Not used by this rule.
    :param value: Dictionary with ``identifier`` and ``identifierType``
        keys.
    :returns: The element produced by ``E.identifier`` with the identifier
        as its content and ``identifierType`` as an attribute.
    """
    id_text = value['identifier']
    id_type = value['identifierType']
    return E.identifier(id_text, identifierType=id_type)
@rules.rule('identifiers')
def identifiers(root, values):
    """Transform identifiers to identifier and alternateIdentifiers.

    We assume there will only be one DOI identifier for the record; it
    becomes the ``identifier`` element. Any other identifiers are
    alternate identifiers and are collected, in input order, under a single
    ``alternateIdentifiers`` element.

    :param root: Not used by this rule.
    :param values: Iterable of dictionaries, each with ``identifier`` and
        ``identifierType`` keys.
    :returns: When there are no alternate identifiers, the ``identifier``
        element alone (or ``''`` if no DOI was given either). Otherwise a
        tuple ``(alternateIdentifiers, identifier)``, where the second item
        is ``''`` if no DOI was given.
    :raises TypeError: If more than one DOI identifier is supplied.
    """
    # '' is the "no DOI seen" placeholder that is also handed back to the
    # caller, so it is kept as the sentinel here.
    doi = ''
    alternates = None
    for value in values:
        if value['identifierType'] == 'DOI':
            if doi != '':
                # With two DOIs there is no way to tell which one is the
                # record's actual identifier.
                raise TypeError(
                    'Only one identifier of type DOI is supported')
            doi = E.identifier(
                value['identifier'],
                identifierType='DOI'
            )
        else:
            # Create the container lazily so that it only exists when
            # there is at least one alternate identifier.
            if alternates is None:
                alternates = E.alternateIdentifiers()
            elem = E.alternateIdentifier(value['identifier'])
            elem.set('alternateIdentifierType', value['identifierType'])
            alternates.append(elem)

    if alternates is None:
        # Only the DOI (or nothing at all) was supplied.
        return doi
    return (alternates, doi)


def affiliations(root, values):
Expand Down Expand Up @@ -231,21 +258,6 @@ def resource_type(path, value):
return elem


@rules.rule('alternateIdentifiers')
def alternate_identifiers(path, values):
    """Build the ``alternateIdentifiers`` element.

    :param path: Not used by this rule.
    :param values: Sequence of dictionaries with ``alternateIdentifier``
        and ``alternateIdentifierType`` keys.
    :returns: An ``alternateIdentifiers`` element with one
        ``alternateIdentifier`` child per entry, or ``None`` when
        ``values`` is empty.
    """
    if values:
        container = E.alternateIdentifiers()
        for entry in values:
            child = E.alternateIdentifier(entry['alternateIdentifier'])
            child.set(
                'alternateIdentifierType',
                entry['alternateIdentifierType'],
            )
            container.append(child)
        return container
    return None


@rules.rule('relatedIdentifiers')
def related_identifiers(path, values):
"""Transform relatedIdentifiers."""
Expand Down

0 comments on commit 6b31aed

Please sign in to comment.