# Literature Metadata Retrieval

Given a DOI, use the doi.org and crossref.org APIs to look up its metadata.

In [1]:
import requests
# Example DOI used by every lookup below; uncomment the alternative to try another record.
#doi_ex = "10.1038/ng1201-365"
doi_ex = "10.1186/2041-1480-5-5"
# Base URLs the DOI is appended to. HTTPS is used so requests are not sent in plain text.
doi_endpoint = "https://doi.org/api/handles/"
crossref_endpoint = "https://api.crossref.org/works/"

## DOI Service

doi.org provides brief API documentation: https://www.doi.org/factsheets/DOIProxy.html

In [2]:
# Resolve the DOI through the doi.org handle API and display the decoded JSON.
# A timeout (in seconds) stops the cell from hanging forever on a stalled
# connection; raise_for_status() surfaces HTTP errors (e.g. an unknown DOI)
# before the body is parsed as JSON.
doi_response = requests.get(doi_endpoint + doi_ex, timeout=10)
doi_response.raise_for_status()
doi_response.json()

{'handle': '10.1186/2041-1480-5-5',
 'responseCode': 1,
 'values': [{'data': {'format': 'string',
    'value': 'http://www.jbiomedsem.com/content/5/1/5'},
   'index': 1,
   'timestamp': '2014-04-07T20:05:11Z',
   'ttl': 86400,
   'type': 'URL'},
  {'data': {'format': 'string', 'value': '20140407165738'},
   'index': 700050,
   'timestamp': '2014-04-07T20:05:11Z',
   'ttl': 86400,
   'type': '700050'},
  {'data': {'format': 'admin',
    'value': {'handle': '0.na/10.1186',
     'index': 200,
     'permissions': '111111110010'}},
   'index': 100,
   'timestamp': '2014-04-07T20:05:11Z',
   'ttl': 86400,
   'type': 'HS_ADMIN'}]}

## CrossRef 

CrossRef publishes its REST API documentation at https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md

In [3]:
# Fetch the CrossRef record for the DOI and keep the decoded JSON in `res`.
# A timeout (in seconds) stops the cell from hanging forever on a stalled
# connection; raise_for_status() reports an HTTP error (such as a DOI that
# CrossRef does not know) directly instead of failing later inside .json().
crossref_response = requests.get(crossref_endpoint + doi_ex, timeout=10)
crossref_response.raise_for_status()
res = crossref_response.json()
res

{'message': {'DOI': '10.1186/2041-1480-5-5',
  'ISSN': ['2041-1480'],
  'URL': 'http://dx.doi.org/10.1186/2041-1480-5-5',
  'alternative-id': ['2041-1480-5-5'],
  'author': [{'affiliation': [], 'family': 'Katayama', 'given': 'Toshiaki'},
   {'affiliation': [], 'family': 'Wilkinson', 'given': 'Mark D'},
   {'affiliation': [], 'family': 'Aoki-Kinoshita', 'given': 'Kiyoko F'},
   {'affiliation': [], 'family': 'Kawashima', 'given': 'Shuichi'},
   {'affiliation': [], 'family': 'Yamamoto', 'given': 'Yasunori'},
   {'affiliation': [], 'family': 'Yamaguchi', 'given': 'Atsuko'},
   {'affiliation': [], 'family': 'Okamoto', 'given': 'Shinobu'},
   {'affiliation': [], 'family': 'Kawano', 'given': 'Shin'},
   {'affiliation': [], 'family': 'Kim', 'given': 'Jin-Dong'},
   {'affiliation': [], 'family': 'Wang', 'given': 'Yue'},
   {'affiliation': [], 'family': 'Wu', 'given': 'Hongyan'},
   {'affiliation': [], 'family': 'Kano', 'given': 'Yoshinobu'},
   {'affiliation': [], 'family': 'Ono', 'given': 'Hir

In [4]:
# Unwrap the CrossRef response envelope so `res` is the work record itself.
# Falling back to `res` keeps this cell safe to re-run: after the first run
# the unwrapped record is expected to have no top-level 'message' key, so it
# is left unchanged instead of raising KeyError.
res = res.get('message', res)

In [7]:
# Build a flat summary `d` of the CrossRef work record held in `res`.
# A record is not guaranteed to carry every field read here (for example an
# online-only article has no 'published-print'), so each entry is added only
# when the record provides a usable value. The original guarded only
# 'subject' and 'title' this way.
d = {}

if res.get('publisher'):
    d['publisher'] = res['publisher']

# 'container-title' can hold several variants of the journal name; keep the
# longest one. Guarded because max() raises ValueError on an empty list.
container_titles = res.get('container-title') or []
if container_titles:
    d['publication'] = max(container_titles, key=len)

# Only the first listed author is kept, formatted as "Family, Given".
# If one of the two name parts is absent, the other is used on its own.
authors = res.get('author') or []
if authors:
    first_author = authors[0]
    name_parts = [first_author.get('family'), first_author.get('given')]
    name_parts = [part for part in name_parts if part]
    if name_parts:
        d['author'] = ", ".join(name_parts)

# Prefer the print date, then fall back to the online and issued dates.
# The value stored is the first 'date-parts' entry, e.g. [2014] or [2014, 4, 7].
for date_field in ('published-print', 'published-online', 'issued'):
    date_parts = (res.get(date_field) or {}).get('date-parts') or []
    if date_parts and date_parts[0] and date_parts[0][0] is not None:
        d['publication_date'] = date_parts[0]
        break

# .get() also covers a key that is present but holds an empty list,
# which would otherwise raise IndexError on [0].
if res.get('subject'):
    d['subject'] = res['subject'][0]
if res.get('title'):
    d['title'] = res['title'][0]
d

{'author': 'Katayama, Toshiaki',
 'publication': 'Journal of Biomedical Semantics',
 'publication_date': [2014],
 'publisher': 'Springer Science + Business Media',
 'title': 'BioHackathon series in 2011 and 2012: penetration of ontology and linked data in life science domains'}