# Retrieving information via DOI from Crossref

Further information regarding the Crossref REST API can be found [here](https://www.crossref.org/services/metadata-delivery/rest-api/).

In [1]:
import json
import urllib.parse
import urllib.request

# Crossref endpoint for a single work; the DOI is appended directly to it.
base_url = "https://api.crossref.org/works/"
# DOI of the example publication queried in the following cells.
doi = "10.1371/journal.pcbi.1004668"
# Complete request URL for the example DOI.
full_url = f"{base_url}{doi}"

In [2]:
# Retrieve the raw response body. `read()` returns bytes (not str); the
# `with` block closes the HTTP response as soon as the body has been read
# instead of leaving the connection open until garbage collection.
with urllib.request.urlopen(full_url) as response:
    doi_json_data = response.read()

In [3]:
# Show the raw payload; it is a bytes object, hence the b'...' prefix in the output
print(doi_json_data)

b'{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2019,5,16]],"date-time":"2019-05-16T07:44:05Z","timestamp":1557992645228},"reference-count":6,"publisher":"Public Library of Science (PLoS)","issue":"1","license":[{"URL":"http:\\/\\/creativecommons.org\\/licenses\\/by\\/4.0\\/","start":{"date-parts":[[2016,1,19]],"date-time":"2016-01-19T00:00:00Z","timestamp":1453161600000},"delay-in-days":0,"content-version":"vor"}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\\/journal.pcbi.1004668","type":"journal-article","created":{"date-parts":[[2016,1,19]],"date-time":"2016-01-19T21:35:29Z","timestamp":1453239329000},"page":"e1004668","update-policy":"http:\\/\\/dx.doi.org\\/10.1371\\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":28,"title":["A Quick Introduction to Version Control with Git and GitHub"],"prefix":"10.

In [4]:
# Parse the raw JSON payload into Python objects
# (json.loads accepts the bytes returned by read() directly)
doi_data = json.loads(doi_json_data)

In [5]:
# Check what kind of object the parser produced
type(doi_data)

dict

In [6]:
# List the top-level keys of the parsed response
doi_data.keys()

dict_keys(['status', 'message-type', 'message-version', 'message'])

In [7]:
# Print in a prettier way: re-serialise the parsed data with two-space
# indentation so the nesting of the response is readable
print(json.dumps(doi_data, indent=2))

{
  "status": "ok",
  "message-type": "work",
  "message-version": "1.0.0",
  "message": {
    "indexed": {
      "date-parts": [
        [
          2018,
          10,
          31
        ]
      ],
      "date-time": "2018-10-31T18:02:37Z",
      "timestamp": 1541008957562
    },
    "reference-count": 6,
    "publisher": "Public Library of Science (PLoS)",
    "issue": "1",
    "license": [
      {
        "URL": "http://creativecommons.org/licenses/by/4.0/",
        "start": {
          "date-parts": [
            [
              2016,
              1,
              19
            ]
          ],
          "date-time": "2016-01-19T00:00:00Z",
          "timestamp": 1453161600000
        },
        "delay-in-days": 0,
        "content-version": "vor"
      }
    ],
    "content-domain": {
      "domain": [
        "www.ploscompbiol.org"
      ],
      "crossmark-restriction": false
    },
    "short-container-title": [
      "PLoS Comput Biol"
    ],
    "DOI": "10.1371/journal.pcb

In [8]:
# Access the title of the publication.
# The value stored under "title" is a list, so the list itself is printed.
print(doi_data["message"]["title"])

['A Quick Introduction to Version Control with Git and GitHub']


In [9]:
# Access the journal name via the "short-container-title" field;
# the value is a list of strings.
doi_data["message"]["short-container-title"]

['PLoS Comput Biol']

In [10]:
# Same approach as above for a larger set of DOIs.
# NOTE: the loop rebinds `doi`, `doi_json_data` and `doi_data` from the
# earlier cells; once it finishes they describe the last DOI in the list.
dois = ["10.1371/journal.pcbi.1004668",
        "10.21105/joss.01035",
        "10.1038/35057062",
        "10.21105/joss.01006"]

for doi in dois:
    # Percent-encode the DOI so characters such as "#", "?" or "<" cannot
    # change the meaning of the request URL ("/" is kept as-is by default).
    request_url = base_url + urllib.parse.quote(doi)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request_url) as response:
        doi_json_data = response.read()
    doi_data = json.loads(doi_json_data)
    message = doi_data["message"]
    # Both fields hold lists and are not guaranteed to be present or
    # non-empty in every record; fall back to a placeholder instead of
    # aborting the whole loop with a KeyError/IndexError.
    title = (message.get("title") or ["n/a"])[0]
    journal = (message.get("short-container-title") or ["n/a"])[0]
    print(doi)
    print("- ", title)
    print("- ", journal)
    print()

10.1371/journal.pcbi.1004668
-  A Quick Introduction to Version Control with Git and GitHub
-  PLoS Comput Biol

10.21105/joss.01035
-  nasapower: A NASA POWER Global Meteorology, Surface Solar Energy and Climatology Data Client for R
-  JOSS

10.1038/35057062
-  Initial sequencing and analysis of the human genome
-  Nature

10.21105/joss.01006
-  SeqTools: A python package for easy transformation, combination and evaluation of large datasets.
-  JOSS

