In [1]:
# https://habanero.readthedocs.io/en/latest/modules/crossref.html
# https://github.com/CrossRef/rest-api-doc

# "When you search with query terms, on Crossref servers they are not searching full text, 
# or even abstracts of articles, but only what is available in the data that is returned to you. 
# That is, they search article titles, authors, etc."
# For some discussion on this, see https://github.com/CrossRef/rest-api-doc/issues/101

# Full Listing of all publications: https://api.crossref.org/works/
# total-results: 112,913,307
# e.g. https://api.crossref.org/works/10.1021/acs.est.8b07227

import pandas as pd
from habanero import Crossref
cr = Crossref()

In [2]:
# https://habanero.readthedocs.io/en/latest/modules/crossref.html#habanero.Crossref.works
# https://api.crossref.org/works/10.1021/acs.est.8b07227

# From Taylor's spreadsheet: 2019 ACS Publications/ Environmental Science and Technology
results = cr.works(ids = '10.1021/acs.est.8b07227')

results

{'status': 'ok',
 'message-type': 'work',
 'message-version': '1.0.0',
 'message': {'indexed': {'date-parts': [[2020, 4, 9]],
   'date-time': '2020-04-09T16:41:29Z',
   'timestamp': 1586450489704},
  'reference-count': 67,
  'publisher': 'American Chemical Society (ACS)',
  'issue': '9',
  'funder': [{'DOI': '10.13039/100007149',
    'name': 'U.S. Bureau of Land Management',
    'doi-asserted-by': 'publisher',
    'award': []},
   {'DOI': '10.13039/100000203',
    'name': 'U.S. Geological Survey',
    'doi-asserted-by': 'publisher',
    'award': []}],
  'content-domain': {'domain': [], 'crossmark-restriction': False},
  'short-container-title': ['Environ. Sci. Technol.'],
  'published-print': {'date-parts': [[2019, 5, 7]]},
  'DOI': '10.1021/acs.est.8b07227',
  'type': 'journal-article',
  'created': {'date-parts': [[2019, 3, 29]],
   'date-time': '2019-03-29T10:53:35Z',
   'timestamp': 1553856815000},
  'page': '5396-5405',
  'source': 'Crossref',
  'is-referenced-by-count': 1,
  'tit

In [3]:
# Get date
results["message"]["published-online"]["date-parts"][0][0]

2019

In [4]:
# Get year from date
results["message"]["published-online"]["date-parts"][0][0]

2019

In [5]:
# From Taylor's spreadsheet: 2017 Changes in Community
results = cr.works(ids = 'https://doi.org/10.1007/s13157-017-0895-3')
results

{'status': 'ok',
 'message-type': 'work',
 'message-version': '1.0.0',
 'message': {'indexed': {'date-parts': [[2020, 4, 23]],
   'date-time': '2020-04-23T06:44:03Z',
   'timestamp': 1587624243029},
  'reference-count': 89,
  'publisher': 'Springer Science and Business Media LLC',
  'issue': '4',
  'license': [{'URL': 'http://creativecommons.org/licenses/by/4.0',
    'start': {'date-parts': [[2017, 3, 17]],
     'date-time': '2017-03-17T00:00:00Z',
     'timestamp': 1489708800000},
    'delay-in-days': 0,
    'content-version': 'unspecified'}],
  'funder': [{'DOI': '10.13039/100000203',
    'name': 'U.S. Geological Survey',
    'doi-asserted-by': 'publisher',
    'award': ['WaterSMART Program']}],
  'content-domain': {'domain': ['link.springer.com'],
   'crossmark-restriction': False},
  'short-container-title': ['Wetlands'],
  'published-print': {'date-parts': [[2017, 8]]},
  'DOI': '10.1007/s13157-017-0895-3',
  'type': 'journal-article',
  'created': {'date-parts': [[2017, 3, 17]],


In [6]:
# Get year
results["message"]["published-online"]["date-parts"][0][0]

2017

In [7]:
# Reference-count was accurate: 89 from https://link.springer.com/article/10.1007/s13157-017-0895-3#Bib1 
# Funder DOI for USGS 10.13039
results["message"]["references-count"]

89

In [8]:
# From Taylor's spreadsheet: 2017 Hydrogeologic Characteristics (usgs pub)
results = cr.works(ids = '10.3133/sim3378')
results

{'status': 'ok',
 'message-type': 'work',
 'message-version': '1.0.0',
 'message': {'indexed': {'date-parts': [[2020, 3, 28]],
   'date-time': '2020-03-28T15:24:34Z',
   'timestamp': 1585409074261},
  'reference-count': 0,
  'publisher': 'US Geological Survey',
  'content-domain': {'domain': [], 'crossmark-restriction': False},
  'short-container-title': [],
  'DOI': '10.3133/sim3378',
  'type': 'other',
  'created': {'date-parts': [[2017, 5, 16]],
   'date-time': '2017-05-16T15:41:22Z',
   'timestamp': 1494949282000},
  'source': 'Crossref',
  'is-referenced-by-count': 0,
  'title': ['Hydrogeologic characteristics and geospatial analysis of water-table changes in the alluvium of the lower Arkansas River Valley, southeastern Colorado, 2002, 2008, and 2015'],
  'prefix': '10.3133',
  'author': [{'given': 'Michael J.',
    'family': 'Holmberg',
    'sequence': 'first',
    'affiliation': []}],
  'member': '1689',
  'published-online': {'date-parts': [[2017]]},
  'container-title': ['Scie

In [9]:
# Create empty list to store information that will be converted to dataframe
list_to_df = []

In [10]:
# Add items to list
list_to_df.append([results["message"]["published-online"]["date-parts"][0][0], 
                  results["message"]["title"],
                  results["message"]["publisher"]])

In [11]:
# Convert list to dataframe
df = pd.DataFrame(list_to_df, columns=["year", "title", "publisher"])

df

Unnamed: 0,year,title,publisher
0,2017,[Hydrogeologic characteristics and geospatial ...,US Geological Survey
