# Exploration Notebook to Compare USGS DOI Tool API to DataCite API
Please be sure to review the other notebook in the GitHub repo before working with this notebook.

In [None]:
# DataCite API query documentation: https://support.datacite.org/docs/api-queries

In [34]:
import requests
import json
import pprint

In [None]:
# Queries by default search all fields, but a specific field can be provided in the query.
# Here the bare string '10.5066' (the prefix of the DOIs looked up later in this notebook)
# is searched across all fields.
data_cite_query = requests.get(
    'https://api.datacite.org/dois?query=10.5066',
    timeout=30,  # requests has no default timeout; without one a stalled connection hangs the cell
)
# Fail loudly on an HTTP error instead of hitting a confusing KeyError on 'data' below.
data_cite_query.raise_for_status()
json_data = data_cite_query.json()
# Pretty-print every record on the returned page of results.
for doi_record in json_data['data']:
    pprint.pprint(doi_record)

In [None]:
# Filter by the 'provider-id' query parameter instead of a free-text query.
data_cite_query = requests.get(
    'https://api.datacite.org/dois?provider-id=usgs',
    timeout=30,  # avoid hanging the cell indefinitely on a stalled connection
)
# Surface HTTP errors here rather than displaying an error payload as if it were data.
data_cite_query.raise_for_status()
# Show the raw response body (unparsed JSON text).
data_cite_query.text

## Compare response from DataCite to USGS DOI tool

### DataCite API

In [62]:
# Not caps sensitive (the DOI suffix is given in lower case here). Note the available fields.
datacite_query = requests.get(
    'https://api.datacite.org/dois?query=10.5066/p9vrv6us',
    timeout=30,  # avoid hanging the cell indefinitely on a stalled connection
)
# Stop on an HTTP error instead of failing later with a KeyError/IndexError.
datacite_query.raise_for_status()
datacite_json = datacite_query.json()
# Guard the [0] lookup: an empty result list would otherwise raise a bare IndexError.
assert datacite_json['data'], 'DataCite returned no records for this DOI query'
# Display the first matching record.
datacite_json['data'][0]

{'id': '10.5066/p9vrv6us',
 'type': 'dois',
 'attributes': {'doi': '10.5066/p9vrv6us',
  'identifiers': [{'identifier': 'https://doi.org/10.5066/p9vrv6us',
    'identifierType': 'DOI'}],
  'creators': [{'name': 'Clune, John',
    'nameType': 'Personal',
    'givenName': 'John',
    'familyName': 'Clune',
    'nameIdentifiers': [{'nameIdentifier': 'https://orcid.org/0000-0002-3563-1975',
      'nameIdentifierScheme': 'ORCID'}]}],
  'titles': [{'title': 'Compilation of data not available in the National Water Information System for domestic wells sampled by the U.S. Geological Survey in Bradford County, Pennsylvania, May-August 2016'}],
  'publisher': 'U.S. Geological Survey',
  'container': {},
  'publicationYear': 2018,
  'subjects': [{'subject': 'Geochemistry,Water Quality,Water Resources'}],
  'contributors': [],
  'dates': [{'date': '2018', 'dateType': 'Issued'}],
  'language': None,
  'types': {'ris': 'DATA',
   'bibtex': 'misc',
   'citeproc': 'dataset',
   'schemaOrg': 'Dataset',

In [67]:
# Take the first record of the DataCite response and show its top-level keys.
records = datacite_json['data']
x = records[0]
x.keys()

dict_keys(['id', 'type', 'attributes', 'relationships'])

In [69]:
# List the metadata field names found under 'attributes' of the first DataCite record.
first_record = datacite_json['data'][0]
x = first_record['attributes']
for attribute_name in x.keys():
    print(attribute_name)

doi
identifiers
creators
titles
publisher
container
publicationYear
subjects
contributors
dates
language
types
relatedIdentifiers
sizes
formats
version
rightsList
descriptions
geoLocations
fundingReferences
url
contentUrl
metadataVersion
schemaVersion
source
isActive
state
reason
created
registered
published
updated


### USGS Data Tools DOI API

In [50]:
import getpass
from usgs_datatools import doi

# Environment for the DOI session: 'production' or 'staging'.
# Note: the user must be on the USGS network or VPN to successfully use the staging environment.
doi_env = 'production'
DoiSession = doi.DoiSession(env=doi_env)

In [52]:
# Prompt for both credentials at run time so that no account name or password
# is stored in the notebook source.
username = input('USGS AD Username: ').strip()
# getpass hides the typed password from the cell output.
password = getpass.getpass('USGS AD Password: ')
print('*Complete*')

USGS AD Password: ········
*Complete*


In [53]:
# Authenticate the DOI session with the username and password entered earlier.
# NOTE(review): the message below prints unconditionally; this assumes
# doi_authenticate raises on a failed login — confirm against usgs_datatools.
DoiSession.doi_authenticate(username, password)
print("Successfully authenticated.")

Successfully authenticated.


In [58]:
# Note the raw URL that the function accesses under the hood; it has the form
# 'https://www1.usgs.gov/csas/dmapi/doi/doi:10.5066/P9VRV6US'
# (example URL shown for a different DOI than the one fetched below).
# This is worth noting when comparing to the documentation for the REST endpoint:
# the identifier is caps sensitive and uses a colon after 'doi'.
usgs_doi_id = 'doi:10.5066/F7W0944J'
usgs_doi = DoiSession.get_doi(usgs_doi_id)
usgs_doi

{'doi': 'doi:10.5066/F7W0944J',
 'title': 'North American Breeding Bird Survey Dataset 1966 - 2016, version 2016.0',
 'pubDate': '2017',
 'url': 'ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/Archivefiles/Version2016v0/',
 'resourceType': 'Dataset',
 'date': '1966/2016',
 'dateType': 'Collected',
 'description': 'The 1966-2016 North American Breeding Bird Survey dataset contains avian point count data for more than 700 North American bird taxa (primarily species, but also some races and unidentified species groupings).  These data are collected annually during the breeding season, primarily June and May, along thousands of randomly established roadside survey routes in the United States and Canada. Routes are about 24.5 miles (39.2 km) long with counting locations placed at regular intervals, for a total of 50 stops. At each stop, a person highly skilled in avian identification conducts a 3-minute point count, recording every bird seen within a quarter-mile (400-m) radius and every bird h

In [59]:
# Print each field name present in the USGS DOI tool record, one per line.
usgs_field_names = usgs_doi.keys()
for field in usgs_field_names:
    print(field)

doi
title
pubDate
url
resourceType
date
dateType
description
subject
username
status
noDataReleaseAvailableReason
noPublicationIdAvailable
dataSourceId
dataSourceName
linkCheckingStatus
formatTypes
authors
users
relatedIdentifiers
ipdsNumbers
created
modified
