This notebook uses the existing Wyoming Landscape Conservation Initiative (WLIC) literature database and the eXtract Dark Data database (xDD, also known as GeoDeepDive: https://geodeepdive.org/) to build a list of unique specie names that have been published on in WLCI efforts.

In [20]:
#Import needed packages
import requests
import pandas as pd
import json
import bispy


bis_utils = bispy.bis.Utils()

In [21]:
#The WLCI literature database was exported as CSL JSON and posted to this ScienceBase Item: https://www.sciencebase.gov/catalog/item/4f4e476fe4b07f02db47e19f 
#as file wlci_lit_20190702.json.  The file can be accessed via 

#File download url
lit_json_file = "https://www.sciencebase.gov/catalog/file/get/4f4e476fe4b07f02db47e19f?f=__disk__ad%2F2e%2Fcc%2Fad2ecc69ef3957ef9cd23d4cba1c7c68b983acad"
#Request and return JSON of literature data
lit_json = requests.get(lit_json_file).json()

In [22]:
len(lit_json)

303

In [23]:
#Set function for accessing xDD database
def xdd_api(route, params):
    """Create list of docs mentioning a term of interest
    Parameters : see https://geodeepdive.org/api for more detail
    ----------
    routes : str of available api routes for xDD 
    params : str of key value pairs of paramaters:values separated by &
    """
    base_url = 'https://geodeepdive.org/api'
    search = (base_url + '/' + route + '?' + str(params))
    #print (search)
    try:
        r=requests.get(search)
        if r.status_code == 200 and 'success' in r.json():
            json_r = r.json()
            data = json_r['success']['data']
            return data
        elif r.status_code == 200:
            data = []
            return data
        else:
            raise Exception('xDD API returning: {}'.format(r.status_code))
    except Exception as e:
        raise Exception(e)

In [24]:
#Create list of relationship information between WLCI database and xDD database
wlci_xdd = []
for ref in lit_json:
    wlci_id = ref['id']
    if 'DOI' in ref:
        doi = ref['DOI']
    elif 'note' in ref and 'OCLC' not in ref['note']:
        doi = ref['note']
    route = 'articles'
    param = 'max=1&doi='+str(doi)
    xdd_data = xdd_api(route, param)
    if xdd_data:
        xdd_id = xdd_data[0]['_gddid']
        wlci_xdd.append(
            {
                'wlci_id': wlci_id, 
                'relation': 'doi_match', 
                'xdd_id':xdd_id, 
                'param':param, 
                'title': ref['title']
            }
        )
    

In [25]:
with open('cache/WLCI Literature xDD Matches.json', 'w') as f:
    f.write(json.dumps(wlci_xdd, indent=4))
    f.close()

In [26]:
#Create list of species that are mentioned in the WLCI efforts
specie_mentions = list()
for xdd_record in wlci_xdd:
    route = 'terms'
    param = f"docid={xdd_record['xdd_id']}&dictionary=ITIS"
    xdd_data = xdd_api(route, param)
    for x in xdd_data:
        if len(x['term'].split()) > 1:
            specie = x['term']
            hits = str(x['n_hits'])
            specie_mentions.append(
                {'Scientific Name':specie, 
                 'xdd_id':xdd_id, 
                 'wlci_id':wlci_id, 
                 'n_hits':hits
                }
            ) 

In [27]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("cache/WLCI Species List from Literature.json", specie_mentions))

{'Doc Cache File': 'cache/WLCI Species List from Literature.json',
 'Number of Documents in Cache': 287,
 'Document Number 193': {'Scientific Name': 'Populus tremuloides',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '7'}}