This notebook uses the existing Wyoming Landscape Conservation Initiative (WLIC) literature database and the eXtract Dark Data database (xDD, also known as GeoDeepDive: https://geodeepdive.org/) to build a list of unique specie names that have been published on in WLCI efforts.

In [1]:
#Import needed packages
import requests
import pandas as pd
import json
import bispy
from IPython.display import display

bis_utils = bispy.bis.Utils()

In [2]:
#The WLCI literature database was exported as CSL JSON and posted to this ScienceBase Item: https://www.sciencebase.gov/catalog/item/4f4e476fe4b07f02db47e19f 
#as file wlci_lit_20190702.json.  The file can be accessed via 

#File download url
lit_json_file = "https://www.sciencebase.gov/catalog/file/get/4f4e476fe4b07f02db47e19f?f=__disk__ad%2F2e%2Fcc%2Fad2ecc69ef3957ef9cd23d4cba1c7c68b983acad"

#Request and return JSON of literature data
lit_json = requests.get(lit_json_file).json()

In [3]:
lit_json

[{'id': 'http://zotero.org/groups/2341914/items/R3NSCC9R',
  'type': 'article-journal',
  'title': 'Greater Sage-Grouse Population Trends Across Wyoming: WY Sage-grouse Population Viability Analysis',
  'container-title': 'The Journal of Wildlife Management',
  'page': '397-412',
  'volume': '82',
  'issue': '2',
  'source': 'DOI.org (Crossref)',
  'abstract': "The scale at which analyses are performed can have an effect on model results and often one scale does not accurately describe the ecological phenomena of interest (e.g., population trends) for wide‐ranging species: yet, most ecological studies are performed at a single, arbitrary scale. To best determine local and regional trends for greater sage‐grouse (Centrocercus urophasianus) in Wyoming, USA, we modeled density‐independent and ‐dependent population growth across multiple spatial scales relevant to management and conservation (Core Areas [habitat encompassing approximately 83% of the sage‐grouse population on ∼24% of surfac

In [4]:
len(lit_json)

303

In [5]:
#Set function for accessing xDD database
def xdd_api(route, params):
    """Create list of docs mentioning a term of interest
    Parameters : see https://geodeepdive.org/api for more detail
    ----------
    routes : str of available api routes for xDD 
    params : str of key value pairs of paramaters:values separated by &
    """
    base_url = 'https://geodeepdive.org/api'
    search = (base_url + '/' + route + '?' + str(params))
    #print (search)
    try:
        r=requests.get(search)
        if r.status_code == 200 and 'success' in r.json():
            json_r = r.json()
            data = json_r['success']['data']
            return data
        elif r.status_code == 200:
            data = []
            return data
        else:
            raise Exception('xDD API returning: {}'.format(r.status_code))
    except Exception as e:
        raise Exception(e)

In [6]:
#Create list of relationship information between WLCI database and xDD database
wlci_xdd = []
for ref in lit_json:
    wlci_id = ref['id']
    if 'DOI' in ref:
        doi = ref['DOI']
    elif 'note' in ref and 'OCLC' not in ref['note']:
        doi = ref['note']
    route = 'articles'
    param = 'max=1&doi='+str(doi)
    xdd_data = xdd_api(route, param)
    if xdd_data:
        xdd_id = xdd_data[0]['_gddid']
        wlci_xdd.append(
            {
                'wlci_id': wlci_id, 
                'relation': 'doi_match', 
                'xdd_id':xdd_id, 
                'param':param, 
                'title': ref['title']
            }
        )

In [7]:
with open('sources/WLCI Literature xDD Matches.json', 'w') as f:
    f.write(json.dumps(wlci_xdd, indent=4))
    f.close()

In [8]:
wlci_xdd

[{'wlci_id': 'http://zotero.org/groups/2341914/items/R3NSCC9R',
  'relation': 'doi_match',
  'xdd_id': '5c2c30a41faed655488b299b',
  'param': 'max=1&doi=10.1002/jwmg.21386',
  'title': 'Greater Sage-Grouse Population Trends Across Wyoming: WY Sage-grouse Population Viability Analysis'},
 {'wlci_id': 'http://zotero.org/groups/2341914/items/Z7TMPV6Q',
  'relation': 'doi_match',
  'xdd_id': '579f4458cf58f123c56623f8',
  'param': 'max=1&doi=10.1016/j.ecolind.2015.03.002',
  'title': 'Forecasting Sagebrush Ecosystem Components and Greater Sage-grouse Habitat for 2050: Learning from Past Climate Patterns and Landsat Imagery to Predict the Future'},
 {'wlci_id': 'http://zotero.org/groups/2341914/items/XYW5M83E',
  'relation': 'doi_match',
  'xdd_id': '5d4384980b45c76cafa2c0eb',
  'param': 'max=1&doi=10.1002/jwmg.1050',
  'title': 'Effects of Lek Count Protocols on Greater Sage-grouse Population Trend Estimates: Lek Count Timing and Trend Estimates'},
 {'wlci_id': 'http://zotero.org/groups/234

In [9]:
#Create list of species that are mentioned in the WLCI efforts
specie_mentions = list()
for xdd_record in wlci_xdd:
    route = 'terms'
    param = f"docid={xdd_record['xdd_id']}&dictionary=ITIS"
    xdd_data = xdd_api(route, param)
    for x in xdd_data:
        if len(x['term'].split()) > 1:
            specie = x['term']
            hits = str(x['n_hits'])
            specie_mentions.append(
                {'Scientific Name':specie, 
                 'xdd_id':xdd_id, 
                 'wlci_id':wlci_id, 
                 'n_hits':hits
                }
            ) 

In [10]:
specie_mentions

[{'Scientific Name': 'Branta canadensis',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '1'},
 {'Scientific Name': 'Castor canadensis',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '1'},
 {'Scientific Name': 'Haliaeetus leucocephalus',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '6'},
 {'Scientific Name': 'Macaca sylvanus',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '1'},
 {'Scientific Name': 'Artemisia tridentata',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '1'},
 {'Scientific Name': 'Centrocercus urophasianus',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '5'},


In [11]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("sources/WLCI Species List from Literature.json", specie_mentions))

{'Doc Cache File': 'sources/WLCI Species List from Literature.json',
 'Number of Documents in Cache': 346,
 'Document Number 256': {'Scientific Name': 'Alces alces',
  'xdd_id': '5c4e3f571faed655489408c3',
  'wlci_id': 'http://zotero.org/groups/2341914/items/KP3R7Q33',
  'n_hits': '2'}}