This notebook uses the existing Wyoming Landscape Conservation Initiative (WLIC) literature database and the eXtract Dark Data database (xDD, also known as GeoDeepDive: https://geodeepdive.org/) to build a list of unique specie names that have been published on in WLCI efforts.

In [1]:
#Import Needed packages
import requests
import pandas as pd

In [2]:
#The WLCI literature database was exported as CSL JSON and posted to this ScienceBase Item: https://www.sciencebase.gov/catalog/item/4f4e476fe4b07f02db47e19f 
#as file wlci_lit_20190702.json.  The file can be accessed via 

#File download url
lit_json_file = "https://www.sciencebase.gov/catalog/file/get/4f4e476fe4b07f02db47e19f?f=__disk__ad%2F2e%2Fcc%2Fad2ecc69ef3957ef9cd23d4cba1c7c68b983acad"
#Request and return JSON of literature data
lit_json = requests.get(lit_json_file).json()


In [3]:
#Set function for accessing xDD database
def xdd_api(route, params):
    """Create list of docs mentioning a term of interest
    Parameters : see https://geodeepdive.org/api for more detail
    ----------
    routes : str of available api routes for xDD 
    params : str of key value pairs of paramaters:values separated by &
    """
    base_url = 'https://geodeepdive.org/api'
    search = (base_url + '/' + route + '?' + str(params))
    #print (search)
    try:
        r=requests.get(search)
        if r.status_code == 200 and 'success' in r.json():
            json_r = r.json()
            data = json_r['success']['data']
            return data
        elif r.status_code == 200:
            data = []
            return data
        else:
            raise Exception('xDD API returning: {}'.format(r.status_code))
    except Exception as e:
        raise Exception(e)

In [4]:
#Create list of relationship information between WLCI database and xDD database
wlci_xdd = []
for ref in lit_json:
    wlci_id = ref['id']
    if 'DOI' in ref:
        doi = ref['DOI']
    elif 'note' in ref and 'OCLC' not in ref['note']:
        doi = ref['note']
    route = 'articles'
    param = 'max=1&doi='+str(doi)
    xdd_data = xdd_api(route, param)
    if xdd_data:
        xdd_id = xdd_data[0]['_gddid']
        wlci_xdd.append({'wlci_id': wlci_id, 'relation': 'doi_match', 'xdd_id':xdd_id, 'param':param, 'title': ref['title']})
    

In [5]:
#Turn list of matched articles into Pandas dataframe for visual
wlci_xdd_df = pd.DataFrame(wlci_xdd)
wlci_xdd_df

Unnamed: 0,param,relation,title,wlci_id,xdd_id
0,max=1&doi=10.1002/jwmg.21386,doi_match,Greater Sage-Grouse Population Trends Across W...,http://zotero.org/groups/2341914/items/R3NSCC9R,5c2c30a41faed655488b299b
1,max=1&doi=10.1016/j.ecolind.2015.03.002,doi_match,Forecasting Sagebrush Ecosystem Components and...,http://zotero.org/groups/2341914/items/Z7TMPV6Q,579f4458cf58f123c56623f8
2,max=1&doi=10.1002/jwmg.155,doi_match,The Importance of Within-Year Repeated Counts ...,http://zotero.org/groups/2341914/items/JFV5GDKM,5c2c1b8a1faed655488b27d9
3,max=1&doi=10.1002/eap.1512,doi_match,Patterns in Greater Sage-grouse population dyn...,http://zotero.org/groups/2341914/items/Q5TCVPYX,58e2c641cf58f1697d39b86b
4,max=1&doi=10.1002/jwmg.21179,doi_match,Investigating Impacts of Oil and Gas Developme...,http://zotero.org/groups/2341914/items/W5K4UAZ8,5c2c22901faed655488b289b
5,max=1&doi=10.1080/2150704X.2015.1072289,doi_match,Mapping Forest Functional Type in a Forest-Shr...,http://zotero.org/groups/2341914/items/X3I66DJ8,594407d1cf58f1b3059ba5a4
6,max=1&doi=10.1016/j.ecolind.2017.12.033,doi_match,Vegetation Responses to Sagebrush-reduction Tr...,http://zotero.org/groups/2341914/items/822S7HY8,5ad055c3cf58f1a9152a8fd5
7,max=1&doi=10.1080/15420353.2014.885925,doi_match,Geospatial Considerations for a Multiorganizat...,http://zotero.org/groups/2341914/items/9N87KUPL,5c7442c01faed65548974980
8,max=1&doi=10.1080/15420353.2014.885925,doi_match,Map Service: Historical Oil and Gas Exploratio...,http://zotero.org/groups/2341914/items/RUFHZ2GV,5c7442c01faed65548974980
9,max=1&doi=10.1080/15420353.2014.885925,doi_match,Interactive Mapping Application of Oil and Gas...,http://zotero.org/groups/2341914/items/I7C83EUR,5c7442c01faed65548974980


In [6]:
# Save csv documenting xdd and wlci literature relationship
wlci_xdd_df.to_csv('sources/wlci_xdd_lit.csv', sep=',')

In [8]:
#Create list of species that are mentioned in the WLCI efforts
specie_mentions = []
for xdd_record in wlci_xdd:
    wlci_id = xdd_record['wlci_id']
    xdd_id = xdd_record['xdd_id']
    route = 'terms'
    param = f"docid={xdd_id}&dictionary=ITIS"
    xdd_data = xdd_api(route, param)
    for x in xdd_data:
        if len(x['term'].split()) >1:
            specie = x['term']
            hits = str(x['n_hits'])
            #specie_mentions.append({'specie':specie, 'xdd_id':xdd_id, 'wlci_id':wlci_id, 'n_hits':hits}) 
            specie_mentions.append(specie)

In [13]:
with open('sources/wlci_xdd_specie_list.txt', 'w') as file_handler:
    file_handler.write("\n".join(str(item) for item in specie_mentions))