# Use NatureServe Global Element ID to retrieve and cache relevant information

This notebook builds on previous tircache notebooks for the Species of Greatest Conservation Need system. This one uses previously cached NatureServe identifiers to get the comprehensive species report from NatureServe and cache relevant attributes for the SGCN app. We can tweak this later to get and cache more information into the Taxonomic Information Registry Cache (tircache). For now, we get the following:

* Global status rank code
* Description of the rounded global status rank
* Date the global status was last reviewed
* US national status rank code
* Date the US national status rank was last reviewed
* Each US state where there is a rank code listed

When working with this information, it is important to use the date/time stamp for the records pulled and cached from NatureServe to let people know when the information was retrieved. Once we get the microservices architecture working, we will build this out into something that fires off periodically to go and get new information from NatureServe for our uses.

In [9]:
import requests,datetime,configparser
from lxml import etree
from io import StringIO, BytesIO

In [10]:
# Set defaults
dt = datetime.datetime.utcnow().isoformat()

In [11]:
# Get API keys and any other config details from a file that is external to the code.
config = configparser.RawConfigParser()
config.read_file(open(r'/Users/sky/Documents/configs/stuff.py'))

# Build base URL with API key using input the external config.
def getBaseURLGC2():
    baseURL_BCB = config.get('baseURLs','gc2SQLAPI').replace('"','')
    apiKey_BCB = config.get('apiKeys','apiKey_GC2_BCB').replace('"','')
    return baseURL_BCB+"bcb?key="+apiKey_BCB

def getBaseURLNatureServe():
    baseURL_NatureServe = config.get('baseURLs', 'natureServeSpeciesQueryBaseURL').replace('"','')
    apiKey_NatureServe = config.get('apiKeys','apiKey_NatureServe').replace('"','')
    return baseURL_NatureServe+"?NSAccessKeyId="+apiKey_NatureServe

In [12]:
# Basic function to insert subject ID, property, and value into tircache
def insertTupleInTirCache(subjectid,prop,value):
    # Build query string
    insertSQL = "INSERT INTO tircache (subjectid,property,value) VALUES ('"+subjectid+"','"+prop+"','"+value+"')"
    # Execute query
    r = requests.get(getBaseURLGC2()+"&q="+insertSQL)

In [13]:
# Retrieve NatureServe Global Element IDs from tircache to process
def getSubjectIDs():
    # Build query string to retrieve data
    targetDataSQL = "SELECT value AS subjectid \
        FROM tircache \
        WHERE property = 'NatureServe:globalSpeciesUid' \
        AND value NOT IN \
        (SELECT subjectid \
            FROM tircache \
            WHERE property = 'NatureServe:RetrieveComprehensive:PositiveResponse') \
        LIMIT 20"
    # Get Data
    targetData = requests.get(getBaseURLGC2()+"&q="+targetDataSQL).json()
    return targetData

In [16]:
# Loop through the target data, extract Subject IDs, retrieve data from service, and insert results in tircache
def getNatureServeDataFromUID(targetData):
    for feature in targetData['features']:
        # Set the subjectid for query and recording
        strSubjectId = feature['properties']['subjectid']
        
        # Set the URL path for the API query and retrieve data with requests
        natureServeQueryURL = getBaseURLNatureServe()+"&uid="+strSubjectId
        natureServeData = requests.get(natureServeQueryURL)

        # Clean up the XML returned from NatureServe API for better processing, read it as IO, and create a tree for parsing
        strNatureServeData = natureServeData.text
        strNatureServeData = strNatureServeData.replace('<?xml version="1.0" encoding="utf-8"?>\r\n\r\n', '')
        strNatureServeData = strNatureServeData.replace('\r\n    xsi:schemaLocation="http://services.natureserve.org/docs/schemas/biodiversityDataFlow/1 http://services.natureserve.org/docs/schemas/biodiversityDataFlow/1/"\r\n    xmlns="http://services.natureserve.org/docs/schemas/biodiversityDataFlow/1"\r\n    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\r\n    schemaVersion="1.1"', '')
        f = StringIO(strNatureServeData)
        tree = etree.parse(f)
        root = tree.getroot()
        docLength = len(root.getchildren())

        # Test the response because I've found that not everything with a global element ID seems to come back with a response here
        if docLength > 0:
            # Grab out the specific elements we want to cache
            insertTupleInTirCache(strSubjectId,"NatureServe:RetrieveComprehensive:PositiveResponse",dt)
            insertTupleInTirCache(strSubjectId,"NatureServe:GlobalStatusRank",tree.xpath('/globalSpeciesList/globalSpecies/conservationStatus/natureServeStatus/globalStatus/rank/code')[0].text)
            insertTupleInTirCache(strSubjectId,"NatureServe:roundedGlobalStatusRankDescription",tree.xpath('/globalSpeciesList/globalSpecies/conservationStatus/natureServeStatus/globalStatus/roundedRank/description')[0].text)
            try:
                insertTupleInTirCache(strSubjectId,"NatureServe:globalStatusLastReviewed",tree.xpath('/globalSpeciesList/globalSpecies/conservationStatus/natureServeStatus/globalStatus/statusLastReviewed')[0].text)
            except:
                print ("FAIL: "+strSubjectId+" ON globalStatusLastReviewed")
            insertTupleInTirCache(strSubjectId,"NatureServe:usNationalStatusRankCode",tree.xpath("//nationalStatus[@nationCode='US']/rank/code")[0].text)
            try:
                insertTupleInTirCache(strSubjectId,"NatureServe:usNationalStatusLastReviewed",tree.xpath("//nationalStatus[@nationCode='US']/statusLastReviewed")[0].text)
            except:
                print ("FAIL: "+strSubjectId+" ON usNationalStatusLastReviewed")

            # Loop through US states in the "subnationalStatuses" and put state names and codes into the tircache
            usStatesTree = etree.ElementTree(tree.xpath("//nationalStatus[@nationCode='US']/subnationalStatuses")[0])
            for elem in usStatesTree.iter():
                stateName = elem.attrib.get('subnationName')
                if isinstance(stateName, str):
                    insertTupleInTirCache(strSubjectId,"NatureServe:StateCode:"+stateName,tree.xpath("//subnationalStatus[@subnationName='"+stateName+"']/rank/code")[0].text)
        else:
            insertTupleInTirCache(strSubjectId,"NatureServe:RetrieveComprehensive:NegativeResponse",dt)
            print (docLength, strSubjectId)

In [17]:
# Run the process by firing functions
getNatureServeDataFromUID(getSubjectIDs())

0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
0 ELEMENT_GLOBAL.2.103412
0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
0 ELEMENT_GLOBAL.2.103412
0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
0 ELEMENT_GLOBAL.2.103412
0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
0 ELEMENT_GLOBAL.2.103412
0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
0 ELEMENT_GLOBAL.2.103412
0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
0 ELEMENT_GLOBAL.2.103412
0 ELEMENT_GLOBAL.2.111247
0 ELEMENT_GLOBAL.2.113177
