# Get NatureServe global ID and cache it in tircache

This notebook builds on the previous tircache notebooks. It uses the same basic methods to retrieve information from the NatureServe web services and cache the results in the tircache.

In [1]:
import requests,datetime,configparser
from lxml import etree
from io import StringIO, BytesIO

In [2]:
# Set defaults
targetData = "sgcn"
# Naive UTC timestamp in ISO 8601 form, taken once so every record written
# during this run carries the same value. datetime.utcnow() is deprecated, so
# an aware "now" is taken and its tzinfo dropped, which keeps the string in the
# same shape (no "+00:00" suffix) as before.
dt = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
natureServeServicesURL = "https://services.natureserve.org"
# Name-search endpoint; the species name to look up is appended after "name=".
natureServeSpeciesQueryBaseURL = natureServeServicesURL+"/idd/rest/v1/globalSpecies/list/nameSearch?name="

In [3]:
# Get API keys and any other config details from a file that is external to the code.
config = configparser.RawConfigParser()
# NOTE(review): absolute, machine-specific path - adjust it for your environment.
# The file is opened in a with-block so the handle is closed once it has been parsed.
with open(r'/Users/sky/Documents/configs/stuff.py') as configFile:
    config.read_file(configFile)

# Assemble the GC2 SQL API base URL, pulling the API key from the external config.
def getBaseURL():
    """Return the GC2 SQL API base URL with the API key as the ``key`` query parameter.

    The key is read from option ``apiKey_GC2_BCB`` in section ``apiKeys`` of the
    module-level ``config``; any double-quote characters in it are removed.
    """
    rawKey = config.get('apiKeys','apiKey_GC2_BCB')
    return "https://gc2.mapcentia.com/api/v1/sql/bcb?key={}".format(rawKey.replace('"',''))

In [4]:
# Helper: render a Python string as a single-quoted SQL string literal.
def _sqlQuote(text):
    """Return ``text`` wrapped in single quotes with embedded single quotes doubled.

    NOTE(review): doubling quotes is the standard SQL escape; this assumes the
    backend does not also treat backslashes as escapes - confirm for the GC2 database.
    """
    return "'" + text.replace("'", "''") + "'"


# Basic function to insert subject ID, property, and value into tircache
def insertTupleInTirCache(subjectid,prop,value):
    """Insert one (subjectid, property, value) row into the tircache table.

    Args:
        subjectid: String identifying the subject of the record.
        prop: String naming the property being recorded.
        value: String value of the property.

    Returns:
        The response object from ``requests.get`` so callers can inspect the
        outcome if they wish (existing callers ignore it).
    """
    # Build query string; each value is escaped so an apostrophe in the data
    # cannot terminate the SQL literal early.
    insertSQL = "INSERT INTO tircache (subjectid,property,value) VALUES ("+",".join(_sqlQuote(item) for item in (subjectid,prop,value))+")"
    # Execute query. Passing the SQL via params lets requests percent-encode it,
    # so characters such as '&', '#', '+' and '%' survive the trip in the URL.
    return requests.get(getBaseURL(), params={"q": insertSQL})

In [5]:
# Retrieve target data (species name)
def getSubjectIDs(targetData, limit=1000):
    """Fetch subject IDs (accepted scientific names) that still need a NatureServe check.

    Args:
        targetData: Name of the data source to draw subject IDs from. Only
            ``'sgcn'`` is supported.
        limit: Maximum number of rows to request (default 1000).

    Returns:
        The decoded JSON body of the SQL API response for ``'sgcn'``, or
        ``None`` when ``targetData`` is any other value.
    """
    if targetData != 'sgcn':
        return None

    # Select names from sgcn that have no 'NatureServe:RecordCheck...' entry in
    # tircache yet, i.e. names that have not been looked up before.
    targetDataSQL = (
        "SELECT DISTINCT sgcn.scientificname_accepted AS subjectid "
        "FROM sgcn "
        "WHERE sgcn.scientificname_accepted NOT IN "
        "(SELECT tircache.subjectid "
        "FROM tircache "
        "WHERE tircache.property LIKE 'NatureServe:RecordCheck%') "
        "LIMIT " + str(int(limit))
    )
    # Get Data. The SQL goes through params so requests percent-encodes it; the
    # literal '%' in the LIKE pattern would otherwise be a malformed URL escape.
    return requests.get(getBaseURL(), params={"q": targetDataSQL}).json()

In [6]:
# Loop through the target data, extract Subject IDs, retrieve data from service, and insert results in tircache
def getNatureServeElementIDFromSpeciesName(targetData):
    """Look up each species name in NatureServe and cache the outcome in tircache.

    For every feature in ``targetData['features']`` the name in
    ``feature['properties']['subjectid']`` is sent to the NatureServe name
    search. One of two record-check rows is written with the run timestamp
    ``dt``: ``NatureServe:RecordCheck:NegativeResponse`` when the result list
    is empty, otherwise ``NatureServe:RecordCheck:PositiveResponse`` followed
    by a ``NatureServe:globalSpeciesUid`` row holding the first UID found.

    Args:
        targetData: Dict with a ``'features'`` list as returned by
            ``getSubjectIDs``. ``None`` or an empty value is accepted and
            results in no work being done.

    Raises:
        requests.HTTPError: If NatureServe answers with an HTTP error status.
        lxml.etree.XMLSyntaxError: If the response body is not well-formed XML.
        In both cases nothing is written for the failing name, so it stays
        eligible for a later run.
    """
    from urllib.parse import quote

    # getSubjectIDs returns None for an unsupported target; nothing to do then.
    if not targetData:
        return

    # The response declares a default XML namespace. Matching on local-name()
    # ignores it, so the document does not have to be edited as text first.
    resultPath = (
        "/*[local-name()='speciesSearchReport']"
        "/*[local-name()='speciesSearchResultList']"
        "/*[local-name()='speciesSearchResult']"
    )
    uidPath = resultPath + "/*[local-name()='globalSpeciesUid']"

    for feature in targetData['features']:
        # Set the subjectid for query and recording
        strSubjectId = feature['properties']['subjectid']
        if not strSubjectId:
            # A null/empty name cannot be searched for; skip it.
            continue

        # Build the NatureServe name-search URL (name percent-encoded) and fetch the XML report
        natureServeQueryURL = natureServeSpeciesQueryBaseURL+quote(strSubjectId, safe='')
        natureServeData = requests.get(natureServeQueryURL)
        natureServeData.raise_for_status()

        # Parse the raw bytes: lxml honours the encoding named in the XML
        # declaration, which it refuses to do for already-decoded text.
        reportRoot = etree.fromstring(natureServeData.content)

        if not reportRoot.xpath(resultPath):
            insertTupleInTirCache(strSubjectId,"NatureServe:RecordCheck:NegativeResponse",dt)
            continue

        insertTupleInTirCache(strSubjectId,"NatureServe:RecordCheck:PositiveResponse",dt)
        # First non-empty UID in document order; results without one are tolerated.
        uidValues = [node.text for node in reportRoot.xpath(uidPath) if node.text]
        if uidValues:
            insertTupleInTirCache(strSubjectId,"NatureServe:globalSpeciesUid",uidValues[0])

In [None]:
# Run the process: fetch the subject IDs that still need checking, then look
# each one up in NatureServe and cache the results.
subjectIdData = getSubjectIDs("sgcn")
getNatureServeElementIDFromSpeciesName(subjectIdData)