In [1]:
import requests,configparser,re
from IPython.display import display

In [2]:
# Get API keys and any other config details from a file that is external to the code.
config = configparser.RawConfigParser()
config.read_file(open(r'../config/stuff.py'))

In [3]:
# Build base URL with API key using input from the external config.
def getBaseURL():
    gc2APIKey = config.get('apiKeys','apiKey_GC2_BCB').replace('"','')
    apiBaseURL = "https://gc2.mapcentia.com/api/v1/sql/bcb?key="+gc2APIKey
    return apiBaseURL

In [4]:
# There are a few things that we've found in the name strings that, if removed or modified, will result in a valid taxon name string for the ITIS service
def cleanScientificName(scientificname):
    # Get rid of "pop." from the string to enable the search to find a match without whatever population indicator is in the string
    if scientificname.find("pop."):
        nameParts = scientificname.split()
        regex = re.compile(r'pop\.[0-9]')
        scientificname = ' '.join([i for i in nameParts if not regex.search(i)])

    # Get rid of "spp." from the string, and it should find a genus match
    if scientificname.find("spp."):
        nameParts = scientificname.split()
        regex = re.compile(r'spp\.')
        scientificname = ' '.join([i for i in nameParts if not regex.search(i)])
    
    # Clean up all upper case strings because the ITIS service doesn't like them
    if any(x.isupper() for x in scientificname[-(len(scientificname)-1):]):
        scientificname = scientificname.lower().capitalize()
    
    # Get rid of text in parens and brackets; this is a design decision to potentially do away with information that might be important, but it gets retained in the original records
    scientificname = re.sub("[\(\[].*?[\)\]]", "", scientificname)
    scientificname = scientificname.replace("  "," ")

    return scientificname

In [12]:
def getWoRMSSearchURL(searchType,target):
    if searchType == "ExactName":
        return  "http://www.marinespecies.org/rest/AphiaRecordsByName/"+target+"?like=false"
    elif searchType == "FuzzyName":
        return  "http://www.marinespecies.org/rest/AphiaRecordsByName/"+target+"?like=true"
    elif searchType == "AphiaID":
        return "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/"+target

In [5]:
# Pair worms properties that we want to cache
def packageWoRMSPairs(matchMethod,wormsData):
    import datetime
    dt = datetime.datetime.utcnow().isoformat()
    wormsPairs = '"cacheDate"=>"'+dt+'"'
    wormsPairs = wormsPairs+',"wormsMatchMethod"=>"'+matchMethod+'"'

    if type(wormsData) is int:
        return wormsPairs
    else:
        try:
            wormsPairs = wormsPairs+',"AphiaID"=>"'+str(wormsData['AphiaID'])+'"'
            wormsPairs = wormsPairs+',"scientificname"=>"'+wormsData['scientificname']+'"'
            wormsPairs = wormsPairs+',"status"=>"'+wormsData['status']+'"'
            wormsPairs = wormsPairs+',"rank"=>"'+wormsData['rank']+'"'
            wormsPairs = wormsPairs+',"valid_name"=>"'+wormsData['valid_name']+'"'
            try:
                wormsPairs = wormsPairs+',"valid_AphiaID"=>"'+str(wormsData['valid_AphiaID'])+'"'
            except:
                pass
            wormsPairs = wormsPairs+',"kingdom"=>"'+wormsData['kingdom']+'"'
            wormsPairs = wormsPairs+',"phylum"=>"'+wormsData['phylum']+'"'
            wormsPairs = wormsPairs+',"class"=>"'+wormsData['class']+'"'
            wormsPairs = wormsPairs+',"order"=>"'+wormsData['order']+'"'
            wormsPairs = wormsPairs+',"family"=>"'+wormsData['family']+'"'
            wormsPairs = wormsPairs+',"genus"=>"'+wormsData['genus']+'"'
            wormsPairs = wormsPairs+',"lsid"=>"'+wormsData['lsid']+'"'
            wormsPairs = wormsPairs+',"isMarine"=>"'+str(wormsData['isMarine'])+'"'
            wormsPairs = wormsPairs+',"isBrackish"=>"'+str(wormsData['isBrackish'])+'"'
            wormsPairs = wormsPairs+',"isFreshwater"=>"'+str(wormsData['isFreshwater'])+'"'
            wormsPairs = wormsPairs+',"isTerrestrial"=>"'+str(wormsData['isTerrestrial'])+'"'
            wormsPairs = wormsPairs+',"isExtinct"=>"'+str(wormsData['isExtinct'])+'"'
            wormsPairs = wormsPairs+',"match_type"=>"'+wormsData['match_type']+'"'
            wormsPairs = wormsPairs+',"modified"=>"'+wormsData['modified']+'"'
        except:
            display (wormsData)

        return wormsPairs

In [6]:
# Basic function to insert subject ID, property, and value into tircache
def cacheToTIR(gid,infotype,pairs):
    import requests
    updateQ = "UPDATE tir.tir2 SET "+infotype+" = '"+pairs+"' WHERE gid = "+str(gid)
    r = requests.get(getBaseURL()+"&q="+updateQ).json()
    return r

In [10]:
# Query for the registered names we want to run through the system
uniqueNames  = requests.get(getBaseURL()+"&q=SELECT gid,registration->'SGCN_ScientificName_Submitted' AS scientificname,itis->'nameWOInd' AS itisNameWOInd,itis->'nameWInd' AS itisNameWInd FROM tir.tir2 WHERE worms IS NULL AND itis->'itisMatchMethod' NOT LIKE 'NotMatched%' LIMIT 25").json()



In [13]:
for feature in uniqueNames["features"]:
    # Set up a local data structure for storage and processing
    thisRecord = {}
    
    # Set data from query results
    thisRecord["gid"] = feature["properties"]["gid"]
    thisRecord["scientificname_submitted"] = feature["properties"]["scientificname"]
    thisRecord["scientificname_search"] = cleanScientificName(thisRecord["scientificname_submitted"])
    thisRecord["itisNameWOInd"] = feature["properties"]["itisnamewoind"]
    thisRecord["itisNameWInd"] = feature["properties"]["itisnamewind"]
    
    # Set defaults for thisRecord
    thisRecord["matchMethod"] = "NotMatched:"+thisRecord["scientificname_search"]
    thisRecord["wormsPairs"] = packageWoRMSPairs(thisRecord["matchMethod"],0)

    # Handle the cases where there is enough interesting stuff in the scientific name string that it comes back blank from the cleaners
    if len(thisRecord["scientificname_search"]) != 0:
        thisRecord["wormsExactSearchURL"] = getWoRMSSearchURL("ExactName",thisRecord["scientificname_search"])
        print (thisRecord["wormsExactSearchURL"])
        
        wormsSearchResults = requests.get(thisRecord["wormsExactSearchURL"]).json()
        display (wormsSearchResults)

    display (thisRecord)


http://www.marinespecies.org/rest/AphiaRecordsByName/Batrachoseps stebbinsi?like=false


JSONDecodeError: Expecting value: line 1 column 1 (char 0)