This notebook runs through submitted species names, looks for matches in the World Register of Marine Species, and processes out specific properties that we want associated with discovered taxa in the Taxonomic Information Registry. 

In [1]:
%reload_ext autoreload
%autoreload 2

import tirutils,requests,configparser,re
from IPython.display import display

config = configparser.RawConfigParser()
config.read_file(open(r'../config/stuff.py'))
gc2APIKey = config.get('apiKeys','apiKey_GC2_BCB').replace('"','')
apiBaseURL = "https://gc2.mapcentia.com/api/v1/sql/bcb?key="+gc2APIKey

wormsRecordsByNameBaseURL = "http://www.marinespecies.org/rest/AphiaRecordsByName/"
wormsRecordByAphiaIDBaseURL = "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/"

In [2]:
# Pair worms properties that we want to cache
def packageWoRMSPairs(matchMethod,wormsData):
    import datetime
    dt = datetime.datetime.utcnow().isoformat()
    wormsPairs = '"cacheDate"=>"'+dt+'"'
    try:
        wormsPairs = wormsPairs+',"AphiaID"=>"'+str(wormsData['AphiaID'])+'"'
        wormsPairs = wormsPairs+',"wormsMatchMethod"=>"'+matchMethod+'"'
        wormsPairs = wormsPairs+',"scientificname"=>"'+wormsData['scientificname']+'"'
        wormsPairs = wormsPairs+',"status"=>"'+wormsData['status']+'"'
        wormsPairs = wormsPairs+',"rank"=>"'+wormsData['rank']+'"'
        wormsPairs = wormsPairs+',"valid_name"=>"'+wormsData['valid_name']+'"'
        try:
            wormsPairs = wormsPairs+',"valid_AphiaID"=>"'+str(wormsData['valid_AphiaID'])+'"'
        except:
            pass
        wormsPairs = wormsPairs+',"kingdom"=>"'+wormsData['kingdom']+'"'
        wormsPairs = wormsPairs+',"phylum"=>"'+wormsData['phylum']+'"'
        wormsPairs = wormsPairs+',"class"=>"'+wormsData['class']+'"'
        wormsPairs = wormsPairs+',"order"=>"'+wormsData['order']+'"'
        wormsPairs = wormsPairs+',"family"=>"'+wormsData['family']+'"'
        wormsPairs = wormsPairs+',"genus"=>"'+wormsData['genus']+'"'
        wormsPairs = wormsPairs+',"lsid"=>"'+wormsData['lsid']+'"'
        wormsPairs = wormsPairs+',"isMarine"=>"'+str(wormsData['isMarine'])+'"'
        wormsPairs = wormsPairs+',"isBrackish"=>"'+str(wormsData['isBrackish'])+'"'
        wormsPairs = wormsPairs+',"isFreshwater"=>"'+str(wormsData['isFreshwater'])+'"'
        wormsPairs = wormsPairs+',"isTerrestrial"=>"'+str(wormsData['isTerrestrial'])+'"'
        wormsPairs = wormsPairs+',"isExtinct"=>"'+str(wormsData['isExtinct'])+'"'
        wormsPairs = wormsPairs+',"match_type"=>"'+wormsData['match_type']+'"'
        wormsPairs = wormsPairs+',"modified"=>"'+wormsData['modified']+'"'
    except:
        display (wormsData)

    return wormsPairs

In [3]:
# Get both the registered scientific name (SGCN only at this point) and any species name from ITIS so that we can run a couple of options for WoRMS matches
targetDataSQL = "SELECT gid, \
    registration -> 'SGCN_ScientificName_Submitted' AS scientificname, \
    itis -> 'Species' AS speciesname_itis \
    FROM tir.tir2 \
    WHERE worms IS NULL \
    ORDER BY gid"

targetData = requests.get(apiBaseURL+"&q="+targetDataSQL).json()

In [4]:
# Set this flag to true to go ahead and write data to the database
commitData = False

for feature in targetData['features']:
    gid = feature['properties']['gid']
    scientificname = feature['properties']['scientificname']
    speciesname_itis = feature['properties']['speciesname_itis']
    numFoundExact = 0
    numFoundFuzzy = 0

    if scientificname.find("pop."):
        nameParts = scientificname.split()
        regex = re.compile(r'pop\.[0-9]')
        scientificname = ' '.join([i for i in nameParts if not regex.search(i)])
    
    if scientificname.find("spp."):
        nameParts = scientificname.split()
        regex = re.compile(r'spp\.')
        scientificname = ' '.join([i for i in nameParts if not regex.search(i)])
    
    if any(x.isupper() for x in scientificname[-(len(scientificname)-1):]):
        scientificname = scientificname.lower().capitalize()
    
    wormsFuzzyMatchURL = wormsRecordsByNameBaseURL+scientificname+"?like=true&marine_only=false"
    worms = str()
    try:
        wormsFuzzyMatchR = requests.get(wormsFuzzyMatchURL).json()
    except:
        continue
    if len(wormsFuzzyMatchR) == 1:
        if wormsFuzzyMatchR[0]['AphiaID'] == wormsFuzzyMatchR[0]['valid_AphiaID']:
            wormsMatchMethod = "found match on scientific name"
            worms = packageWoRMSPairs(wormsMatchMethod,wormsFuzzyMatchR[0])
        else:
            aphiaIDURL = wormsRecordByAphiaIDBaseURL+str(wormsFuzzyMatchR[0]['valid_AphiaID'])
            try:
                aphiaR = requests.get(aphiaIDURL).json()
            except:
                continue
            wormsMatchMethod = "found match by following valid aphiaid"
            worms = packageWoRMSPairs(wormsMatchMethod,aphiaR)
    
    # If we failed to get anything on the originally submitted name, try the name from ITIS (if available)
    if len(worms) == 0 and type(speciesname_itis) != None and speciesname_itis != scientificname:
        wormsFuzzyMatchURL = wormsRecordsByNameBaseURL+speciesname_itis+"?like=true&marine_only=false"
        try:
            wormsFuzzyMatchR = requests.get(wormsFuzzyMatchURL).json()
        except:
            continue
        if len(wormsFuzzyMatchR) == 1:
            if wormsFuzzyMatchR[0]['AphiaID'] == wormsFuzzyMatchR[0]['valid_AphiaID']:
                wormsMatchMethod = "found match on ITIS name"
                worms = packageWoRMSPairs(wormsMatchMethod,str(wormsFuzzyMatchR[0]))
            else:
                aphiaIDURL = wormsRecordByAphiaIDBaseURL+str(wormsFuzzyMatchR[0]['valid_AphiaID'])
                try:
                    aphiaR = requests.get(aphiaIDURL).json()
                except:
                    continue
                wormsMatchMethod = "found match by following valid AphiaID from ITIS name"
                worms = packageWoRMSPairs(wormsMatchMethod,aphiaR)

    if len(worms) > 0 and commitData:
        print (tirutils.cacheToTIR(apiBaseURL,gid,"worms",worms))
    elif len(worms) > 0 and not commitData:
        print (gid, worms)


"{'isFreshwater': 1, 'isMarine': 0, 'order': 'Unionida', 'genus': 'Ortmanniana', 'url': 'http://www.marinespecies.org/aphia.php?p=taxdetails&id=857466', 'match_type': 'like', 'kingdom': 'Animalia', 'family': 'Unionidae', 'isBrackish': 0, 'lsid': 'urn:lsid:marinespecies.org:taxname:857466', 'status': 'accepted', 'isTerrestrial': 0, 'valid_authority': '(Lamarck, 1819)', 'rank': 'Species', 'modified': '2015-10-16T13:46:27Z', 'unacceptreason': None, 'AphiaID': 857466, 'valid_AphiaID': 857466, 'class': 'Bivalvia', 'phylum': 'Mollusca', 'citation': 'Bieler, R. (2015). Ortmanniana ligamentina (Lamarck, 1819). In:  MolluscaBase (2017). Accessed through:  World Register of Marine Species at http://www.marinespecies.org/aphia.php?p=taxdetails&id=857466 on 2017-05-03', 'valid_name': 'Ortmanniana ligamentina', 'authority': '(Lamarck, 1819)', 'scientificname': 'Ortmanniana ligamentina', 'isExtinct': None}"

67 "cacheDate"=>"2017-05-03T00:21:06.769243"


"{'isFreshwater': 1, 'isMarine': 0, 'order': 'Unionida', 'genus': 'Ortmanniana', 'url': 'http://www.marinespecies.org/aphia.php?p=taxdetails&id=857466', 'match_type': 'like', 'kingdom': 'Animalia', 'family': 'Unionidae', 'isBrackish': 0, 'lsid': 'urn:lsid:marinespecies.org:taxname:857466', 'status': 'accepted', 'isTerrestrial': 0, 'valid_authority': '(Lamarck, 1819)', 'rank': 'Species', 'modified': '2015-10-16T13:46:27Z', 'unacceptreason': None, 'AphiaID': 857466, 'valid_AphiaID': 857466, 'class': 'Bivalvia', 'phylum': 'Mollusca', 'citation': 'Bieler, R. (2015). Ortmanniana ligamentina (Lamarck, 1819). In:  MolluscaBase (2017). Accessed through:  World Register of Marine Species at http://www.marinespecies.org/aphia.php?p=taxdetails&id=857466 on 2017-05-03', 'valid_name': 'Ortmanniana ligamentina', 'authority': '(Lamarck, 1819)', 'scientificname': 'Ortmanniana ligamentina', 'isExtinct': None}"

68 "cacheDate"=>"2017-05-03T00:21:07.785141"
69 "cacheDate"=>"2017-05-03T00:21:08.861451","AphiaID"=>"208956","wormsMatchMethod"=>"found match on scientific name","scientificname"=>"Actinopyga mauritiana","status"=>"accepted","rank"=>"Species","valid_name"=>"Actinopyga mauritiana","valid_AphiaID"=>"208956","kingdom"=>"Animalia","phylum"=>"Echinodermata","class"=>"Holothuroidea","order"=>"Aspidochirotida","family"=>"Holothuriidae","genus"=>"Actinopyga","lsid"=>"urn:lsid:marinespecies.org:taxname:208956","isMarine"=>"1","isBrackish"=>"0","isFreshwater"=>"0","isTerrestrial"=>"0","isExtinct"=>"None","match_type"=>"like","modified"=>"2010-10-14T16:30:08Z"
72 "cacheDate"=>"2017-05-03T00:21:10.001779","AphiaID"=>"279504","wormsMatchMethod"=>"found match on scientific name","scientificname"=>"Acyrtops beryllinus","status"=>"accepted","rank"=>"Species","valid_name"=>"Acyrtops beryllinus","valid_AphiaID"=>"279504","kingdom"=>"Animalia","phylum"=>"Chordata","class"=>"Actinopterygii","order"=>"Gob

{'AphiaID': 413409,
 'authority': '(Röding, 1798)',
 'citation': 'Bouchet, P. (2011). Lithopoma phoebium (Röding, 1798). In:  MolluscaBase (2017). Accessed through:  World Register of Marine Species at http://www.marinespecies.org/aphia.php?p=taxdetails&id=413409 on 2017-05-03',
 'class': 'Gastropoda',
 'family': 'Turbinidae',
 'genus': 'Lithopoma',
 'isBrackish': None,
 'isExtinct': None,
 'isFreshwater': None,
 'isMarine': 1,
 'isTerrestrial': None,
 'kingdom': 'Animalia',
 'lsid': 'urn:lsid:marinespecies.org:taxname:413409',
 'match_type': 'exact',
 'modified': '2011-10-05T21:30:45Z',
 'order': None,
 'phylum': 'Mollusca',
 'rank': 'Species',
 'scientificname': 'Lithopoma phoebium',
 'status': 'accepted',
 'unacceptreason': None,
 'url': 'http://www.marinespecies.org/aphia.php?p=taxdetails&id=413409',
 'valid_AphiaID': 413409,
 'valid_authority': '(Röding, 1798)',
 'valid_name': 'Lithopoma phoebium'}

484 "cacheDate"=>"2017-05-03T00:24:27.779240","AphiaID"=>"413409","wormsMatchMethod"=>"found match by following valid aphiaid","scientificname"=>"Lithopoma phoebium","status"=>"accepted","rank"=>"Species","valid_name"=>"Lithopoma phoebium","valid_AphiaID"=>"413409","kingdom"=>"Animalia","phylum"=>"Mollusca","class"=>"Gastropoda"
485 "cacheDate"=>"2017-05-03T00:24:28.168694","AphiaID"=>"157967","wormsMatchMethod"=>"found match on scientific name","scientificname"=>"Astrangia poculata","status"=>"accepted","rank"=>"Species","valid_name"=>"Astrangia poculata","valid_AphiaID"=>"157967","kingdom"=>"Animalia","phylum"=>"Cnidaria","class"=>"Anthozoa","order"=>"Scleractinia","family"=>"Rhizangiidae","genus"=>"Astrangia","lsid"=>"urn:lsid:marinespecies.org:taxname:157967","isMarine"=>"1","isBrackish"=>"None","isFreshwater"=>"0","isTerrestrial"=>"0","isExtinct"=>"0","match_type"=>"like","modified"=>"2010-09-25T14:49:11Z"
487 "cacheDate"=>"2017-05-03T00:24:28.959475","AphiaID"=>"279772","wormsMat

{'AphiaID': 419403,
 'authority': 'Schwengel, 1951',
 'citation': 'Rosenberg, G. (2012). Calliostoma adelae. In:  MolluscaBase (2017). Accessed through:  World Register of Marine Species at http://www.marinespecies.org/aphia.php?p=taxdetails&id=419403 on 2017-05-03',
 'class': 'Gastropoda',
 'family': 'Calliostomatidae',
 'genus': 'Calliostoma',
 'isBrackish': None,
 'isExtinct': None,
 'isFreshwater': None,
 'isMarine': 1,
 'isTerrestrial': None,
 'kingdom': 'Animalia',
 'lsid': 'urn:lsid:marinespecies.org:taxname:419403',
 'match_type': 'like',
 'modified': '2012-05-20T19:14:29Z',
 'order': None,
 'phylum': 'Mollusca',
 'rank': 'Species',
 'scientificname': 'Calliostoma adelae',
 'status': 'accepted',
 'unacceptreason': None,
 'url': 'http://www.marinespecies.org/aphia.php?p=taxdetails&id=419403',
 'valid_AphiaID': 419403,
 'valid_authority': 'Schwengel, 1951',
 'valid_name': 'Calliostoma adelae'}

722 "cacheDate"=>"2017-05-03T00:26:32.565975","AphiaID"=>"419403","wormsMatchMethod"=>"found match on scientific name","scientificname"=>"Calliostoma adelae","status"=>"accepted","rank"=>"Species","valid_name"=>"Calliostoma adelae","valid_AphiaID"=>"419403","kingdom"=>"Animalia","phylum"=>"Mollusca","class"=>"Gastropoda"
724 "cacheDate"=>"2017-05-03T00:26:33.808009","AphiaID"=>"716284","wormsMatchMethod"=>"found match on scientific name","scientificname"=>"Callitriche hermaphroditica","status"=>"accepted","rank"=>"Species","valid_name"=>"Callitriche hermaphroditica","valid_AphiaID"=>"716284","kingdom"=>"Plantae","phylum"=>"Tracheophyta","class"=>"Magnoliopsida","order"=>"Lamiales","family"=>"Plantaginaceae","genus"=>"Callitriche","lsid"=>"urn:lsid:marinespecies.org:taxname:716284","isMarine"=>"0","isBrackish"=>"0","isFreshwater"=>"1","isTerrestrial"=>"None","isExtinct"=>"None","match_type"=>"like","modified"=>"2013-01-21T12:01:30Z"
752 "cacheDate"=>"2017-05-03T00:26:48.398693","AphiaID

TypeError: Can't convert 'NoneType' object to str implicitly