This notebook explores the records retrieved from the consultation between the WLCI scientific species name list and the ITIS API. The cached ITIS results are examined and next steps are determined.

In [1]:
#Import needed packages
import json
import bispy
import requests
from IPython.display import display
from joblib import Parallel, delayed
#import logging
#logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')



bis_utils = bispy.bis.Utils()

In [2]:
#Open cache ITIS list created from WLCI Species List from Literature.ipynb
with open("cache/itis.json", "r") as f:
    itis_cache = json.loads(f.read())

In [3]:
#Print ITIS list 
itis_cache

[{'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-15T15:34:50.612983',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Artemisiospiza\\%20nevadensis'}]},
  'itis_data': [{'tsn': '997723',
    'nameWInd': 'Artemisiospiza nevadensis',
    'nameWOInd': 'Artemisiospiza nevadensis',
    'unit1': 'Artemisiospiza',
    'unit2': 'nevadensis',
    'usage': 'valid',
    'kingdom': 'Animalia',
    'parentTSN': '997695',
    'rank': 'Species',
    'synonyms': ['997723:$Amphispiza belli nevadensis$Artemisiospiza belli nevadensi$'],
    'synonymTSNs': ['997723:$179403$998356$'],
    'date_created': '2015-10-28 14:07:42',
    'date_modified': '2015-10-28 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Animalia'},
     {'rank': 'Subkingdom', 'name': 'Bilateria'},
     {'rank': 'Infrakingdom', 'name': 'Deuterostomia'},
     {'rank': 'Phylum', 'name': 'Chordata'},
     {'rank': 'Subp

In [11]:
#Filter the ITIS cache to only list the ITIS TSN records
itis_tsn = []
for record in itis_cache:
    for item in record['itis_data']:
        itis_tsn.append(item["tsn"])    

In [12]:
itis_tsn

['997723',
 '35454',
 '180706',
 '180575',
 '926246',
 '926145',
 '24806',
 '20531',
 '175134',
 '180711',
 '180017',
 '176176',
 '20512',
 '997724',
 '179402',
 '175838',
 '41223',
 '677540',
 '175603',
 '175622',
 '180596',
 '180701',
 '175407',
 '22843',
 '180118',
 '176419',
 '23512',
 '503283',
 '163909',
 '625114',
 '180703',
 '183343',
 '167237',
 '178515',
 '528646',
 '173856',
 '25134',
 '551209',
 '180016',
 '41062',
 '179725',
 '183623',
 '20518',
 '174474',
 '180599',
 '161979',
 '950052',
 '503283',
 '822851',
 '503290',
 '40913',
 '40816',
 '175855',
 '175170',
 '180305',
 '171677',
 '114919',
 '202223',
 '183311',
 '554256',
 '502264',
 '20707',
 '182660',
 '174712',
 '180001',
 '40524',
 '23601',
 '960160',
 '968200',
 '625063',
 '174479',
 '175130',
 '183291',
 '183327',
 '21920',
 '25290',
 '632855',
 '174999',
 '180276',
 '180195',
 '785041',
 '179333',
 '175420',
 '503433',
 '180604',
 '784287',
 '180607',
 '180170',
 '35498',
 '20662',
 '175147',
 '194820',
 '55270

In [6]:
# Set function for accessing the ITIS database using ITIS TSN numbers
def itis_api(itis_tsn):
    base_url = 'http://www.itis.gov/ITISWebService/jsonservice/ITISService/getGeographicDivisionsFromTSN'
    param = '?tsn=' + str(itis_tsn)
    get = (base_url + str(param))
    #logging.debug('This is a log message.'+str(get))
    try:
        r=requests.get(get)
        if r.status_code == 200 and 'success' in r.json():
            json_r = r.json()
            data = json_r['success']['data']
            return data
        elif r.status_code == 200:
            data = []
            return data
        else:
            raise Exception('itis API returning: {0}'.format(r.status_code))
    except Exception as e:
        raise Exception(e)

In [8]:
#Create list with data and call the ITIS API
itis_data_list= []
for item in itis_tsn:
    itis_data_list.append(itis_api(item)) 

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 []]

In [10]:
display(itis_data_list)

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 []]

In some cases more than one ITIS record was returned from the function. In every case, the “processing_metadata” structure indicates that the scientific species names used at the point of discovery were accepted. However, the "itis_data" structure indicates that potentially invalid names were provided from the WLCI literature database or xDD source information yet have a valid record in ITIS. The following code block lets us examine what is going on in these cases. 

The “processing_metadata” structure provides information about what the function does. It includes the URLs to the ITIS API that resulted in some action. Both the valid/accepted and invalid/unaccepted names from ITIS are recorded. We reach back to the itis_cahce to show that record.

In [None]:
#Select ITIS records with more than one record 
validate =[i for i in itis_cache if "itis_data" in i.keys() and len(i["itis_data"]) > 1]

In [None]:
validate

This list indicates a few things:
1. There is one case where a source name was misspelled, Equus burchelli should have been Equus quagga burchellii. In this case, the search used the TSN of the misspelled species name to correctly identify the species. ITIS considers the record containing the misspelled name to be invalid and the other record valid, even though they are for the same name.  
2. In three cases, the names provided were considered invalid or not accepted because ITIS considered the names to be synonyms or junior synonyms of the correct species names. 
3. In one case, ITIS considered the name provided to be invalid because it was an  “original name/combination”.
4. In one case, ITIS considered the name provided to be invalid because it was a “subsequent name/combination”.
    
In the case of any disagreement between WLCI scientists and the taxonomic authority, both the invalid/not accepted and valid/accepted ITIS names from the WLCI species names list will be used for further consultations with additional systems. Two seperate lists, invalid_itis and valid_itis, will be used to distinguish the invalid/not accepted and valid/accepted ITIS names in these consultations. Any information that results from consultations using the invalid_itis list will be cached separately and evaluated for their utility.  

In [None]:
#Create list containing sciencitific names and ITIS usage classifications
itis_explore = list()
for itis_doc_set in itis_cache:
    for itis_doc in itis_doc_set["itis_data"]:
        itis_explore.append({"scientific_name": itis_doc["nameWInd"],"itis_usage": itis_doc["usage"]})

In [None]:
#Display the ITIS list of species names and ITIS usage (valid/invalid and accepted/not accepted)
itis_explore

In [None]:
#Create list of specie names considered invalid or not accepted by ITIS
invalid_itis=[i for i in itis_explore if i['itis_usage'] == 'invalid']
invalid_itis.extend([i for i in itis_explore if i['itis_usage'] == 'not accepted'])

In [None]:
#Display list of specie names considered invalid or not accepted by ITIS
invalid_itis

In [None]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("cache/invalid_itis.json", invalid_itis))

In [None]:
#Create an updated list of species names considered valid or accepted by ITIS
updated_itis=[e for e in itis_explore if e['scientific_name'] not in {'Amphispiza belli','Thrichomys fosteri','Tetrao tetrix','Brucella abortus','Equus burchelli','Bassia prostrata'}]
len(updated_itis)

In [None]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("cache/valid_itis.json", updated_itis))