Consult the ITIS API to retrieve information and cache it for later processing. This is done via the ITIS module in the bispy package, which uses various search mechanisms against ITIS and slightly repackages the returned data for use.

In [9]:
#Import needed packages
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import jsonschema

# ITIS helper from bispy; its search() method is used below to look up each scientific name
itis = bispy.itis.Itis()
# bispy utility object; its doc_cache() method is used below to write the results to the cache file
bis_utils = bispy.bis.Utils()

In [10]:
# Open source WLCI list created from build-specie-list.ipynb.
# json.load parses straight from the file handle; the encoding is stated explicitly so the
# read does not depend on the platform's default text encoding.
with open("sources/WLCI Species List from Literature.json", "r", encoding="utf-8") as f:
    sp_list = json.load(f)

# De-duplicate the scientific names while keeping their first-seen order from the source file.
# A plain set() would also de-duplicate, but the iteration order of a set of strings changes
# between kernel starts (string hash randomization), which would make the order of the
# downstream search results differ from run to run.
sci_name_list = list(dict.fromkeys(spp["Scientific Name"] for spp in sp_list))

In [11]:
# Show the de-duplicated list of scientific names as this cell's output
sci_name_list

['Falco mexicanus',
 'Agave americana',
 'Gavia stellata',
 'Passer domesticus',
 'Hesperostipa comata',
 'Branta canadensis',
 'Connochaetes taurinus',
 'Prunus virginiana',
 'Lanius ludovicianus',
 'Castor canadensis',
 'Buteo regalis',
 'Sphaeralcea coccinea',
 'Descurainia sophia',
 'Abies lasiocarpa',
 'Ericameria nauseosa',
 'Lasiurus cinereus semotus',
 'Clupea pallasii',
 'Oncorhynchus mykiss',
 'Clupea pallasi',
 'Thrichomys fosteri',
 'Pseudoroegneria spicata',
 'Brucella abortus',
 'Grus canadensis',
 'Artemisia tridentata',
 'Pterodroma sandwichensis',
 'Brachylagus idahoensis',
 'Ictidomys tridecemlineatus',
 'Pinus albicaulis',
 'Cervus elaphus',
 'Zea mays',
 'Peromyscus keeni',
 'Clangula hyemalis',
 'Juniperus communis',
 'Cercocarpus ledifolius',
 'Pascopyrum smithii',
 'Amorpha canescens',
 'Centrocercus minimus',
 'Eremophila alpestris',
 'Fringilla montifringilla',
 'Dendroctonus pseudotsugae',
 'Canis latrans',
 'Sarcobatus vermiculatus',
 'Orgyia pseudotsugata',


In [12]:
# Look up every scientific name from the WLCI Species List from Literature in ITIS.
# joblib fans the itis.search calls out over 8 parallel jobs; one result is returned per name.
deferred_search = delayed(itis.search)
itis_list = Parallel(n_jobs=8)(deferred_search(sci_name) for sci_name in sci_name_list)
len(itis_list)

162

In [13]:
# Keep only the search results whose processing metadata reports a successful ITIS match
# for the WLCI scientific species name; the cell output is the number of results kept.
itis_cache = list(
    filter(
        lambda itis_doc: itis_doc['processing_metadata']['status'] == 'success',
        itis_list,
    )
)
len(itis_cache)

162

In [14]:
# Write the retrieved documents to the cache file, then show the summary returned by
# doc_cache (file name, document count and a sample document) for verification
cache_summary = bis_utils.doc_cache("cache/itis.json", itis_cache)
display(cache_summary)

{'Doc Cache File': 'cache/itis.json',
 'Number of Documents in Cache': 162,
 'Document Number 124': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-13T14:47:35.013275',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Gymnogyps\\%20californianus'}]},
  'itis_data': [{'tsn': '175274',
    'nameWInd': 'Gymnogyps californianus',
    'nameWOInd': 'Gymnogyps californianus',
    'unit1': 'Gymnogyps',
    'unit2': 'californianus',
    'usage': 'valid',
    'kingdom': 'Animalia',
    'parentTSN': '175273',
    'rank': 'Species',
    'synonyms': ['175274:$Vultur californianus$'],
    'synonymTSNs': ['175274:$175275$'],
    'date_created': '1996-06-13 14:51:08',
    'date_modified': '2011-09-22 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Animalia'},
     {'rank': 'Subkingdom', 'name': 'Bilateria'},
     {'rank': 'Infrakingdom', 'name': 'Deuterostomia'},
     {'rank': 'Phy