Consult the ITIS API to retrieve information and cache for later processing. This is done via the ITIS module in the bispy package that uses various search mechanisms against ITIS and slightly repackages the returned data for use.

In [3]:
#Import needed packages
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import jsonschema

itis = bispy.itis.Itis()
bis_utils = bispy.bis.Utils()

import warnings
warnings.filterwarnings('ignore')

In [4]:
url='http://www.itis.gov/ITISWebService/services/ITISService/getGeographicValues'

In [7]:
# Open source WLCI list created from build-specie-list.ipynb
with open("sources/WLCI Species List from Literature.json", "r") as f:
    sp_list = json.loads(f.read())
sci_name_list = list(set([spp["Scientific Name"] for spp in sp_list]))

In [8]:
#print initial list
sci_name_list

['Microtus californicus',
 'Amphispiza belli',
 'Ammodytes marinus',
 'Gavia stellata',
 'Xerospermophilus mohavensis',
 'Circus cyaneus',
 'Lanius ludovicianus',
 'Tamiasciurus hudsonicus',
 'Setophaga discolor',
 'Artemisia tridentata',
 'Myotis septentrionalis',
 'Dendroctonus pseudotsugae',
 'Krascheninnikovia lanata',
 'Vulpes vulpes',
 'Pterodroma sandwichensis',
 'Poa secunda',
 'Cottus bairdii',
 'Centrocercus minimus',
 'Artemisiospiza nevadensis',
 'Bombus griseocollis',
 'Buteo jamaicensis',
 'Amorpha canescens',
 'Kochia prostrata',
 'Branta leucopsis',
 'Hesperostipa comata',
 'Pelecanus occidentalis',
 'Glaucomys volans',
 'Peromyscus maniculatus',
 'Corvus corax',
 'Oncorhynchus tshawytscha',
 'Purshia tridentata',
 'Arctostaphylos patula',
 'Populus tremuloides',
 'Pseudotsuga menziesii',
 'Atriplex gardneri',
 'Melanitta perspicillata',
 'Prunus virginiana',
 'Sarcobatus vermiculatus',
 'Achnatherum hymenoides',
 'Ericameria nauseosa',
 'Falco sparverius',
 'Agave amer

In [9]:
# Use joblib to run multiple requests for ITIS documents in parallel via known ITIS TSNs
itis_list = Parallel(n_jobs=8)(delayed(itis.search)(name) for name in sci_name_list)
len(itis_list)

162

In [10]:
#Select records listed as successful
itis_cache=[i for i in itis_list if i['processing_metadata']['status'] == 'success']
len(itis_cache)

162

In [13]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("cache/itis.json", itis_cache))

{'Doc Cache File': 'cache/itis.json',
 'Number of Documents in Cache': 116,
 'Document Number 108': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-06T20:57:44.942788',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Artemisia\\%20tridentata'}]},
  'itis_data': [{'tsn': '35498',
    'nameWInd': 'Artemisia tridentata',
    'nameWOInd': 'Artemisia tridentata',
    'unit1': 'Artemisia',
    'unit2': 'tridentata',
    'usage': 'accepted',
    'kingdom': 'Plantae',
    'parentTSN': '35431',
    'rank': 'Species',
    'date_created': '1996-06-13 14:51:08',
    'date_modified': '2010-10-05 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Plantae'},
     {'rank': 'Subkingdom', 'name': 'Viridiplantae'},
     {'rank': 'Infrakingdom', 'name': 'Streptophyta'},
     {'rank': 'Superdivision', 'name': 'Embryophyta'},
     {'rank': 'Division', 'name': 'Tracheophyta'},
     {'rank':