Consult the ITIS API to retrieve information and cache for later processing. This is done via the ITIS module in the bispy package that uses various search mechanisms against ITIS and slightly repackages the returned data for use.

In [1]:
#Import needed packages
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import jsonschema

itis = bispy.itis.Itis()
bis_utils = bispy.bis.Utils()

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Open source WLCI list created from build-specie-list.ipynb
with open("cache/WLCI Species List from Literature.json", "r") as f:
    sp_list = json.loads(f.read())
sci_name_list = list(set([spp["Scientific Name"] for spp in sp_list]))

In [3]:
#print initial list
sci_name_list

['Festuca idahoensis',
 'Ursus americanus',
 'Pseudotsuga menziesii',
 'Tamiasciurus hudsonicus',
 'Otis tarda',
 'Melanitta perspicillata',
 'Taxidea taxus',
 'Poa secunda',
 'Amphispiza belli',
 'Rangifer tarandus groenlandicus',
 'Urocitellus armatus',
 'Centrocercus urophasianus',
 'Procapra gutturosa',
 'Juniperus communis',
 'Cronartium ribicola',
 'Spizella breweri',
 'Atriplex argentea',
 'Atriplex canescens',
 'Leymus cinereus',
 'Oncorhynchus clarkii',
 'Sarcobatus vermiculatus',
 'Pinus contorta',
 'Centrocercus minimus',
 'Pinus flexilis',
 'Ericameria nauseosa',
 'Sphaeralcea coccinea',
 'Thinopyrum ponticum',
 'Ictidomys tridecemlineatus',
 'Bassia prostrata',
 'Castor canadensis',
 'Dendroctonus pseudotsugae',
 'Medicago sativa',
 'Lepus townsendii',
 'Antilocapra americana',
 'Potorous tridactylus',
 'Elymus elymoides',
 'Microtus californicus',
 'Bromus tectorum',
 'Prunus virginiana',
 'Vaccinium membranaceum',
 'Pseudoroegneria spicata',
 'Salvelinus namaycush',
 'Ca

In [11]:
%%time
# Use joblib to run multiple requests for ITIS documents in parallel via known ITIS TSNs
itis_list = Parallel(n_jobs=8)(delayed(itis.search)(name) for name in sci_name_list)
len(itis_list)

116

In [12]:
#Select records listed as successful
itis_cache=[i for i in itis_list if i['processing_metadata']['status'] == 'success']
len(itis_cache)

116

In [13]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("cache/itis.json", itis_cache))

{'Doc Cache File': 'cache/itis.json',
 'Number of Documents in Cache': 116,
 'Document Number 108': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-06T20:57:44.942788',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Artemisia\\%20tridentata'}]},
  'itis_data': [{'tsn': '35498',
    'nameWInd': 'Artemisia tridentata',
    'nameWOInd': 'Artemisia tridentata',
    'unit1': 'Artemisia',
    'unit2': 'tridentata',
    'usage': 'accepted',
    'kingdom': 'Plantae',
    'parentTSN': '35431',
    'rank': 'Species',
    'date_created': '1996-06-13 14:51:08',
    'date_modified': '2010-10-05 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Plantae'},
     {'rank': 'Subkingdom', 'name': 'Viridiplantae'},
     {'rank': 'Infrakingdom', 'name': 'Streptophyta'},
     {'rank': 'Superdivision', 'name': 'Embryophyta'},
     {'rank': 'Division', 'name': 'Tracheophyta'},
     {'rank':