Consult the ITIS API to retrieve information for each scientific name in the species list and cache the results for later processing. This is done via the ITIS module in the bispy package, which uses various search mechanisms against ITIS and lightly repackages the returned data for use.

In [2]:
# Import needed packages (standard library first, then third-party)
import json
import warnings

import bispy
from IPython.display import display
from joblib import Parallel, delayed
import jsonschema

# Shared bispy objects: the ITIS search client and the utilities object
# whose doc_cache method is used later to write results to disk
itis = bispy.itis.Itis()
bis_utils = bispy.bis.Utils()

# Suppress all warnings from this point onward
warnings.filterwarnings('ignore')

In [3]:
# Open the source WLCI species list created by build-specie-list.ipynb.
# The encoding is given explicitly (JSON text is UTF-8) so the read does not
# depend on the platform's default encoding.
with open("cache/WLCI Species List from Literature.json", "r", encoding="utf-8") as f:
    sp_list = json.load(f)

# De-duplicate the scientific names while keeping their first-seen order.
# dict.fromkeys is used instead of set() because the iteration order of a set
# of strings changes between interpreter sessions, which made this list (and
# everything derived from it) non-reproducible across runs.
sci_name_list = list(dict.fromkeys(spp["Scientific Name"] for spp in sp_list))

In [5]:
# Show the de-duplicated scientific names that will be searched in ITIS
sci_name_list

['Pinus albicaulis',
 'Brachylagus idahoensis',
 'Lepidochelys olivacea',
 'Centrocercus minimus',
 'Microtus agrestis',
 'Kochia prostrata',
 'Shepherdia canadensis',
 'Potorous tridactylus',
 'Ursus arctos',
 'Achnatherum hymenoides',
 'Reithrodontomys megalotis',
 'Pseudotsuga menziesii',
 'Colinus virginianus',
 'Artemisia tridentata ssp. wyomingensis',
 'Lanius ludovicianus',
 'Populus tremuloides',
 'Rangifer tarandus',
 'Elymus elymoides',
 'Salvelinus namaycush',
 'Procapra gutturosa',
 'Pinus contorta',
 'Dendroctonus ponderosae',
 'Spizella breweri',
 'Medicago sativa',
 'Haliaeetus leucocephalus',
 'Purshia tridentata',
 'Juniperus communis',
 'Tetrao tetrix',
 'Orgyia pseudotsugata',
 'Falco sparverius',
 'Clupea pallasii',
 'Otis tarda',
 'Abies lasiocarpa',
 'Festuca idahoensis',
 'Bison bison',
 'Sarcobatus vermiculatus',
 'Peromyscus maniculatus',
 'Rangifer tarandus groenlandicus',
 'Cronartium ribicola',
 'Oncorhynchus clarkii pleuriticus',
 'Oreoscoptes montanus',
 '

In [11]:
# Search ITIS once per scientific name, spreading the requests across
# 8 parallel joblib workers, then report how many results came back
itis_list = Parallel(n_jobs=8)(
    delayed(itis.search)(sci_name) for sci_name in sci_name_list
)
len(itis_list)

116

In [12]:
# Keep only the ITIS results whose processing metadata reports a successful
# search, then report how many remain
itis_cache = [
    result
    for result in itis_list
    if result['processing_metadata']['status'] == 'success'
]
len(itis_cache)

116

In [13]:
# Cache the successful ITIS documents to cache/itis.json and display what
# doc_cache returns (in the recorded output: the cache file path, the number
# of documents, and one sample document for verification)
cache_summary = bis_utils.doc_cache("cache/itis.json", itis_cache)
display(cache_summary)

{'Doc Cache File': 'cache/itis.json',
 'Number of Documents in Cache': 116,
 'Document Number 108': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-06T20:57:44.942788',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Artemisia\\%20tridentata'}]},
  'itis_data': [{'tsn': '35498',
    'nameWInd': 'Artemisia tridentata',
    'nameWOInd': 'Artemisia tridentata',
    'unit1': 'Artemisia',
    'unit2': 'tridentata',
    'usage': 'accepted',
    'kingdom': 'Plantae',
    'parentTSN': '35431',
    'rank': 'Species',
    'date_created': '1996-06-13 14:51:08',
    'date_modified': '2010-10-05 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Plantae'},
     {'rank': 'Subkingdom', 'name': 'Viridiplantae'},
     {'rank': 'Infrakingdom', 'name': 'Streptophyta'},
     {'rank': 'Superdivision', 'name': 'Embryophyta'},
     {'rank': 'Division', 'name': 'Tracheophyta'},
     {'rank':