Consult the ITIS API to retrieve information and cache for later processing. This is done via the ITIS module in the bispy package that uses various search mechanisms against ITIS and slightly repackages the returned data for use.

In [1]:
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import jsonschema

itis = bispy.itis.Itis()
bis_utils = bispy.bis.Utils()

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Open source WLCI list created from build-specie-list.ipynb
with open("cache/WLCI Species List from Literature.json", "r") as f:
    sp_list = json.loads(f.read())
sci_name_list = list(set([spp["Scientific Name"] for spp in sp_list]))

In [3]:
#print initial list
sci_name_list

['Sphaeralcea coccinea',
 'Betula nana',
 'Vulpes vulpes',
 'Arctostaphylos patula',
 'Cronartium ribicola',
 'Juniperus communis var. depressa',
 'Canis latrans',
 'Atriplex gardneri',
 'Artemisiospiza nevadensis',
 'Haliaeetus leucocephalus',
 'Canis lupus',
 'Ictidomys tridecemlineatus',
 'Amphispiza belli',
 'Centrocercus minimus',
 'Lepus townsendii',
 'Rangifer tarandus groenlandicus',
 'Hesperostipa comata',
 'Prunus virginiana',
 'Atriplex canescens',
 'Zea mays',
 'Glaucomys volans',
 'Elymus lanceolatus',
 'Atriplex argentea',
 'Ovis canadensis',
 'Juniperus communis',
 'Brucella abortus',
 'Thrichomys fosteri',
 'Bassia prostrata',
 'Urocitellus armatus',
 'Lepidochelys olivacea',
 'Corvus corax',
 'Brachylagus idahoensis',
 'Dendroctonus ponderosae',
 'Pinus albicaulis',
 'Pascopyrum smithii',
 'Oncorhynchus nerka',
 'Colinus virginianus',
 'Artemisia tridentata',
 'Fringilla montifringilla',
 'Picea engelmannii',
 'Cercocarpus montanus',
 'Reithrodontomys megalotis',
 'Ran

In [4]:
%%time
# Use joblib to run multiple requests for ITIS documents in parallel via known ITIS TSNs
itis_cache = Parallel(n_jobs=8)(delayed(itis.search)(name) for name in sci_name_list)

CPU times: user 377 ms, sys: 86.3 ms, total: 464 ms
Wall time: 7.61 s


In [5]:
# Cache the array of retrieved documents and return/display a random sample for verification
display(bis_utils.doc_cache("cache/itis.json", itis_cache))

{'Doc Cache File': 'cache/itis.json',
 'Number of Documents in Cache': 116,
 'Document Number 36': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-07-31T13:45:33.392860',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Colinus\\%20virginianus'}]},
  'itis_data': [{'tsn': '175863',
    'nameWInd': 'Colinus virginianus',
    'nameWOInd': 'Colinus virginianus',
    'unit1': 'Colinus',
    'unit2': 'virginianus',
    'usage': 'valid',
    'kingdom': 'Animalia',
    'parentTSN': '175862',
    'rank': 'Species',
    'date_created': '1996-06-13 14:51:08',
    'date_modified': '2005-06-08 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Animalia'},
     {'rank': 'Subkingdom', 'name': 'Bilateria'},
     {'rank': 'Infrakingdom', 'name': 'Deuterostomia'},
     {'rank': 'Phylum', 'name': 'Chordata'},
     {'rank': 'Subphylum', 'name': 'Vertebrata'},
     {'rank': 'Infraphylum', 'n