Consult the ITIS API to retrieve information for each species and cache it for later processing. This is done via the ITIS module in the bispy package, which runs various search mechanisms against ITIS and slightly repackages the returned data for use.

In [1]:
#Import needed packages
import json
import requests
import bispy
from IPython.display import display
from joblib import Parallel, delayed

# Shared bispy helpers used by the cells below: `itis` performs the ITIS
# searches and `bis_utils` provides the document-cache routine.
itis = bispy.itis.Itis()
bis_utils = bispy.bis.Utils()

In [2]:
# Load the source WLCI species list created by build-specie-list.ipynb.
# The encoding is given explicitly so the JSON is decoded the same way on every
# platform instead of depending on the locale default.
with open("sources/WLCI Species List from Literature.json", "r", encoding="utf-8") as f:
    sp_list = json.load(f)

# De-duplicate the scientific names while keeping first-seen order, so the list
# (and everything derived from it) comes out in the same order on every run;
# iterating a set of strings gives an order that changes between sessions.
sci_name_list = list(dict.fromkeys(spp["Scientific Name"] for spp in sp_list))

In [3]:
# Show the de-duplicated scientific names that will be searched in ITIS
sci_name_list

['Artemisiospiza nevadensis',
 'Artemisia cana',
 'Bison bison',
 'Procyon lotor',
 'Equus burchelli',
 'Prunus virginiana',
 'Atriplex gardneri',
 'Aythya affinis',
 'Ovis canadensis',
 'Lasiurus cinereus',
 'Grus americana',
 'Atriplex argentea',
 'Amphispiza belli',
 'Tympanuchus pallidicinctus',
 'Poa secunda',
 'Centrocercus minimus',
 'Falco mexicanus',
 'Falco sparverius',
 'Canis lupus',
 'Rangifer tarandus',
 'Aquila chrysaetos',
 'Descurainia sophia',
 'Lepus townsendii',
 'Otis tarda',
 'Arctostaphylos patula',
 'Kochia prostrata',
 'Catostomus platyrhynchus',
 'Procapra gutturosa',
 'Alces alces',
 'Pinus flexilis',
 'Cottus bairdii',
 'Lanius ludovicianus',
 'Juniperus communis var. depressa',
 'Gopherus agassizii',
 'Cercocarpus ledifolius',
 'Clupea pallasii',
 'Lasiurus borealis',
 'Phleum pratense',
 'Corvus corax',
 'Medicago sativa',
 'Atriplex canescens',
 'Gavia stellata',
 'Canis latrans',
 'Oncorhynchus nerka',
 'Setophaga discolor',
 'Bassia prostrata',
 'Krasch

In [70]:
# Search ITIS once per scientific name, spreading the calls over 8 joblib workers.
search_tasks = (delayed(itis.search)(name) for name in sci_name_list)
itis_list = Parallel(n_jobs=8)(search_tasks)

In [69]:
# Keep only the search results whose processing status is 'success', i.e. the
# WLCI scientific names that were matched to an ITIS record.
itis_cache = [
    result for result in itis_list
    if result['processing_metadata']['status'] == 'success'
]

# Report how many names matched instead of echoing every cached document.
len(itis_cache)

162

In [53]:
# Write the matched ITIS documents to the cache file, then show the summary that
# doc_cache returns (file name, document count and one sample document) as a check.
cache_summary = bis_utils.doc_cache("cache/itis.json", itis_cache)
display(cache_summary)

{'Doc Cache File': 'cache/itis.json',
 'Number of Documents in Cache': 162,
 'Document Number 17': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-15T15:34:51.359754',
   'status_message': 'Exact Match',
   'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Falco\\%20sparverius'}]},
  'itis_data': [{'tsn': '175622',
    'nameWInd': 'Falco sparverius',
    'nameWOInd': 'Falco sparverius',
    'unit1': 'Falco',
    'unit2': 'sparverius',
    'usage': 'valid',
    'kingdom': 'Animalia',
    'parentTSN': '175598',
    'rank': 'Species',
    'date_created': '1996-06-13 14:51:08',
    'date_modified': '2011-09-22 00:00:00',
    'biological_taxonomy': [{'rank': 'Kingdom', 'name': 'Animalia'},
     {'rank': 'Subkingdom', 'name': 'Bilateria'},
     {'rank': 'Infrakingdom', 'name': 'Deuterostomia'},
     {'rank': 'Phylum', 'name': 'Chordata'},
     {'rank': 'Subphylum', 'name': 'Vertebrata'},
     {'rank': 'Infraphylum', 'name': 'Gnath