This notebook uses a set of functions and modules from the bispy package to interact with the GBIF API and pull back information on WLCI species.

The GBIF records are retrieved and cached based on both the valid/accepted and the invalid/not accepted ITIS species names.

In [1]:
#Import needed packages
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed

# GBIF helper object; its summarize_us_species method is called below for each scientific name
gbif = bispy.gbif.Gbif()
# BIS utilities object; its doc_cache method is called below to cache the retrieved documents
bis_utils = bispy.bis.Utils()

In [2]:
# Load the cached ITIS exploration list (with species names) produced by ITIS Exploration.ipynb
with open("cache/itis_explore.json", "r") as itis_cache_file:
    itis_explore = json.load(itis_cache_file)

In [3]:
# Request a GBIF summary for every ITIS scientific name, running the requests
# in parallel across 8 joblib workers
gbif_results = Parallel(n_jobs=8)(
    delayed(gbif.summarize_us_species)(species_name)
    for species_name in [record["scientific_name"] for record in itis_explore]
)

In [4]:
# Keep only the GBIF results whose processing status is "success",
# i.e. the cases where a GBIF species name matched the ITIS name
success_gbif = [
    summary
    for summary in gbif_results
    if summary["processing_metadata"]["status"] == "success"
]
success_gbif

[{'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-27T20:42:11.781176',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Ictidomys tridecemlineatus',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Ictidomys tridecemlineatus']},
  'parameters': {'Scientific Name': 'Ictidomys tridecemlineatus'},
  'gbif_species': {'key': 7994322,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/7994322',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Animalia'},
    {'rank': 'phylum', 'name': 'Chordata'},
    {'rank': 'class', 'name': 'Mammalia'},
    {'rank': 'order', 'name': 'Rodentia'},
    {'rank': 'family', 'name': 'Sciuridae'},
    {'rank': 'genus', 'name': 'Ictidomys'},
    {'rank': 'species', 'name': 'Ictidomys tridecemlineatus'}],
   'Scientific Name': 'Ictidomys tridecemlineatus',
   'name_with_source': 'Ictidomys tridecemlinea

In [5]:
# Write the successful GBIF documents to the cache file, then show the summary
# that doc_cache returns (including a sample document) for verification
gbif_cache_summary = bis_utils.doc_cache("cache/gbif.json", success_gbif)
display(gbif_cache_summary)

{'Doc Cache File': 'cache/gbif.json',
 'Number of Documents in Cache': 168,
 'Document Number 10': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-27T20:42:11.781176',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Clupea pallasii',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Clupea pallasii']},
  'parameters': {'Scientific Name': 'Clupea pallasii'},
  'gbif_species': {'key': 4284072,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/4284072',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Animalia'},
    {'rank': 'phylum', 'name': 'Chordata'},
    {'rank': 'class', 'name': 'Actinopterygii'},
    {'rank': 'order', 'name': 'Clupeiformes'},
    {'rank': 'family', 'name': 'Clupeidae'},
    {'rank': 'genus', 'name': 'Clupea'},
    {'rank': 'species', 'name': 'Clupea pallasii'}],
   'Scientific Name': 'Clupea pallasii

In [6]:
# Load the cached list of invalid/not accepted ITIS species names produced by ITIS Exploration.ipynb
with open("cache/invalid_itis.json", "r") as invalid_cache_file:
    invalid_itis = json.load(invalid_cache_file)

In [7]:
# Request a GBIF summary for every invalid/not accepted ITIS scientific name,
# running the requests in parallel across 8 joblib workers
invalid_result = Parallel(n_jobs=8)(
    delayed(gbif.summarize_us_species)(species_name)
    for species_name in [record["scientific_name"] for record in invalid_itis]
)

In [8]:
# Display the results to identify which GBIF record matched each invalid/not accepted ITIS name
invalid_result

[{'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-27T20:42:11.781176',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Clupea pallasi',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Clupea pallasi']},
  'parameters': {'Scientific Name': 'Clupea pallasi'},
  'gbif_species': {'key': 4284072,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/4284072',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Animalia'},
    {'rank': 'phylum', 'name': 'Chordata'},
    {'rank': 'class', 'name': 'Actinopterygii'},
    {'rank': 'order', 'name': 'Clupeiformes'},
    {'rank': 'family', 'name': 'Clupeidae'},
    {'rank': 'genus', 'name': 'Clupea'},
    {'rank': 'species', 'name': 'Clupea pallasii'}],
   'Scientific Name': 'Clupea pallasii',
   'name_with_source': 'Clupea pallasii Valenciennes, 1847',
   'rank': 'SPECIES',
   'TaxonomicSt