This notebook uses functions and modules from the bispy package to query the GBIF API and retrieve information on WLCI species.

GBIF records are retrieved and cached separately for the valid/accepted ITIS species names and for the invalid/not accepted ITIS species names.

In [1]:
#Import needed packages
import requests
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import random

# GBIF helper object from bispy; its summarize_us_species method is called once per species name below
gbif = bispy.gbif.Gbif()
# bispy utilities object; its doc_cache method is used below to write result documents to cache files
bis_utils = bispy.bis.Utils()

In [4]:
# Load the cached ITIS list of valid/accepted species names (produced by ITIS Exploration.ipynb).
# The file is decoded explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding, and json.load parses straight from the file handle.
with open("cache/valid_itis.json", "r", encoding="utf-8") as f:
    valid_itis = json.load(f)

In [9]:
# Number of valid/accepted ITIS records loaded from the cache
len(valid_itis)

116

In [7]:
#%%time
# Query GBIF in parallel (8 joblib workers): one summarize_us_species call is issued
# for the scientific name of every valid/accepted ITIS record
gbif_results = Parallel(n_jobs=8)(
    delayed(gbif.summarize_us_species)(record["scientific_name"])
    for record in valid_itis
)
gbif_results

[{'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-09T17:29:28.026329',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Festuca idahoensis',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Festuca idahoensis']},
  'parameters': {'Scientific Name': 'Festuca idahoensis'},
  'gbif_species': {'key': 2704957,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/2704957',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Plantae'},
    {'rank': 'phylum', 'name': 'Tracheophyta'},
    {'rank': 'class', 'name': 'Liliopsida'},
    {'rank': 'order', 'name': 'Poales'},
    {'rank': 'family', 'name': 'Poaceae'},
    {'rank': 'genus', 'name': 'Festuca'},
    {'rank': 'species', 'name': 'Festuca idahoensis'}],
   'Scientific Name': 'Festuca idahoensis',
   'name_with_source': 'Festuca idahoensis Elmer',
   'rank': 'SPECIES',
   'TaxonomicSt

In [12]:
# Keep only the documents whose processing status is "success", i.e. the cases where
# GBIF matched the valid/accepted ITIS name, then show how many there are
success_gbif = [
    doc
    for doc in gbif_results
    if doc["processing_metadata"]["status"] == "success"
]
len(success_gbif)

114

In [14]:
# Collect and display the documents whose processing status is "failure" for the
# valid/accepted ITIS species names
fail_gbif = [
    doc
    for doc in gbif_results
    if doc["processing_metadata"]["status"] == "failure"
]
fail_gbif

[{'processing_metadata': {'status': 'failure',
   'date_processed': '2019-08-09T17:29:28.026329',
   'status_message': 'Not Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Artemisia tridentata ssp. wyomingensis']},
  'parameters': {'Scientific Name': 'Artemisia tridentata ssp. wyomingensis'}},
 {'processing_metadata': {'status': 'failure',
   'date_processed': '2019-08-09T17:29:28.026329',
   'status_message': 'Not Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Oncorhynchus clarkii pleuriticus']},
  'parameters': {'Scientific Name': 'Oncorhynchus clarkii pleuriticus'}}]

In [15]:
# Write the matched documents to the GBIF cache file, then display the summary that
# doc_cache returns (cache file, document count and a sample document) for verification
gbif_cache_summary = bis_utils.doc_cache("cache/gbif.json", success_gbif)
display(gbif_cache_summary)

{'Doc Cache File': 'cache/gbif.json',
 'Number of Documents in Cache': 114,
 'Document Number 69': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-09T17:29:28.026329',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Clupea pallasii',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Clupea pallasii']},
  'parameters': {'Scientific Name': 'Clupea pallasii'},
  'gbif_species': {'key': 4284072,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/4284072',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Animalia'},
    {'rank': 'phylum', 'name': 'Chordata'},
    {'rank': 'class', 'name': 'Actinopterygii'},
    {'rank': 'order', 'name': 'Clupeiformes'},
    {'rank': 'family', 'name': 'Clupeidae'},
    {'rank': 'genus', 'name': 'Clupea'},
    {'rank': 'species', 'name': 'Clupea pallasii'}],
   'Scientific Name': 'Clupea pallasii

In [16]:
# Load the cached ITIS list of invalid/not accepted species names (produced by ITIS Exploration.ipynb).
# The file is decoded explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding, and json.load parses straight from the file handle.
with open("cache/invalid_itis.json", "r", encoding="utf-8") as f:
    invalid_itis = json.load(f)

In [17]:
# Query GBIF in parallel (8 joblib workers): one summarize_us_species call is issued
# for the scientific name of every invalid/not accepted ITIS record
invalid_result = Parallel(n_jobs=8)(
    delayed(gbif.summarize_us_species)(record["scientific_name"])
    for record in invalid_itis
)

In [20]:
# Display the results to identify which GBIF records matched the invalid/not accepted ITIS names
invalid_result

[{'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-09T17:29:28.026329',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Amphispiza belli',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Amphispiza belli']},
  'parameters': {'Scientific Name': 'Amphispiza belli'},
  'gbif_species': {'key': 2491751,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/2491751',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Animalia'},
    {'rank': 'phylum', 'name': 'Chordata'},
    {'rank': 'class', 'name': 'Aves'},
    {'rank': 'order', 'name': 'Passeriformes'},
    {'rank': 'family', 'name': 'Emberizidae'},
    {'rank': 'genus', 'name': 'Artemisiospiza'},
    {'rank': 'species', 'name': 'Artemisiospiza belli'}],
   'Scientific Name': 'Amphispiza belli',
   'name_with_source': 'Amphispiza belli (Cassin, 1850)',
   'rank': 'SPECIES',
   'T

In [21]:
# Write the documents retrieved for the invalid/not accepted ITIS names to their own
# cache file, then display the summary that doc_cache returns for verification
invalid_gbif_cache_summary = bis_utils.doc_cache(
    "cache/Results of Consultations with Invalid ITIS Species Names/invalid_gbif.json",
    invalid_result,
)
display(invalid_gbif_cache_summary)

{'Doc Cache File': 'cache/Results of Consultations with Invalid ITIS Species Names/invalid_gbif.json',
 'Number of Documents in Cache': 6,
 'Document Number 2': {'processing_metadata': {'status': 'success',
   'date_processed': '2019-08-09T17:29:28.026329',
   'status_message': 'Matched',
   'api': ['https://api.gbif.org/v1/species/suggest?q=Thrichomys fosteri',
    'https://api.gbif.org/v1/occurrence/search?country=US&limit=0&facet=institutionCode&facet=year&facet=basisOfRecord&scientificName=Thrichomys fosteri']},
  'parameters': {'Scientific Name': 'Thrichomys fosteri'},
  'gbif_species': {'key': 6066819,
   'resolvable_identifier': 'http://api.gbif.org/v1/species/6066819',
   'biological_taxonomy': [{'rank': 'kingdom', 'name': 'Animalia'},
    {'rank': 'phylum', 'name': 'Chordata'},
    {'rank': 'class', 'name': 'Mammalia'},
    {'rank': 'order', 'name': 'Rodentia'},
    {'rank': 'family', 'name': 'Echimyidae'},
    {'rank': 'genus', 'name': 'Thrichomys'},
    {'rank': 'species', '