In [1]:
import json

# Accumulator for the per-species documents assembled later in the notebook.
taxonomy_lookup = list()

In [2]:
def get_wlci_xdd_cache():
    '''
    description: Load the cached WLCI xDD species mentions.
    Reads ../sources/WLCI_Species_List_from_Literature.json (resolved against the
    current working directory) and returns the parsed JSON content.
    '''
    with open("../sources/WLCI_Species_List_from_Literature.json", "r") as cache_file:
        return json.load(cache_file)

def get_wlci_xdd_info(name, wlci_sp_mentions=None):
    '''
    description: Extract summary information for a provided species from the WLCI_Species_List_from_Literature.json file.
    This information summarizes mentions of scientific names from WLCI literature included in the xDD database.

    parameters:
        name: scientific name to summarize (compared for equality against each mention's 'Scientific Name').
        wlci_sp_mentions: optional, already-loaded list of mention records. When omitted the
            cache file is read through get_wlci_xdd_cache(); pass it in when calling this
            function in a loop so the file is not re-read for every species.

    returns: dict with the keys
        'scientific_name'   - the name that was passed in
        'wlci_xdd_per'      - unique DOIs mentioning the name, as a percentage of the unique
                              'xdd_id' values in the mentions (0.0 when there are no mentions at all)
        'wlci_xdd_hits'     - sum of 'n_hits' over the mentions of the name
        'wlci_xdd_doi_list' - unique DOIs mentioning the name, in first-seen order
    '''
    if wlci_sp_mentions is None:
        wlci_sp_mentions = get_wlci_xdd_cache()

    unique_wlci_xdd_articles = {spp["xdd_id"] for spp in wlci_sp_mentions}

    name_mentions = [mention for mention in wlci_sp_mentions if mention['Scientific Name'] == name]
    count_hits = sum(int(mention['n_hits']) for mention in name_mentions)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result does
    # not change from run to run the way list(set(...)) does for strings.
    doi_list = list(dict.fromkeys(mention['doi'] for mention in name_mentions))

    # Percent of wlci_xdd articles that mention the species "name".
    # Guard against an empty cache, which would otherwise divide by zero.
    if unique_wlci_xdd_articles:
        wlci_xdd_per = len(doi_list) / len(unique_wlci_xdd_articles) * 100
    else:
        wlci_xdd_per = 0.0

    wlci_xdd_info = dict()
    wlci_xdd_info['scientific_name'] = name
    wlci_xdd_info['wlci_xdd_per'] = wlci_xdd_per
    wlci_xdd_info['wlci_xdd_hits'] = count_hits
    wlci_xdd_info['wlci_xdd_doi_list'] = doi_list
    return wlci_xdd_info
    


In [3]:
# Imports the ITIS cache and grabs summary information for a species
def itis_summary(specie_name, itis_cache=None):
    '''
    description: Extract summary information for a provided species from the itis.json file.
    This information summarizes relevant ITIS information.

    parameters:
        specie_name: scientific name, matched against record['parameters']['Scientific Name'].
        itis_cache: optional, already-loaded list of ITIS cache records. When omitted,
            ../cache/itis.json is read; pass it in when calling this function in a loop
            so the file is not re-read for every species.

    returns: dict of 'itis_*' fields. Empty when no cache record matches the name or the
        matching record carries no data entries.
        - 'itis_tsn', 'itis_tsn_usage', 'itis_<rank>' and 'itis_common_names' (English only)
          come from entries whose usage is 'valid' or 'accepted'.
        - 'itis_related_tsn' lists the TSNs of all other entries.
        - 'itis_geographic_division' and 'itis_native_to' are read from every entry that
          has them, so a later entry overwrites an earlier one.
          NOTE(review): confirm whether these should be limited to the valid/accepted entry.
    '''
    if itis_cache is None:
        with open('../cache/itis.json', 'r') as f:
            itis_cache = json.load(f)

    itis_data = dict()
    record = next(
        (rec for rec in itis_cache if rec['parameters']['Scientific Name'] == specie_name),
        None,
    )
    if record is None:
        return itis_data

    # Collected across ALL entries; it must live outside the loop, otherwise each
    # iteration starts from an empty list and only one related TSN survives.
    itis_related_tsn = []
    for itis_entry in record.get('data') or []:
        if itis_entry['usage'] in ('valid', 'accepted'):
            itis_data['itis_tsn'] = itis_entry['tsn']
            itis_data['itis_tsn_usage'] = itis_entry['usage']
            for level in itis_entry['biological_taxonomy']:
                rank = level['rank']
                itis_data[f'itis_{rank}'] = level['name']
            if 'commonnames' in itis_entry:
                # dict.fromkeys de-duplicates while keeping a reproducible order.
                itis_data['itis_common_names'] = list(dict.fromkeys(
                    nm['name'] for nm in itis_entry['commonnames'] if nm['language'] == 'English'
                ))
        else:
            itis_related_tsn.append(itis_entry['tsn'])

        if 'geographicDivision' in itis_entry:
            itis_data['itis_geographic_division'] = list(dict.fromkeys(
                i['geographic_value'] for i in itis_entry['geographicDivision']
            ))
        if 'jurisdiction' in itis_entry:
            itis_data['itis_native_to'] = list(dict.fromkeys(
                i['jurisdiction_value'] for i in itis_entry['jurisdiction'] if i['origin'] == 'Native'
            ))

    if itis_related_tsn:
        itis_data['itis_related_tsn'] = itis_related_tsn

    return itis_data

In [4]:
wlci_sp_mentions = get_wlci_xdd_cache()
# Unique scientific names in first-seen order, so the run (and the last
# `specie_document` left in scope after the loop) is the same on every kernel.
sci_name_list = list(dict.fromkeys(spp["Scientific Name"] for spp in wlci_sp_mentions))

# Start from an empty list so re-running this cell rebuilds the lookup
# instead of appending a second copy of every species.
taxonomy_lookup = []

for name in sci_name_list:
    # One flat document per species: literature-mention summary plus ITIS summary.
    specie_document = dict()

    wlci_xdd_info = get_wlci_xdd_info(name)
    specie_document.update(wlci_xdd_info)

    itis_data = itis_summary(name)
    specie_document.update(itis_data)

    taxonomy_lookup.append(specie_document)

In [5]:
# Display the document left in `specie_document` by the loop above,
# i.e. the one built for the last species it processed.
specie_document

{'scientific_name': 'Lepidochelys olivacea',
 'wlci_xdd_per': 3.4482758620689653,
 'wlci_xdd_hits': 1,
 'wlci_xdd_doi_list': ['10.1002/jwmg.155'],
 'itis_tsn': '173840',
 'itis_tsn_usage': 'valid',
 'itis_Kingdom': 'Animalia',
 'itis_Subkingdom': 'Bilateria',
 'itis_Infrakingdom': 'Deuterostomia',
 'itis_Phylum': 'Chordata',
 'itis_Subphylum': 'Vertebrata',
 'itis_Infraphylum': 'Gnathostomata',
 'itis_Superclass': 'Tetrapoda',
 'itis_Class': 'Reptilia',
 'itis_Order': 'Testudines',
 'itis_Suborder': 'Cryptodira',
 'itis_Superfamily': 'Chelonioidea',
 'itis_Family': 'Cheloniidae',
 'itis_Subfamily': 'Carettinae',
 'itis_Genus': 'Lepidochelys',
 'itis_Species': 'Lepidochelys olivacea',
 'itis_common_names': ['Pacific Ridley Sea Turtle',
  'Olive Ridley Sea Turtle',
  'Pacific ridley',
  'Olive Ridley'],
 'itis_geographic_division': ['Southern Asia',
  'Western Atlantic Ocean',
  'Caribbean',
  'Africa',
  'East Pacific',
  'Australia',
  'Eastern Atlantic Ocean',
  'Indo-West Pacific',
 

In [6]:
# Load the ITIS cache and show its first record to inspect the raw structure.
with open('../cache/itis.json', 'r') as itis_file:
    itis_cache = json.load(itis_file)
itis_cache[0]

{'processing_metadata': {'status': 'success',
  'date_processed': '2019-09-25T16:45:51.362570',
  'status_message': 'Exact Match',
  'details': [{'Exact Match': 'https://services.itis.gov/?wt=json&rows=10&q=nameWOInd:Otis\\%20tarda'}]},
 'parameters': {'Scientific Name': 'Otis tarda'},
 'data': [{'tsn': '176419',
   'nameWInd': 'Otis tarda',
   'nameWOInd': 'Otis tarda',
   'unit1': 'Otis',
   'unit2': 'tarda',
   'usage': 'valid',
   'credibilityRating': 'TWG standards met',
   'taxonAuthor': 'Linnaeus, 1758',
   'kingdom': 'Animalia',
   'parentTSN': '176418',
   'rankID': '220',
   'rank': 'Species',
   '_version_': 1643585546269753353,
   'date_created': '1996-06-13 14:51:08',
   'date_modified': '2006-11-28 00:00:00',
   'expert': [{'reference_type': 'EXP',
     'expert_id': '11',
     'expert_name': 'Alan P. Peterson, M.D.',
     'expert_comment': 'PO Box 1999 Walla Walla, Washington 99362-0999',
     'create_date': '2001-09-28 00:00:00',
     'update_date': ''}],
   'publication