In [2]:
import pandas as pd
from pathlib import Path
import requests
import json
import numpy as np

from process_inat_data import format_inat_data
from create_normalized_taxa import (
     create_taxa_df,   all_ranks, get_row_ranks,
    main_ranks, create_taxon, add_count_column, create_taxa_la_df
)



In [3]:
inat_taxa =  Path('outputs', 'combine_taxa_list_inat_data.csv')
de_taxa =  Path('outputs', 'de_taxa_list.csv')
interactions_path =  Path('outputs', 'interactions.csv')


In [4]:
def log_df(df, nrows=5):
    print(df.shape)
    return df.head(nrows)

In [5]:
raw_data_paths = Path('../data').rglob('observations*.csv')
data_paths = [str(path) for path in raw_data_paths]
print(data_paths)

['../data/gosea/observations-209247.csv', '../data/clarkstown-high-school-north/observations-200303.csv', '../data/cedar-creek-reserve/observations-199064.csv', '../data/los-angeles-bioblitz/observations-190446.csv', '../data/ciencia-ciudadana-peru-bats/observations-199065.csv', '../data/ciencia-ciudadana-peru-bees/observations-199066.csv']


# Add iNat data to LA indicator species

In [34]:
file = '../data/los-angeles-bioblitz/indicator_species.tsv'

indicator_cols = [ 'type', 'taxon_group', 'taxon_id' ]
indicator_df = pd.read_csv(file, sep='\t',  usecols=indicator_cols, dtype=str)
log_df(indicator_df)

(38, 3)


Unnamed: 0,taxon_group,type,taxon_id
0,Birds,Park/Natural Area Species,1409
1,Birds,Park/Natural Area Species,1986
2,Birds,Stream/Riparian Species,4956
3,Birds,Neighborhood Species,5212
4,Birds,Stream/Riparian Species,7109


In [35]:
for index, row in indicator_df.iterrows():
    print(index, end=' ')

    response = requests.get(f'https://api.inaturalist.org/v1/taxa/{row["taxon_id"]}')
    if response.status_code == 200:
        json_data = response.json()['results'][0]
        result = format_inat_data(json_data)
        
        for col in result:
            indicator_df.at[index, col] = result[col]
            
        indicator_df.at[index, 'common_name'] = json_data['preferred_common_name']
        indicator_df.at[index, 'iconic_taxon_name'] = json_data['iconic_taxon_name']
        indicator_df.at[index, 'scientific_name'] = json_data['name']
        indicator_df.at[index, 'image_url'] = json_data['default_photo']['medium_url']
        indicator_df.at[index, 'user_login'] = json_data['default_photo']['attribution']
        indicator_df.at[index, 'parent_id'] = json_data['parent_id']

    

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 

In [36]:
indicator_df['parent_id'] = indicator_df['parent_id'].astype(int)


In [37]:
log_df(indicator_df)

(38, 211)


Unnamed: 0,taxon_group,type,taxon_id,rank,ancestor_ids,species_id,species_wikipedia_url,species_iconic_taxon_name,species_common_name,species_parent_id,...,subspecies_id,subspecies_wikipedia_url,subspecies_iconic_taxon_name,subspecies_common_name,subspecies_parent_id,subspecies_ancestor_ids,subspecies,subspecies_photo_url,subspecies_photo_attribution,subspecies_photo_license_code
0,Birds,Park/Natural Area Species,1409,species,48460|1|2|355675|3|573|1278|1405|1409,1409,http://en.wikipedia.org/wiki/California_quail,Aves,California Quail,1405,...,,,,,,,,,,
1,Birds,Park/Natural Area Species,1986,species,48460|1|2|355675|3|1623|1627|1985|1986,1986,http://en.wikipedia.org/wiki/Greater_roadrunner,Aves,Greater Roadrunner,1985,...,,,,,,,,,,
2,Birds,Stream/Riparian Species,4956,species,48460|1|2|355675|3|67566|4929|597395|4950|4956,4956,http://en.wikipedia.org/wiki/Great_blue_heron,Aves,Great Blue Heron,4950,...,,,,,,,,,,
3,Birds,Neighborhood Species,5212,species,48460|1|2|355675|3|71261|5067|5179|5212,5212,https://en.wikipedia.org/wiki/Red-tailed_hawk,Aves,Red-tailed Hawk,5179,...,,,,,,,,,,
4,Birds,Stream/Riparian Species,7109,species,48460|1|2|355675|3|6888|6912|7108|7109,7109,https://en.wikipedia.org/wiki/Hooded_merganser,Aves,Hooded Merganser,7108,...,,,,,,,,,,


In [38]:
file = '../data/los-angeles-bioblitz/indicator_species_with_inat.csv'
indicator_df.to_csv(file, index=False)

# Add iNat data to GO-SEA field guide


In [42]:
file = '../data/go-sea/field_guide_species.csv'

taxa_df = pd.read_csv(file, dtype=str)
log_df(taxa_df)

(14, 1)


Unnamed: 0,name
0,Porpita
1,Velella
2,Physalia
3,Actinecta
4,Dosima fascicularis


In [47]:
for index, row in taxa_df.iterrows():
    if row["name"] == 'Glaucus':
        taxa_df.at[index, 'taxon_id'] = 50499
    elif row["name"] == 'Janthina':  
        taxa_df.at[index, 'taxon_id'] = 121656
    else: 
        url = f'https://api.inaturalist.org/v1/taxa?q={row["name"]}'
        response = requests.get(url)
        if response.status_code == 200:
            json_data = response.json()['results'][0]        
            taxa_df.at[index, 'taxon_id'] = json_data['id']


        
taxa_df['taxon_id'] = taxa_df['taxon_id'].astype(int)
log_df(taxa_df)

(14, 2)


Unnamed: 0,name,taxon_id
0,Porpita,59679
1,Velella,59699
2,Physalia,117305
3,Actinecta,1210955
4,Dosima fascicularis,462187


In [51]:
for index, row in taxa_df.iterrows():
    print(index, end=' ')

    url = f'https://api.inaturalist.org/v1/taxa/{row["taxon_id"]}'

    response = requests.get(url)
    if response.status_code == 200:
        json_data = response.json()['results'][0]
        result = format_inat_data(json_data)
        
        if 'preferred_common_name' in json_data:
            taxa_df.at[index, 'common_name'] = json_data['preferred_common_name'] 
        taxa_df.at[index, 'scientific_name'] = json_data['name']
        taxa_df.at[index, 'image_url'] = json_data['default_photo']['medium_url']
        taxa_df.at[index, 'user_login'] = json_data['default_photo']['attribution']
        taxa_df.at[index, 'parent_id'] = json_data['parent_id']
        taxa_df.at[index, 'iconic_taxon_name'] = json_data['iconic_taxon_name']

        for col in result:
            taxa_df.at[index, col] = result[col]
            

log_df(taxa_df)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 (14, 180)


Unnamed: 0,name,taxon_id,scientific_name,image_url,user_login,parent_id,iconic_taxon_name,rank,ancestor_ids,genus_id,...,subfamily_id,subfamily_wikipedia_url,subfamily_iconic_taxon_name,subfamily_common_name,subfamily_parent_id,subfamily_ancestor_ids,subfamily,subfamily_photo_url,subfamily_photo_attribution,subfamily_photo_license_code
0,Porpita,59679,Porpita,https://static.inaturalist.org/photos/89090274...,"(c) Tsz-Yan NG, all rights reserved",59692.0,Animalia,genus,48460|1|47534|48921|551473|48922|813988|59692|...,59679,...,,,,,,,,,,
1,Velella,59699,Velella,https://inaturalist-open-data.s3.amazonaws.com...,"(с) Abhishek Jamalabad, некоторые права защище...",59692.0,Animalia,genus,48460|1|47534|48921|551473|48922|813988|59692|...,59699,...,,,,,,,,,,
2,Physalia,117305,Physalia,https://static.inaturalist.org/photos/14273574...,"(c) Flight69, tutti i diritti riservati",117304.0,Animalia,genus,48460|1|47534|48921|551473|152823|777050|11730...,117305,...,,,,,,,,,,
3,Actinecta,1210955,Actinecta,https://inaturalist-open-data.s3.amazonaws.com...,"(c) kmiller34, certains droits réservés (CC BY...",814008.0,Animalia,genus,48460|1|47534|47533|202756|47797|813978|813994...,1210955,...,,,,,,,,,,
4,Dosima fascicularis,462187,Dosima fascicularis,https://static.inaturalist.org/photos/5935878/...,"(c) Donna Eriwata, all rights reserved",462188.0,Animalia,species,48460|1|47120|85493|473790|1091452|144117|2102...,462188,...,,,,,,,,,,


In [52]:
taxa_df.to_csv(file, index=False)