In [37]:
import pandas as pd
from pathlib import Path
import requests
import json
import numpy as np

from process_files import process_inat_data, add_concatenated_columns
from create_normalized_taxa import append_df, create_taxa_df, add_row



In [38]:
# Location of the combined taxa CSV that later cells load with pd.read_csv.
all_taxa = Path('outputs') / 'combine_taxa_list_inat_data.csv'

In [3]:
def log_df(df, nrows=5):
    """Print the shape of ``df`` and return its first ``nrows`` rows.

    Returning the preview (instead of printing it) lets the notebook render
    it as the cell's output.
    """
    dimensions = df.shape
    print(dimensions)
    preview = df.head(nrows)
    return preview

In [4]:
# Recursively collect every observations*.csv under ../data as plain strings.
raw_data_paths = Path('../data').rglob('observations*.csv')
data_paths = list(map(str, raw_data_paths))
print(data_paths)

['../data/clarkstown-high-school-north/observations-200303.csv', '../data/cedar-creek-reserve/observations-199064.csv', '../data/los-angeles-bioblitz/observations-190446.csv', '../data/ciencia-ciudadana-peru-bats/observations-199065.csv', '../data/ciencia-ciudadana-peru-bees/observations-199066.csv']


# Add iNat data to LA indicator species

In [181]:
# Read the three needed columns of the tab-separated indicator species list,
# keeping every value as text.
file = '../data/los-angeles-bioblitz/indicator_species.tsv'

indicator_cols = ['type', 'taxon_group', 'taxon_id']
read_options = {'sep': '\t', 'usecols': indicator_cols, 'dtype': str}
indicator_df = pd.read_csv(file, **read_options)
log_df(indicator_df)

(38, 3)


Unnamed: 0,taxon_group,type,taxon_id
0,Birds,Park/Natural Area Species,1409
1,Birds,Park/Natural Area Species,1986
2,Birds,Stream/Riparian Species,4956
3,Birds,Neighborhood Species,5212
4,Birds,Stream/Riparian Species,7109


In [182]:
# Enrich every indicator taxon with the record returned by the iNaturalist
# /v1/taxa/{id} endpoint. Taxa that could not be fetched are collected and
# reported after the loop instead of being skipped silently.
failed_taxon_ids = []

for index, row in indicator_df.iterrows():
    print(index, end=' ')

    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(
            f'https://api.inaturalist.org/v1/taxa/{row["taxon_id"]}',
            timeout=30,
        )
    except requests.RequestException as error:
        failed_taxon_ids.append((row['taxon_id'], str(error)))
        continue

    if response.status_code != 200:
        failed_taxon_ids.append((row['taxon_id'], f'HTTP {response.status_code}'))
        continue

    results = response.json().get('results') or []
    if not results:
        failed_taxon_ids.append((row['taxon_id'], 'no results'))
        continue

    json_data = results[0]
    result = process_inat_data(json_data)

    for col in result:
        indicator_df.at[index, col] = result[col]

    # The response may omit the common name, or carry no default photo at all,
    # so these fields are read defensively and only written when present.
    photo = json_data.get('default_photo') or {}
    extra_fields = {
        'common_name': json_data.get('preferred_common_name'),
        'iconic_taxon_name': json_data.get('iconic_taxon_name'),
        'scientific_name': json_data.get('name'),
        'image_url': photo.get('medium_url'),
        # NOTE(review): user_login receives the photo attribution text,
        # not an account login - confirm this is what the app expects.
        'user_login': photo.get('attribution'),
    }
    for col, value in extra_fields.items():
        if value is not None:
            indicator_df.at[index, col] = value

if failed_taxon_ids:
    print(f'\nNo iNaturalist data for: {failed_taxon_ids}')

    

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 

In [183]:
# Preview the indicator frame after the API loop added its columns.
log_df(indicator_df)

(38, 74)


Unnamed: 0,taxon_group,type,taxon_id,rank,species_id,species_wikipedia_url,species_iconic_taxon_name,species_common_name,species,species_photo_url,...,image_url,user_login,subspecies_id,subspecies_wikipedia_url,subspecies_iconic_taxon_name,subspecies_common_name,subspecies,subspecies_photo_url,subspecies_photo_attribution,subspecies_photo_license_code
0,Birds,Park/Natural Area Species,1409,species,1409,http://en.wikipedia.org/wiki/California_quail,Aves,California Quail,Callipepla californica,https://inaturalist-open-data.s3.amazonaws.com...,...,https://inaturalist-open-data.s3.amazonaws.com...,"(c) Anne Parsons, some rights reserved (CC BY-NC)",,,,,,,,
1,Birds,Park/Natural Area Species,1986,species,1986,http://en.wikipedia.org/wiki/Greater_roadrunner,Aves,Greater Roadrunner,Geococcyx californianus,https://static.inaturalist.org/photos/30952802...,...,https://static.inaturalist.org/photos/30952802...,"(c) Kim Moore, all rights reserved",,,,,,,,
2,Birds,Stream/Riparian Species,4956,species,4956,http://en.wikipedia.org/wiki/Great_blue_heron,Aves,Great Blue Heron,Ardea herodias,https://inaturalist-open-data.s3.amazonaws.com...,...,https://inaturalist-open-data.s3.amazonaws.com...,"(c) Dan Roach, some rights reserved (CC BY-NC)",,,,,,,,
3,Birds,Neighborhood Species,5212,species,5212,https://en.wikipedia.org/wiki/Red-tailed_hawk,Aves,Red-tailed Hawk,Buteo jamaicensis,https://inaturalist-open-data.s3.amazonaws.com...,...,https://inaturalist-open-data.s3.amazonaws.com...,"(c) Craig K. Hunt, some rights reserved (CC BY...",,,,,,,,
4,Birds,Stream/Riparian Species,7109,species,7109,https://en.wikipedia.org/wiki/Hooded_merganser,Aves,Hooded Merganser,Lophodytes cucullatus,https://inaturalist-open-data.s3.amazonaws.com...,...,https://inaturalist-open-data.s3.amazonaws.com...,"(c) Ashley M Bradford, some rights reserved (C...",,,,,,,,


In [184]:
# The return value of add_concatenated_columns is not used here; judging by the
# output shape it adds the taxon_ids / scientific_names / common_names columns
# to indicator_df directly - TODO confirm against process_files.
add_concatenated_columns(indicator_df)
# Replace every missing value with an empty string.
indicator_df = indicator_df.fillna('')
log_df(indicator_df)

(38, 77)


Unnamed: 0,taxon_group,type,taxon_id,rank,species_id,species_wikipedia_url,species_iconic_taxon_name,species_common_name,species,species_photo_url,...,subspecies_wikipedia_url,subspecies_iconic_taxon_name,subspecies_common_name,subspecies,subspecies_photo_url,subspecies_photo_attribution,subspecies_photo_license_code,taxon_ids,scientific_names,common_names
0,Birds,Park/Natural Area Species,1409,species,1409,http://en.wikipedia.org/wiki/California_quail,Aves,California Quail,Callipepla californica,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|573|1278|1405|1409,Animalia|Chordata|Aves|Galliformes|Odontophori...,Animals|Chordates|Birds|Landfowl|New World Qua...
1,Birds,Park/Natural Area Species,1986,species,1986,http://en.wikipedia.org/wiki/Greater_roadrunner,Aves,Greater Roadrunner,Geococcyx californianus,https://static.inaturalist.org/photos/30952802...,...,,,,,,,,1|2|3|1623|1627|1985|1986,Animalia|Chordata|Aves|Cuculiformes|Cuculidae|...,Animals|Chordates|Birds|Cuckoos|Cuckoos|Roadru...
2,Birds,Stream/Riparian Species,4956,species,4956,http://en.wikipedia.org/wiki/Great_blue_heron,Aves,Great Blue Heron,Ardea herodias,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|67566|4929|4950|4956,Animalia|Chordata|Aves|Pelecaniformes|Ardeidae...,"Animals|Chordates|Birds|Pelicans, Herons, Ibis..."
3,Birds,Neighborhood Species,5212,species,5212,https://en.wikipedia.org/wiki/Red-tailed_hawk,Aves,Red-tailed Hawk,Buteo jamaicensis,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|71261|5067|5179|5212,Animalia|Chordata|Aves|Accipitriformes|Accipit...,"Animals|Chordates|Birds|Hawks, Eagles, Kites, ..."
4,Birds,Stream/Riparian Species,7109,species,7109,https://en.wikipedia.org/wiki/Hooded_merganser,Aves,Hooded Merganser,Lophodytes cucullatus,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|6888|6912|7108|7109,Animalia|Chordata|Aves|Anseriformes|Anatidae|L...,"Animals|Chordates|Birds|Waterfowl|Ducks, Geese..."


In [185]:
# Save the enriched indicator list next to the original TSV (row index omitted).
file = '../data/los-angeles-bioblitz/indicator_species_with_inat.csv'
indicator_df.to_csv(path_or_buf=file, index=False)

# create observation json

Don't convert with dtype=str, since there are columns (latitude, longitude, taxon_ids) that need to be numbers.

In [61]:
# Lookup table: taxon_id -> pipe-separated taxon_ids string.
inat_ids_cols = ['taxon_id', 'taxon_ids']

# taxon_id is cast to integers.
inat_ids_df = pd.read_csv(all_taxa, usecols=inat_ids_cols).astype({'taxon_id': int})

log_df(inat_ids_df)

(3851, 2)


Unnamed: 0,taxon_id,taxon_ids
0,143452,1|47120|47158|47157|47213|143454|143452
1,47727,47126|211194|47124|47729|58321|47727|
2,53178,47126|211194|47124|48151|50638|50636|53178
3,60307,47126|211194|47163|47162|47434|52809|60307
4,47124,47126|211194|47124||||


In [62]:
# Columns read from each observations CSV export (order preserved).
all_cols = [
    'time_observed_at', 'image_url',
    'latitude', 'longitude',
    'user_login',
    'scientific_name', 'common_name', 'taxon_id',
    'id',
    'geoprivacy', 'taxon_geoprivacy', 'coordinates_obscured',
]

In [64]:
# Combine every observations*.csv export of a project directory into a single
# observations.json inside the app's data folder.

# Safari won't parse dates in the format given by iNaturalist
# ("YYYY-MM-DD HH:MM:SS UTC" or "... -0700"), so they are rewritten as ISO 8601.
UTC_TIME_PATTERN = r'([\d-]+) ([\d:]+) UTC'
OFFSET_TIME_PATTERN = r'([\d-]+) ([\d:]+) ([+-]\d{2}):?(\d{2})'

for dir_path in Path().glob('../data/**/'):
    if dir_path.name == 'data':
        continue

    dfs = []
    # Search the matched directory itself so nested project folders work too.
    for file_path in dir_path.glob('observations*.csv'):
        print(file_path.name)
        df = pd.read_csv(file_path, usecols=all_cols)

        # Observations without a taxon get id 0. Assigning the filled column
        # back avoids the chained in-place fillna, which no longer updates the
        # frame under pandas copy-on-write.
        df['taxon_id'] = df['taxon_id'].fillna(0).astype(int)

        df = df.merge(inat_ids_df, on="taxon_id", how="left")

        # UTC timestamps get the "Z" suffix; timestamps with a numeric offset
        # keep that offset (as "-07:00") instead of being mislabelled as UTC.
        df['time_observed_at'] = (
            df['time_observed_at']
            .replace(UTC_TIME_PATTERN, r'\1T\2Z', regex=True)
            .replace(OFFSET_TIME_PATTERN, r'\1T\2\3:\4', regex=True)
        )

        dfs.append(df)

    # pd.concat raises on an empty list; a folder without exports is skipped.
    if not dfs:
        continue

    combine_df = pd.concat(dfs)

    new_path = Path('..', 'app', 'src', 'lib', 'data', dir_path.name)
    new_path.mkdir(parents=True, exist_ok=True)
    combine_df.to_json(new_path / 'observations.json', orient="records")


observations-200303.csv
observations-199064.csv
observations-190446.csv
observations-199065.csv
observations-199066.csv


# create taxa json and csv


In [281]:
# Columns to load from the combined taxa CSV: the taxon's own fields, then the
# name, id and common name of each rank from kingdom down to species.
ranks = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
rank_id_cols = [f'{rank}_id' for rank in ranks]
rank_common_name_cols = [f'{rank}_common_name' for rank in ranks]
columns = [
    'taxon_id', 'scientific_name', 'common_name',
    'rank',
    'taxon_ids', 'scientific_names', 'common_names',
    *ranks,
    *rank_id_cols,
    *rank_common_name_cols,
]

inat_names_df = pd.read_csv(all_taxa, usecols=columns)
log_df(inat_names_df)

(3851, 28)


Unnamed: 0,scientific_name,common_name,taxon_id,kingdom,phylum,class,order,family,genus,species,...,class_common_name,order_id,order_common_name,family_id,family_common_name,genus_id,genus_common_name,taxon_ids,scientific_names,common_names
0,Deidamia inscriptum,Lettered Sphinx,143452,Animalia,Arthropoda,Insecta,Lepidoptera,Sphingidae,Deidamia,Deidamia inscriptum,...,Insects,47157.0,Butterflies and Moths,47213.0,Sphinx Moths,143454.0,,1|47120|47158|47157|47213|143454|143452,Animalia|Arthropoda|Insecta|Lepidoptera|Sphing...,Animals|Arthropods|Insects|Butterflies and Mot...
1,Acer,maples,47727,Plantae,Tracheophyta,Magnoliopsida,Sapindales,Sapindaceae,Acer,,...,dicots,47729.0,"soapberries, cashews, mahoganies, and allies",58321.0,soapberry family,47727.0,maples,47126|211194|47124|47729|58321|47727|,Plantae|Tracheophyta|Magnoliopsida|Sapindales|...,"plants|vascular plants|dicots|soapberries, cas..."
2,Plantago lanceolata,ribwort plantain,53178,Plantae,Tracheophyta,Magnoliopsida,Lamiales,Plantaginaceae,Plantago,Plantago lanceolata,...,dicots,48151.0,"mints, plantains, olives, and allies",50638.0,plantain family,50636.0,plantain,47126|211194|47124|48151|50638|50636|53178,Plantae|Tracheophyta|Magnoliopsida|Lamiales|Pl...,"plants|vascular plants|dicots|mints, plantains..."
3,Poa pratensis,Kentucky bluegrass,60307,Plantae,Tracheophyta,Liliopsida,Poales,Poaceae,Poa,Poa pratensis,...,monocots,47162.0,"grasses, sedges, cattails, and allies",47434.0,grasses,52809.0,Meadow-grasses,47126|211194|47163|47162|47434|52809|60307,Plantae|Tracheophyta|Liliopsida|Poales|Poaceae...,"plants|vascular plants|monocots|grasses, sedge..."
4,Magnoliopsida,dicots,47124,Plantae,Tracheophyta,Magnoliopsida,,,,,...,dicots,,,,,,,47126|211194|47124||||,Plantae|Tracheophyta|Magnoliopsida||||,plants|vascular plants|dicots||||


In [282]:
# Build each project's taxa.json from its observations-*.csv exports:
# the observations are joined with the taxa names table and then passed
# through append_df / create_taxa_df (project helpers).

cols = ['taxon_id', 'user_login', 'image_url', 'id']

for dir_path in Path().glob('../data/**/'):
    if dir_path.name == 'data':
        continue

    dfs = []
    # Search the matched directory itself so nested project folders work too.
    for file_path in dir_path.glob('observations-*.csv'):
        df = pd.read_csv(file_path, usecols=cols)
        # Observations without a taxon cannot contribute to the taxa list.
        df = df.dropna(subset=['taxon_id'])
        df = df.merge(inat_names_df, on="taxon_id", how="left")

        dfs.append(df)

    # pd.concat raises on an empty list; a folder without exports is skipped.
    if not dfs:
        continue

    combine_df = pd.concat(dfs)
    adjust_df = append_df(combine_df)
    taxa_df = create_taxa_df(adjust_df)
    taxa_df['taxon_id'] = taxa_df['taxon_id'].astype(int)

    new_path = Path('..', 'app', 'src', 'lib', 'data') / dir_path.name
    new_path.mkdir(parents=True, exist_ok=True)
    print(new_path)
    taxa_df.to_json(new_path / "taxa.json", orient="records")


../app/src/lib/data/clarkstown-high-school-north
../app/src/lib/data/cedar-creek-reserve
../app/src/lib/data/los-angeles-bioblitz
../app/src/lib/data/ciencia-ciudadana-peru-bats
../app/src/lib/data/ciencia-ciudadana-peru-bees


# create taxa json for LA indicator species

In [310]:
# Load the Los Angeles taxa.json so the indicator labels can be added to it.
path = '../app/src/lib/data/los-angeles-bioblitz/taxa.json'
df = pd.read_json(path)
# Nullable boolean dtype: is_species contains missing values.
df['is_species'] = df['is_species'].astype('boolean')

# Empty placeholder columns for the indicator labels. None (object dtype) is
# used instead of np.nan (float64) because text labels are stored in these
# columns later; writing strings into a float column is deprecated in pandas.
df['taxon_group'] = None
df['type'] = None

log_df(df, 6)


(4161, 15)


Unnamed: 0,id,taxon_id,common_name,scientific_name,user_login,image_url,rank,taxon_ids,common_names,scientific_names,is_species,observations_count,taxa_count,taxon_group,type
0,97386721,36204,Western Fence Lizard,Sceloporus occidentalis,loganc516,https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|26036|26172|36074|36141|36204,Animals|Chordates|Reptiles|Snakes and Lizards|...,Animalia|Chordata|Reptilia|Squamata|Phrynosoma...,True,342,342,,
1,94055566,199840,House Finch,Haemorhous mexicanus,marty_and_the_mamas,https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|3|7251|9079|199910|199840,Animals|Chordates|Birds|Perching Birds|Finches...,Animalia|Chordata|Aves|Passeriformes|Fringilli...,True,146,146,,
2,94889284,48662,Monarch,Danaus plexippus,belis1,https://static.inaturalist.org/photos/15746072...,species,1|47120|47158|47157|47922|48663|48662,Animals|Arthropods|Insects|Butterflies and Mot...,Animalia|Arthropoda|Insecta|Lepidoptera|Nympha...,True,144,144,,
3,91586172,5212,Red-tailed Hawk,Buteo jamaicensis,ki6h,https://static.inaturalist.org/photos/15151968...,species,1|2|3|71261|5067|5179|5212,"Animals|Chordates|Birds|Hawks, Eagles, Kites, ...",Animalia|Chordata|Aves|Accipitriformes|Accipit...,True,140,142,,
4,92725078,3454,Mourning Dove,Zenaida macroura,jennbastian,https://static.inaturalist.org/photos/15357316...,species,1|2|3|2708|2715|3438|3454,Animals|Chordates|Birds|Pigeons and Doves|Pige...,Animalia|Chordata|Aves|Columbiformes|Columbida...,True,126,126,,
5,97306231,47126,plants,Plantae,zacpeterson,https://inaturalist-open-data.s3.amazonaws.com...,kingdom,47126,plants,Plantae,False,123,4567,,


In [311]:
# Inspect the distinct is_species values after the cast to the boolean dtype.
df['is_species'].unique()


<BooleanArray>
[True, False, <NA>]
Length: 3, dtype: boolean

In [312]:
# Confirm the column's dtype.
df['is_species'].dtype

BooleanDtype

In [313]:
# Re-read the enriched indicator list with taxon_id as integers.
file = '../data/los-angeles-bioblitz/indicator_species_with_inat.csv'
indicator_df = pd.read_csv(file).astype({'taxon_id': int})

log_df(indicator_df)


(38, 77)


Unnamed: 0,taxon_group,type,taxon_id,rank,species_id,species_wikipedia_url,species_iconic_taxon_name,species_common_name,species,species_photo_url,...,subspecies_wikipedia_url,subspecies_iconic_taxon_name,subspecies_common_name,subspecies,subspecies_photo_url,subspecies_photo_attribution,subspecies_photo_license_code,taxon_ids,scientific_names,common_names
0,Birds,Park/Natural Area Species,1409,species,1409.0,http://en.wikipedia.org/wiki/California_quail,Aves,California Quail,Callipepla californica,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|573|1278|1405|1409,Animalia|Chordata|Aves|Galliformes|Odontophori...,Animals|Chordates|Birds|Landfowl|New World Qua...
1,Birds,Park/Natural Area Species,1986,species,1986.0,http://en.wikipedia.org/wiki/Greater_roadrunner,Aves,Greater Roadrunner,Geococcyx californianus,https://static.inaturalist.org/photos/30952802...,...,,,,,,,,1|2|3|1623|1627|1985|1986,Animalia|Chordata|Aves|Cuculiformes|Cuculidae|...,Animals|Chordates|Birds|Cuckoos|Cuckoos|Roadru...
2,Birds,Stream/Riparian Species,4956,species,4956.0,http://en.wikipedia.org/wiki/Great_blue_heron,Aves,Great Blue Heron,Ardea herodias,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|67566|4929|4950|4956,Animalia|Chordata|Aves|Pelecaniformes|Ardeidae...,"Animals|Chordates|Birds|Pelicans, Herons, Ibis..."
3,Birds,Neighborhood Species,5212,species,5212.0,https://en.wikipedia.org/wiki/Red-tailed_hawk,Aves,Red-tailed Hawk,Buteo jamaicensis,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|71261|5067|5179|5212,Animalia|Chordata|Aves|Accipitriformes|Accipit...,"Animals|Chordates|Birds|Hawks, Eagles, Kites, ..."
4,Birds,Stream/Riparian Species,7109,species,7109.0,https://en.wikipedia.org/wiki/Hooded_merganser,Aves,Hooded Merganser,Lophodytes cucullatus,https://inaturalist-open-data.s3.amazonaws.com...,...,,,,,,,,1|2|3|6888|6912|7108|7109,Animalia|Chordata|Aves|Anseriformes|Anatidae|L...,"Animals|Chordates|Birds|Waterfowl|Ducks, Geese..."


In [314]:
# List the distinct rank values present in the indicator list.
indicator_df['rank'].unique()

array(['species', 'family', 'genus', 'subspecies'], dtype=object)

add taxon_group and type to taxa.json data

In [315]:
# Copy taxon_group and type from the indicator list onto the taxa rows that
# share a taxon_id, using one keyed lookup instead of scanning df for every
# indicator row.
indicator_labels = (
    indicator_df
    .drop_duplicates(subset='taxon_id', keep='last')  # last row wins, as in a row-by-row loop
    .set_index('taxon_id')
)
is_indicator = df['taxon_id'].isin(indicator_labels.index)

for label_col in ['taxon_group', 'type']:
    # The placeholder columns may be float NaN; cast to object so text labels
    # can be stored without an incompatible-dtype assignment.
    df[label_col] = df[label_col].astype(object)
    df.loc[is_indicator, label_col] = (
        df.loc[is_indicator, 'taxon_id'].map(indicator_labels[label_col])
    )

log_df(df)
     

(4161, 15)


Unnamed: 0,id,taxon_id,common_name,scientific_name,user_login,image_url,rank,taxon_ids,common_names,scientific_names,is_species,observations_count,taxa_count,taxon_group,type
0,97386721,36204,Western Fence Lizard,Sceloporus occidentalis,loganc516,https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|26036|26172|36074|36141|36204,Animals|Chordates|Reptiles|Snakes and Lizards|...,Animalia|Chordata|Reptilia|Squamata|Phrynosoma...,True,342,342,,
1,94055566,199840,House Finch,Haemorhous mexicanus,marty_and_the_mamas,https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|3|7251|9079|199910|199840,Animals|Chordates|Birds|Perching Birds|Finches...,Animalia|Chordata|Aves|Passeriformes|Fringilli...,True,146,146,,
2,94889284,48662,Monarch,Danaus plexippus,belis1,https://static.inaturalist.org/photos/15746072...,species,1|47120|47158|47157|47922|48663|48662,Animals|Arthropods|Insects|Butterflies and Mot...,Animalia|Arthropoda|Insecta|Lepidoptera|Nympha...,True,144,144,Invertebrates,Neighborhood Species
3,91586172,5212,Red-tailed Hawk,Buteo jamaicensis,ki6h,https://static.inaturalist.org/photos/15151968...,species,1|2|3|71261|5067|5179|5212,"Animals|Chordates|Birds|Hawks, Eagles, Kites, ...",Animalia|Chordata|Aves|Accipitriformes|Accipit...,True,140,142,Birds,Neighborhood Species
4,92725078,3454,Mourning Dove,Zenaida macroura,jennbastian,https://static.inaturalist.org/photos/15357316...,species,1|2|3|2708|2715|3438|3454,Animals|Chordates|Birds|Pigeons and Doves|Pige...,Animalia|Chordata|Aves|Columbiformes|Columbida...,True,126,126,,


get all indicator species that are not in taxa.json 

In [316]:
# Indicator taxa whose taxon_id does not occur in the taxa frame.
observed_taxon_ids = df['taxon_id'].unique()
has_observations = indicator_df['taxon_id'].isin(observed_taxon_ids)
no_observations_df = indicator_df[~has_observations].copy()
no_observations_df.shape

(10, 77)

In [317]:
# Placeholder id of 0 for the indicator taxa that are absent from the taxa frame.
no_observations_df['id'] = 0


In [318]:
def create_la_taxa_df(df, ids):
    """Build taxa rows for indicator taxa, and their higher ranks, not yet known.

    For every row of ``df`` and every rank from kingdom to species, a row is
    created with ``add_row`` unless the rank is empty or its id is already
    in ``ids``.

    Parameters
    ----------
    df : DataFrame with one column per rank name, a matching ``<rank>_id``
        column, and ``rank``, ``taxon_group`` and ``type`` columns.
    ids : iterable of taxon ids that already exist and must be skipped.

    Returns
    -------
    DataFrame with one row per new taxon_id (first occurrence kept). When
    nothing is new, an empty frame that still has a ``taxon_id`` column, so
    callers can keep selecting that column.

    NOTE(review): rows whose ``rank`` is not in the list below (the data also
    contains 'subspecies') never receive taxon_group/type - confirm intended.
    NOTE(review): is_species is set to False even for species rows - confirm.
    """
    ranks = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
    # Set membership instead of scanning a list for every rank of every row.
    known_ids = set(ids)
    new_rows = []

    for _, row in df.iterrows():
        # rank_index is kept separate from the row index; it is the positional
        # argument that add_row receives.
        for rank_index, rank in enumerate(ranks):
            if pd.isna(row[rank]):
                continue

            # skip if taxon_id is already in taxa.json
            if int(row[rank + '_id']) in known_ids:
                continue

            temp = add_row(row, rank, rank_index)

            # Only the row for the indicator taxon's own rank carries its labels.
            if row['rank'] == rank:
                temp['taxon_group'] = row['taxon_group']
                temp['type'] = row['type']
            temp['is_species'] = False
            temp['observations_count'] = 0
            temp['taxa_count'] = 0

            new_rows.append(temp)

    if not new_rows:
        return pd.DataFrame(columns=['taxon_id'])

    new_df = pd.DataFrame(new_rows)
    return new_df.drop_duplicates(subset=['taxon_id'])


In [319]:
# Taxon ids already in the taxa frame; only taxa outside this list get new rows.
ids = list(df['taxon_id'].unique())
all_taxa_df = create_la_taxa_df(no_observations_df, ids).astype({'taxon_id': int})

log_df(all_taxa_df)

(19, 15)


Unnamed: 0,id,taxon_id,common_name,scientific_name,user_login,image_url,rank,taxon_ids,common_names,scientific_names,is_species,observations_count,taxa_count,taxon_group,type
0,0,1985,Roadrunners,Geococcyx,"(c) Kim Moore, all rights reserved",https://static.inaturalist.org/photos/30952802...,genus,1|2|3|1623|1627|1985,Animals|Chordates|Birds|Cuckoos|Cuckoos|Roadru...,Animalia|Chordata|Aves|Cuculiformes|Cuculidae|...,False,0,0,,
1,0,1986,Greater Roadrunner,Geococcyx californianus,"(c) Kim Moore, all rights reserved",https://static.inaturalist.org/photos/30952802...,species,1|2|3|1623|1627|1985|1986,Animals|Chordates|Birds|Cuckoos|Cuckoos|Roadru...,Animalia|Chordata|Aves|Cuculiformes|Cuculidae|...,False,0,0,Birds,Park/Natural Area Species
2,0,7108,,Lophodytes,"(c) Ashley M Bradford, some rights reserved (C...",https://inaturalist-open-data.s3.amazonaws.com...,genus,1|2|3|6888|6912|7108,"Animals|Chordates|Birds|Waterfowl|Ducks, Geese...",Animalia|Chordata|Aves|Anseriformes|Anatidae|L...,False,0,0,,
3,0,7109,Hooded Merganser,Lophodytes cucullatus,"(c) Ashley M Bradford, some rights reserved (C...",https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|3|6888|6912|7108|7109,"Animals|Chordates|Birds|Waterfowl|Ducks, Geese...",Animalia|Chordata|Aves|Anseriformes|Anatidae|L...,False,0,0,Birds,Stream/Riparian Species
4,0,9526,Eastern and Western Meadowlarks,Sturnella,"(c) greglasley, algunos derechos reservados (C...",https://inaturalist-open-data.s3.amazonaws.com...,genus,1|2|3|7251|11989|9526,Animals|Chordates|Birds|Perching Birds|New Wor...,Animalia|Chordata|Aves|Passeriformes|Icteridae...,False,0,0,,


In [320]:

# Reorder (and restrict) the columns to those of the frame loaded from taxa.json,
# so both frames line up when concatenated.
all_taxa_df = all_taxa_df[df.columns]
log_df(all_taxa_df)

(19, 15)


Unnamed: 0,id,taxon_id,common_name,scientific_name,user_login,image_url,rank,taxon_ids,common_names,scientific_names,is_species,observations_count,taxa_count,taxon_group,type
0,0,1985,Roadrunners,Geococcyx,"(c) Kim Moore, all rights reserved",https://static.inaturalist.org/photos/30952802...,genus,1|2|3|1623|1627|1985,Animals|Chordates|Birds|Cuckoos|Cuckoos|Roadru...,Animalia|Chordata|Aves|Cuculiformes|Cuculidae|...,False,0,0,,
1,0,1986,Greater Roadrunner,Geococcyx californianus,"(c) Kim Moore, all rights reserved",https://static.inaturalist.org/photos/30952802...,species,1|2|3|1623|1627|1985|1986,Animals|Chordates|Birds|Cuckoos|Cuckoos|Roadru...,Animalia|Chordata|Aves|Cuculiformes|Cuculidae|...,False,0,0,Birds,Park/Natural Area Species
2,0,7108,,Lophodytes,"(c) Ashley M Bradford, some rights reserved (C...",https://inaturalist-open-data.s3.amazonaws.com...,genus,1|2|3|6888|6912|7108,"Animals|Chordates|Birds|Waterfowl|Ducks, Geese...",Animalia|Chordata|Aves|Anseriformes|Anatidae|L...,False,0,0,,
3,0,7109,Hooded Merganser,Lophodytes cucullatus,"(c) Ashley M Bradford, some rights reserved (C...",https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|3|6888|6912|7108|7109,"Animals|Chordates|Birds|Waterfowl|Ducks, Geese...",Animalia|Chordata|Aves|Anseriformes|Anatidae|L...,False,0,0,Birds,Stream/Riparian Species
4,0,9526,Eastern and Western Meadowlarks,Sturnella,"(c) greglasley, algunos derechos reservados (C...",https://inaturalist-open-data.s3.amazonaws.com...,genus,1|2|3|7251|11989|9526,Animals|Chordates|Birds|Perching Birds|New Wor...,Animalia|Chordata|Aves|Passeriformes|Icteridae...,False,0,0,,


In [321]:
# Element-wise check that both frames have the same columns in the same order.
df.columns == all_taxa_df.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [322]:
# Append the new indicator taxa rows to the taxa loaded from taxa.json.
combine_df = pd.concat([df, all_taxa_df])
log_df(combine_df)

(4180, 15)


Unnamed: 0,id,taxon_id,common_name,scientific_name,user_login,image_url,rank,taxon_ids,common_names,scientific_names,is_species,observations_count,taxa_count,taxon_group,type
0,97386721,36204,Western Fence Lizard,Sceloporus occidentalis,loganc516,https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|26036|26172|36074|36141|36204,Animals|Chordates|Reptiles|Snakes and Lizards|...,Animalia|Chordata|Reptilia|Squamata|Phrynosoma...,True,342,342,,
1,94055566,199840,House Finch,Haemorhous mexicanus,marty_and_the_mamas,https://inaturalist-open-data.s3.amazonaws.com...,species,1|2|3|7251|9079|199910|199840,Animals|Chordates|Birds|Perching Birds|Finches...,Animalia|Chordata|Aves|Passeriformes|Fringilli...,True,146,146,,
2,94889284,48662,Monarch,Danaus plexippus,belis1,https://static.inaturalist.org/photos/15746072...,species,1|47120|47158|47157|47922|48663|48662,Animals|Arthropods|Insects|Butterflies and Mot...,Animalia|Arthropoda|Insecta|Lepidoptera|Nympha...,True,144,144,Invertebrates,Neighborhood Species
3,91586172,5212,Red-tailed Hawk,Buteo jamaicensis,ki6h,https://static.inaturalist.org/photos/15151968...,species,1|2|3|71261|5067|5179|5212,"Animals|Chordates|Birds|Hawks, Eagles, Kites, ...",Animalia|Chordata|Aves|Accipitriformes|Accipit...,True,140,142,Birds,Neighborhood Species
4,92725078,3454,Mourning Dove,Zenaida macroura,jennbastian,https://static.inaturalist.org/photos/15357316...,species,1|2|3|2708|2715|3438|3454,Animals|Chordates|Birds|Pigeons and Doves|Pige...,Animalia|Chordata|Aves|Columbiformes|Columbida...,True,126,126,,


In [298]:
# combine_df['observations_count'] = combine_df['observations_count'].astype(int)
# combine_df['taxa_count'] = combine_df['taxa_count'].astype(int)
# combine_df['id'] = combine_df['id'].astype(int)


In [323]:

# Write the combined frame over the Los Angeles taxa.json, one record per row.
new_path = Path('..') / 'app' / 'src' / 'lib' / 'data' / 'los-angeles-bioblitz'
new_path.mkdir(parents=True, exist_ok=True)
taxa_json_path = new_path / 'taxa.json'
combine_df.to_json(taxa_json_path, orient="records")

