In [2]:
import pandas as pd
import numpy as np

import requests

In [86]:
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point

These are the parameters for the query. The `place_id` value corresponds to the DuPage Forest Preserves place as defined in iNaturalist.

In [None]:
# Parameters describing the iNaturalist search.
DFP_PlaceID = 159205     # iNaturalist place id (DuPage Forest Preserves, per the note above)
iconic_taxa = "Plantae"  # was `iconic_taxa-"Plantae"`: a subtraction on an undefined name (NameError)
d1 = "2017-05-01"        # earliest observation date, YYYY-MM-DD

Query: `quality_grade=research&identifications=any&iconic_taxa[]=Plantae&place_id=159205&d1=2017-05-01&d2=2022-05-17`

Columns: id, observed_on_string, observed_on, quality_grade, description, num_identification_agreements, num_identification_disagreements, oauth_application_id, place_guess, latitude, longitude, positioning_device, species_guess, scientific_name, common_name, iconic_taxon_name, taxon_id

In [32]:
# Base URL of the paged observation search. It deliberately ends in "page="
# so that the page number can be appended for each request.
query = (
    "https://api.inaturalist.org/v1/observations"
    "?place_id=159205"
    "&d1=2017-05-01"
    "&iconic_taxa=Plantae"
    "&quality_grade=research"
    "&order=desc"
    "&order_by=created_at"
    "&per_page=200"
    "&page="
)

Initialize some control values

In [75]:
# Pagination state: start at the first page, and seed `results` with a
# non-empty placeholder so a loop conditioned on "results is non-empty"
# is entered at least once.
page, results = 1, [1, 2]

In [76]:
# Page through the observation search and flatten every observation into one
# record dict (scientific/common name, coordinates, date, observer).
reclist = []
page = 1  # reset here so the cell can be re-run without re-running the cell above
while True:
    # A timeout keeps a stalled connection from hanging the notebook forever.
    res = requests.get(query + f"{page}", timeout=60)
    if res.status_code != 200:
        # Stop on an error response. Previously `results` kept its last
        # non-empty value here, so a persistent error looped forever.
        print(f"Stopping at page {page}: HTTP {res.status_code}")
        break
    results = res.json()["results"]
    if not results:
        break  # an empty page means everything has been fetched
    for obs in results:
        this = {}
        taxon = obs.get('taxon') or {}
        # Names are only recorded for species-rank identifications; other
        # ranks leave these keys absent (NaN in the resulting DataFrame).
        if taxon.get('rank') == "species":
            this['SciName'] = taxon['name']
            # Not every taxon carries a common name; record None instead of raising.
            this['common_name'] = taxon.get('preferred_common_name')
        # NOTE(review): `geojson` is presumably null when coordinates are
        # hidden -- confirm against the API; None is recorded in that case.
        coords = (obs.get('geojson') or {}).get('coordinates') or [None, None]
        this['longitude'] = coords[0]
        this['latitude'] = coords[1]
        this['observed_on'] = obs['observed_on']
        this['who'] = obs['user']['login']
        reclist.append(this)
    page += 1

In [77]:
# One row per fetched observation.
df = pd.DataFrame(reclist)
# index=False: the default integer index carries no information, and writing
# it makes the file reload with a spurious "Unnamed: 0" column.
df.to_excel('inat_raw_observations.xlsx', index=False)

The saved observations can be loaded instead of re-querying iNaturalist.

In [None]:
# Reload the observation table from the workbook written by the save cell.
df = pd.read_excel(io='inat_raw_observations.xlsx')

In [27]:
def _text_after(node):
    """Return the text node immediately following ``node`` as a plain str, or None.

    Only string siblings count; a tag or a missing sibling yields None.
    Converting to a plain ``str`` avoids storing a live parse-tree node in
    the record.
    """
    sibling = node.next_sibling
    return str(sibling) if isinstance(sibling, str) else None


def get_lb_data_from_soup(soup, sn, symbol):
    """Build one flat record of bloom and pollinator data from a parsed plant page.

    The page's ``h4`` headings are scanned for three sections -- "Bloom
    Information", "Benefit" and "Value to Beneficial Insects" -- and the
    labelled values among each heading's following siblings are copied into
    the record.

    Parameters
    ----------
    soup
        Parsed page exposing ``find_all("h4")``; the returned nodes must
        provide ``string``, ``next_sibling`` and ``find_next_siblings()``
        (presumably a BeautifulSoup document -- confirm against the caller).
    sn
        Scientific name, stored unchanged under "SciName".
    symbol
        Plant symbol, stored unchanged under "Symbol".

    Returns
    -------
    dict
        Always contains the same base keys: "SciName", "Symbol",
        "Bloom Time:" (raw text, default " "), one 0/1 flag per month
        "Jan".."Dec", 0/1 flags "Special Value to Native Bees",
        "Special Value to Bumble Bees", "Nectar Source:",
        "Larval Host(Monarch)", "Larval Host(Other)", and "Larval Host:"
        (raw text, default "No"). Any other non-honey-bee "Special ..." label
        found on the page is added as an extra key set to 1.
    """
    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    # Insertion order is kept identical to the original literal so the column
    # order of a DataFrame built from these records does not change.
    rec = {"SciName": sn, "Symbol": symbol, "Bloom Time:": " "}
    rec.update(dict.fromkeys(months, 0))
    rec.update({
        "Special Value to Native Bees": 0,
        "Special Value to Bumble Bees": 0,
        "Nectar Source:": 0,
        "Larval Host:": "No",
        "Larval Host(Monarch)": 0,
        "Larval Host(Other)": 0,
    })

    for heading in soup.find_all("h4"):
        title = heading.string

        if title == "Bloom Information":
            for node in heading.find_next_siblings():
                if node.string != "Bloom Time:":
                    continue
                bloom_text = _text_after(node)
                if bloom_text is None:
                    # Label present but no text follows it: keep the defaults
                    # rather than crash on a missing value.
                    continue
                rec["Bloom Time:"] = bloom_text
                for token in bloom_text.split(","):
                    month = token.strip()
                    # Only recognised month names set a flag; any other
                    # token would otherwise become a stray key.
                    if month in months:
                        rec[month] = 1

        elif title == "Benefit":
            for node in heading.find_next_siblings():
                if node.string == "Nectar Source:":
                    print("found benefit for ", sn, symbol)
                    rec["Nectar Source:"] = 1
                elif node.string == "Larval Host:":
                    print("found benefit for ", sn, symbol)
                    # A label with no following text is recorded as "" and
                    # counted under "Other".
                    host_text = _text_after(node) or ""
                    rec["Larval Host:"] = host_text
                    # "onarch" matches both "Monarch" and "monarch".
                    if "onarch" in host_text:
                        rec["Larval Host(Monarch)"] = 1
                    else:
                        rec["Larval Host(Other)"] = 1

        elif title == "Value to Beneficial Insects":
            for node in heading.find_next_siblings():
                # The label may be the sibling's own text or the text node
                # right after it, so both are checked.
                for candidate in (node.string, node.next_sibling):
                    if not isinstance(candidate, str):
                        continue
                    # Normalise non-breaking spaces AND strip, so the key
                    # matches the preset "Special Value to ..." keys. The
                    # unstripped form used to create a separate stray key.
                    label = candidate.replace("\xa0", " ").strip()
                    if label.startswith("Special") and "Honey" not in label:
                        rec[label] = 1

    return rec

Run the two above functions once for each species reported, and collect the records into a dataframe

In [None]:
rec_list = []
# For every scientific name in dfp_flora, look the plant up and turn the
# resulting (page, scientific name, symbol) triple into one record.
rec_list = [
    get_lb_data_from_soup(*get_lb_page_for_sci_name(plant))
    for plant in dfp_flora["SciName"].values
]


In [31]:
# One row per collected species record.
plant_df = pd.DataFrame(data=rec_list)

Save the species information to an Excel sheet

In [32]:
# index=False: the default integer index carries no information, and writing
# it makes the file reload with a spurious "Unnamed: 0" column.
plant_df.to_excel("LadyBirdData_iNaturalist_expanded.xlsx", index=False)

Can use the saved plant info data if it seems current enough

In [87]:
# Reload the per-species table from the workbook written by the save cell.
plant_df = pd.read_excel(io="LadyBirdData_iNaturalist_expanded.xlsx")

In [94]:
# Attach the per-species plant attributes to each observation by scientific name.
# NOTE(review): the original line did not parse (unbalanced parenthesis and a
# dangling `subset.join`), so the intended `subset=` columns are unknown.
# Exact duplicate rows are dropped here; restore the intended subset if a
# narrower de-duplication was meant.
oo = (
    df.drop_duplicates()
    .join(plant_df.reset_index().set_index("SciName"), on="SciName")
)

In [85]:
# Number of observations with a common name recorded, per scientific name.
df.groupby('SciName')['common_name'].count()

SciName
Abutilon theophrasti       2
Acalypha rhomboidea        1
Acer negundo              40
Acer nigrum                1
Acer platanoides           3
                          ..
Vitis riparia              8
Xanthium strumarium       12
Yucca filamentosa          1
Zanthoxylum americanum     2
Zizia aurea               31
Name: common_name, Length: 714, dtype: int64

In [88]:
# Display floats with a thousands separator and two decimal places.
two_decimals_with_commas = '{:,.2f}'.format
pd.options.display.float_format = two_decimals_with_commas