In [1]:
import pandas as pd
import numpy as np

import requests

In [2]:
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point

These are the parameters for the query.  The "place_id" corresponds to DuPage Forest Preserves as defined in iNaturalist

In [4]:
DFP_PlaceID = 159205
iconic_taxa = "Plantae"
d1="2017-05-01"

Query quality_grade=research&identifications=any&iconic_taxa[]=Plantae&place_id=159205&d1=2017-05-01&d2=2022-05-17 Columns id, observed_on_string, observed_on, quality_grade, description, num_identification_agreements, num_identification_disagreements, oauth_application_id, place_guess, latitude, longitude, positioning_device, species_guess, scientific_name, common_name, iconic_taxon_name, taxon_id

In [5]:
query=f"https://api.inaturalist.org/v1/observations?place_id=159205&d1=2017-05-01&iconic_taxa=Plantae&quality_grade=research&order=desc&order_by=created_at&per_page=200&page="

Initialize some control values

In [6]:
page=1
results = [1,2]

In [76]:
reclist = []
while len(results) > 0:
    res = requests.get(query+f"{page}")
    if res.status_code == 200:
        js = res.json()
        results = js["results"]
        for obs in results:
            this = {}
            if obs['taxon']['rank'] == "species":
                this['SciName'] = obs['taxon']['name']
                this['common_name'] = obs['taxon']['preferred_common_name']
            this['longitude'] = obs['geojson']['coordinates'][0]
            this['latitude'] =  obs['geojson']['coordinates'][1]
            this['observed_on'] = obs['observed_on']
            this['who'] = obs['user']['login']
            reclist.append(this)
    page += 1

In [77]:
df = pd.DataFrame(reclist)
df.to_excel('inat_raw_observations.xlsx')

Can use saved observations instead of re-querying iNaturalist

In [7]:
df = pd.read_excel('inat_raw_observations.xlsx')

In [8]:
def get_lb_data_from_soup(soup, sn, symbol):
    #print('Getting Lady Bird data from soup')
    rec = {"SciName": sn, 
           "Symbol": symbol, 
           "Bloom Time:": " ",
           "Jan": 0,
           "Feb": 0,
           "Mar": 0,
           "Apr": 0,
           "May": 0,
           "Jun": 0,
           "Jul": 0,
           "Aug": 0,
           "Sep": 0,
           "Oct": 0,
           "Nov": 0,
           "Dec": 0,
           "Special Value to Native Bees": 0 ,
           "Special Value to Bumble Bees": 0, 
           "Nectar Source:": 0, 
           "Larval Host:": "No",
           "Larval Host(Monarch)": 0,
           "Larval Host(Other)": 0}
    h4s = soup.find_all("h4")
    #print("h4's found: ",h4s)
    for n in h4s:
        #print('\n\n', '####', n, n.string)
        if n.string == "Bloom Information":
            #print("Found Bloom Information")
            sibs = n.find_next_siblings()
            for s in sibs:
                #print(s)
                if s.string == "Bloom Time:":
                    #print(n.parent)
                    #print(s)
                    #print(s.next_sibling)
                    mos = s.next_sibling.split(",")
                    for m in mos:
                        rec[m.strip()] = 1                    
                    rec[s.string] = s.next_sibling
        if n.string == "Benefit":
            sibs = n.find_next_siblings()
            for s in sibs:
                #print(s)
                if s.string == "Nectar Source:":
                    print("found benefit for ", sn, symbol)
                    #print(n.parent)
                    #print(s)
                    #print(s.next_sibling)
                    rec[s.string] = 1
                if s.string == "Larval Host:":
                    print("found benefit for ", sn, symbol)
                    #print(n.parent)
                    #print(s)
                    #print(s.next_sibling)
                    rec[s.string] = s.next_sibling
                    if "onarch" in s.next_sibling:
                        rec["Larval Host(Monarch)"] = 1
                    else: 
                        rec["Larval Host(Other)"] = 1
        if n.string == "Value to Beneficial Insects":
            #print(n.parent.prettify())
            for s in n.find_next_siblings():
                #print(s)
                if s.string: 
                    if (s.string.strip().startswith("Special") and (s.string.find("Honey") == -1)):
                        #print('adding string', s)
                        rec[s.string.replace("\xa0", " ")] = 1
                #print(s.next_sibling)
                if (str(s.next_sibling).strip().startswith("Special") and (str(s.next_sibling).find("Honey") == -1)):
                    #print("adding next sibling", s.next_sibling)
                    rec[str(s.next_sibling.strip().replace("\xa0", " "))] = 1                
    return rec

Run the two above functions once for each species reported, and collect the records into a dataframe

In [9]:
rec_list = []
for plant in dfp_flora.SciName.values:
    #print(plant)
    page, sn, symbol = get_lb_page_for_sci_name(plant)
    rec_list.append(get_lb_data_from_soup(page, sn, symbol))


NameError: name 'dfp_flora' is not defined

In [146]:
plant_df = pd.DataFrame(rec_list)

NameError: name 'rec_list' is not defined

Save the species information to an Excel sheet

In [143]:
plant_df.to_excel("LadyBirdData_iNaturalist_expanded.xlsx")

Can use the saved plant info data if it seems current enough

In [10]:
plant_df = pd.read_excel("LadyBirdData_iNaturalist_expanded.xlsx")

Clean up columns

In [11]:
plant_df = plant_df[['SciName', 'Symbol', 'Bloom Time:', 'Jan', 'Feb', 'Mar',
       'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
       'Special Value to Native Bees', 'Special Value to Bumble Bees',
       'Nectar Source:', 'Larval Host:', 'Larval Host(Monarch)',
       'Larval Host(Other)']]

In [12]:
# Open the shapefile
ecoUnits = gpd.GeoDataFrame.from_file('./Eco Unit Shapefile/')

make a geodataframe from the observations list (the inat has lat/long of observation)
Make  the coordinate reference systems match

In [13]:
gdf_inats = gpd.GeoDataFrame(df,
    geometry=gpd.points_from_xy(df.longitude, df.latitude))
gdf_inats = gdf_inats[['SciName', 'common_name', 'longitude', 'latitude','observed_on', 'who', 'geometry']].dropna()
gdf_inats = gdf_inats.set_crs(crs="EPSG:4326")

In [14]:
eco_lats = ecoUnits.to_crs("EPSG:4326")

Do a spatial join to match each observation to the EUnit it was made in.  Drop multilple observations of the same plant in the same EUnit

In [15]:
mdf = eco_lats.sjoin(gdf_inats, how="left", predicate="contains").sort_values("EUnit")

Add the plant characteristics to the observations

In [16]:
mdf =  mdf.join(plant_df.reset_index().set_index("SciName"), on="SciName")

In [17]:
mdf["habitat_index"] = (mdf["Special Value to Native Bees"] 
     + mdf["Special Value to Bumble Bees"] 
     + mdf["Nectar Source:"] 
     + mdf["Larval Host(Monarch)"] 
     + mdf["Larval Host(Other)"])
mdf["# months with blooms"] = (mdf["Jan"] + mdf["Feb"] + mdf["Mar"] + mdf["Apr"] 
     + mdf["May"] + mdf["Jun"] + mdf["Jul"] + mdf["Aug"] + mdf["Sep"] 
     + mdf["Oct"] + mdf["Nov"] + mdf["Dec"] )
mdf = mdf.drop_duplicates(subset=["EUnit", "SciName"])
int_df = mdf[mdf.habitat_index > 0]

Save the data to excel

In [159]:
mdf = mdf[mdf.habitat_index > 0]  # Should be unnecessary, we already filterd to "Interesting?"
#mdf = mdf.drop_duplicates(subset=["EUnit", "Symbol"])

In [18]:
sum_df = int_df.groupby([ 'EcosName', 'EUnit', 'Acres', 'Shape_Area' ]).agg({'SciName':'count', 
                                                                             'Special Value to Native Bees':'sum', 
                                                                             'Special Value to Bumble Bees':'sum',
                                                                             'Nectar Source:': "sum", 
                                                                             'Larval Host:': "sum",
                                                                             'Larval Host(Monarch)': "sum", 
                                                                             'Larval Host(Other)': "sum",
                                                                             '# months with blooms': "sum"
                                    })

In [173]:
sum_df.sort_values("common_name")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,Unnamed: 27_level_0,Unnamed: 28_level_0,Unnamed: 29_level_0,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,common_name
EcosName,EUnit,SciName,Habitat,Classifica,FlorRating,GeneralCom,CommType,Acres,Shape_Area,observed_on,who,Symbol,Bloom Time:,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Special Value to Native Bees,Special Value to Bumble Bees,Nectar Source:,Larval Host:,Larval Host(Monarch),Larval Host(Other),habitat_index,# months with blooms,Unnamed: 34_level_1
101ST STREET LOT,EWF84,Baptisia alba,Valuable,2,II,Developed,Mowed Areas,2.39,104237.06,2021-05-24,dicot1,BAAL,"Apr , May , Jun , Jul",0.00,0.00,0.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,0.00,No,0.00,0.00,2.00,4.00,1
RECREATION AREA,EMA15,Bouteloua curtipendula,Important,3,III,Eurasian Meadow,Forb Meadow,50.24,2188342.63,2021-01-06,adriansydor,BOCU,"Jun , Jul , Aug , Sep , Oct , Nov",0.00,0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,"Green Skipper butterfly, Dotted Skipper butterfly",0.00,1.00,1.00,6.00,1
RECREATION AREA,EMA15,Eryngium yuccifolium,Important,3,III,Eurasian Meadow,Forb Meadow,50.24,2188342.63,2021-01-06,adriansydor,ERYU,"May , Jun , Jul , Aug",0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,No,0.00,0.00,1.00,4.00,1
RECREATION AREA,EMA15,Lythrum alatum,Important,3,III,Eurasian Meadow,Forb Meadow,50.24,2188342.63,2021-01-06,adriansydor,LYAL4,"May , Jun , Jul , Aug , Sep",0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,1.00,1.00,1.00,No,0.00,0.00,3.00,5.00,1
RECREATION AREA,EMA15,Mimulus ringens,Important,3,III,Eurasian Meadow,Forb Meadow,50.24,2188342.63,2021-01-06,adriansydor,MIRI,"Jun , Jul , Aug , Sep",0.00,0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,"Common Buckeye, Baltimore checkerspots",0.00,1.00,1.00,4.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
RECREATIONAL AREAS,EDA13,Monarda fistulosa,Important,3,II,Eurasian Meadow,Tall-grass Meadow,97.27,4275279.67,2018-12-30,brandon236,MOBR2,"Jun , Jul , Aug , Sep",0.00,0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,1.00,1.00,0.00,No,0.00,0.00,2.00,4.00,4
THUNDERBIRD WOODS,EGV20,Quercus macrocarpa,Critical,4,IV,Forest,Mesic Woodland,80.33,3499307.21,2018-09-14,andrewhipp,QUMA2,"Mar , Apr , May",0.00,0.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,"Edwards' Hairstreak, Horace's Duskywing butterfly",0.00,1.00,1.00,3.00,5
MAPLE WOODS,EMP01,Lobelia cardinalis,Critical,4,IV,Forest,Mesic Upland Forest,62.29,2713353.40,2021-08-15,andrewhipp,LOCA2,"May , Jun , Jul , Aug , Sep , Oct",0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,1.00,No,0.00,0.00,1.00,6.00,8
MAPLE WOODS,EMP01,Lobelia siphilitica,Critical,4,IV,Forest,Mesic Upland Forest,62.29,2713353.40,2021-08-15,andrewhipp,LOSI,"Jul , Aug , Sep , Oct",0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,1.00,1.00,0.00,0.00,1.00,1.00,0.00,No,0.00,0.00,2.00,4.00,12


In [195]:
with pd.ExcelWriter("inat_interesting_units.xlsx") as writer:
    sum_df.to_excel(writer, sheet_name="SummaryData")
    mdf.set_index("EUnit").to_excel(writer, sheet_name="DetailData")

In [85]:
df.groupby(['SciName']).count()['common_name']

SciName
Abutilon theophrasti       2
Acalypha rhomboidea        1
Acer negundo              40
Acer nigrum                1
Acer platanoides           3
                          ..
Vitis riparia              8
Xanthium strumarium       12
Yucca filamentosa          1
Zanthoxylum americanum     2
Zizia aurea               31
Name: common_name, Length: 714, dtype: int64

In [88]:
pd.options.display.float_format = '{:,.2f}'.format