In [1]:
KEY_PATH = "../ebird_api.key"
EBIRD_KEY = ""

# The eBird API key is intentionally excluded from the git repo (via
# gitignore). You MUST request your own API key and save it at KEY_PATH
# before running this notebook.
try:
    with open(KEY_PATH) as f:
        # Strip surrounding whitespace: a trailing newline in the key file
        # would otherwise end up inside the HTTP header value, and header
        # values may not contain newlines.
        EBIRD_KEY = f.read().strip()
except FileNotFoundError as err:
    # Same exception type as before, but with the setup instructions in the
    # message instead of in a string nobody sees when the cell fails.
    raise FileNotFoundError(
        f"No eBird API key found at {KEY_PATH!r}. Request your own API key "
        "and save it to that path (the file is gitignored on purpose)."
    ) from err

In [38]:
from io import StringIO

import pandas as pd
import requests
from tqdm import tqdm

In [5]:
# Get all sightings close to this location.
# Only has one entry per species.
# San Jose, 37.33 N, 121.86 W
url = "https://api.ebird.org/v2/data/obs/geo/recent?lat=37.33&lng=-121.86"

# Empty request body and the auth header carrying the API key.
payload = {}
headers = {
    'X-eBirdApiToken': EBIRD_KEY
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, data=payload, timeout=30)
# Fail loudly on HTTP errors (e.g. a rejected API key) instead of trying to
# parse the error body as sighting data.
response.raise_for_status()
# Wrap the text in StringIO: passing a literal JSON string straight to
# read_json is deprecated in recent pandas versions.
resp_recent = pd.read_json(StringIO(response.text))

In [32]:
# Species codes from the nearby-sightings table, plus how many there are.
species = resp_recent['speciesCode']
n_species = species.shape[0]
print('Species in the list:', n_species)
# Preview the first few codes.
species.head()

Species in the list: 176


0    moudov
1    houspa
2    houfin
3    daejun
4    sonspa
Name: speciesCode, dtype: object

In [40]:
# Get all individual sightings per species.
# One request is made per species code; each response is parsed into its own
# DataFrame and collected in df_list.
df_list = []

# A single Session reuses the HTTP connection across all requests instead of
# opening a new one for every species.
with requests.Session() as session:
    for s in tqdm(species, total=n_species):
        url = f"https://api.ebird.org/v2/data/obs/geo/recent/{s}?lat=37.33&lng=-121.86"
        response = session.get(url, headers=headers, data=payload, timeout=30)
        # Stop at the first failed request rather than silently parsing an
        # error body as sighting data.
        response.raise_for_status()
        # StringIO avoids the deprecated "literal JSON string" form of read_json.
        df_list.append(pd.read_json(StringIO(response.text)))

100%|████████████████████████████████████████████████████████████████████████████████| 176/176 [02:40<00:00,  1.09it/s]

Done





In [97]:
# Stack the per-species frames into a single table and treat every missing
# howMany value as a count of 1.
df = pd.concat(df_list).fillna({'howMany': 1})

In [131]:
# Write this snapshot of the df to disk, index included.
# (the "ebd*" file name means it gets gitignore'd)
df.to_csv('ebd_nearby.csv', index=True)

In [147]:
def decompress(df):
    """
    Decompresses a df so that every counted bird gets its own row.

    Each row is repeated ``howMany`` times and the ``howMany`` column is
    then removed. The input df is not modified, so the function can safely
    be re-run on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``howMany`` column of numeric counts. Fractional
        counts are truncated towards zero, and counts of zero or less
        contribute no rows. Missing counts are not accepted: the integer
        cast raises ValueError.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``howMany``, indexed 0..n-1.
    """
    counts = df['howMany'].astype(int).clip(lower=0).to_numpy()
    # Repeat rows by position rather than by label, because the index may
    # hold duplicate labels (e.g. after concatenating several frames).
    positions = pd.RangeIndex(len(df)).repeat(counts).to_numpy()
    expanded = df.drop(columns='howMany').iloc[positions]
    # drop=True discards the old index directly, which works whatever the
    # index is called and never touches a real column named 'index'.
    return expanded.reset_index(drop=True)

In [155]:
# Reload the saved snapshot (the first CSV column is the index), expand it
# with decompress, and display the result.
df = pd.read_csv('ebd_nearby.csv', index_col=0)
df_d = df.pipe(decompress)
df_d

Unnamed: 0,speciesCode,comName,sciName,locId,locName,obsDt,lat,lng,obsValid,obsReviewed,locationPrivate,subId,exoticCategory
0,moudov,Mourning Dove,Zenaida macroura,L50239426,Saratoga Glen Pl,2025-07-22 16:15,37.282621,-122.002054,True,False,True,S262009268,
1,moudov,Mourning Dove,Zenaida macroura,L50239426,Saratoga Glen Pl,2025-07-22 16:15,37.282621,-122.002054,True,False,True,S262009268,
2,moudov,Mourning Dove,Zenaida macroura,L730351,Charleston Slough/Coast Casey Forebay,2025-07-22 14:52,37.435919,-122.098918,True,False,False,S261993824,
3,moudov,Mourning Dove,Zenaida macroura,L594012,Shoreline Park--Shoreline Lake area,2025-07-22 12:40,37.432341,-122.091837,True,False,False,S261978563,
4,moudov,Mourning Dove,Zenaida macroura,L594012,Shoreline Park--Shoreline Lake area,2025-07-22 12:40,37.432341,-122.091837,True,False,False,S261978563,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32950,graspa,Grasshopper Sparrow,Ammodramus savannarum,L279204,Sunnyvale WPCP (please use a more specific hot...,2025-07-11 15:52,37.419839,-122.014426,True,False,False,S259033263,
32951,graspa,Grasshopper Sparrow,Ammodramus savannarum,L3441700,Santa Teresa CP--Stile Entrance,2025-07-11 08:06,37.204099,-121.805763,True,False,False,S258933870,
32952,comgol,Common Goldeneye,Bucephala clangula,L951032,Salt Pond A11,2025-07-10 09:40,37.445037,-121.996325,True,True,False,S258740331,
32953,clagre,Clark's Grebe,Aechmophorus clarkii,L951032,Salt Pond A11,2025-07-10 09:40,37.445037,-121.996325,True,False,False,S258740331,
