In [21]:
KEY_PATH = "../ebird_api.key"
EBIRD_KEY = ""

# If you get an error (i.e. FileNotFoundError), that means you don't have the
# API key set up. You MUST request your own API key. It is intentionally
# excluded from the git repo (via gitignore).
with open(KEY_PATH) as f:
    # Strip surrounding whitespace: key files normally end with a newline, and
    # requests refuses to send header values that contain newline characters.
    EBIRD_KEY = f.read().strip()

In [3]:
import requests
import pandas as pd
from tqdm import tqdm

In [43]:
# Query settings:
#   lat/lng - San Jose (37.33 N, 121.86 W)
#   back    - look 30 days back
#   dist    - 50 km search radius
params = dict(lat=37.33, lng=-121.86, back=30, dist=50)

In [67]:
def get_species(lat, lng, back=14, dist=25, timeout=30):
    """
    Get recent sightings of all species by the specified location.
    Per the API, only lists one entry per species.
    @param lat: Latitude to 2 decimal places (-90 to 90)
    @param lng: Longitude to 2 decimal places (-180 to 180)
    @param back: Days back to fetch observations (1 to 30)
    @param dist: Search radius (km) from given position (0 to 50)
    @param timeout: Seconds to wait for the server before giving up
    @return: List of species codes (empty if nothing was reported)
    @raise requests.HTTPError: If the API answers with a 4xx/5xx status
    """
    # https://api.ebird.org/v2/data/obs/geo/recent
    url = "https://api.ebird.org/v2/data/obs/geo/recent"
    # Let requests build (and escape) the query string.
    query = {'lat': lat, 'lng': lng, 'back': back, 'dist': dist}
    headers = {'X-eBirdApiToken': EBIRD_KEY}
    # The timeout keeps a stalled connection from hanging the notebook.
    resp = requests.get(url, params=query, headers=headers, timeout=timeout)
    # Fail loudly on a bad key / bad parameters instead of trying to parse
    # an error body as observation data.
    resp.raise_for_status()

    # We only care about the speciesCode of each observation. Reading the
    # parsed JSON directly also copes with an empty result list.
    return [obs['speciesCode'] for obs in resp.json()]

In [69]:
def get_sightings(species, lat, lng, back=14, dist=25, timeout=30):
    """
    Get recent sightings of each of the listed species by the specified location.
    @param species: List of species
    @param lat: Latitude to 2 decimal places (-90 to 90)
    @param lng: Longitude to 2 decimal places (-180 to 180)
    @param back: Days back to fetch observations (1 to 30)
    @param dist: Search radius (km) from given position (0 to 50)
    @param timeout: Seconds to wait for the server on each request
    @return: Df of all sightings (an empty df if there are none)
    @raise requests.HTTPError: If the API answers with a 4xx/5xx status
    """
    # Local import: only needed to hand the response text to pd.read_json,
    # which no longer accepts literal JSON strings without a deprecation warning.
    from io import StringIO

    query = {'lat': lat, 'lng': lng, 'back': back, 'dist': dist}

    # Get all individual sightings per species.
    df_list = []
    # One session for all requests so the connection to the API is reused.
    with requests.Session() as session:
        session.headers.update({'X-eBirdApiToken': EBIRD_KEY})
        for s in tqdm(species):
            # https://api.ebird.org/v2/data/obs/geo/recent/{{speciesCode}}
            url = f"https://api.ebird.org/v2/data/obs/geo/recent/{s}"
            resp = session.get(url, params=query, timeout=timeout)
            # Stop on HTTP errors rather than parsing an error body as data.
            resp.raise_for_status()

            sightings = pd.read_json(StringIO(resp.text))
            # Skip empty results; they add no rows to the combined df.
            if not sightings.empty:
                df_list.append(sightings)

    # pd.concat raises on an empty list (no species given / nothing sighted).
    if not df_list:
        return pd.DataFrame()

    # ignore_index gives every row a unique label; otherwise each species'
    # frame restarts at 0 and the combined index contains duplicates.
    df = pd.concat(df_list, ignore_index=True)
    # Replace invalid values for howMany with 1's
    df = df.fillna({'howMany': 1})
    return df

In [57]:
# Look up the species codes reported around the location described by params.
species = get_species(**params)
print('Species found:', len(species))

Species found: 242


In [101]:
# Slow cell: get_sightings issues one HTTP request per species
# (about 4 minutes for the 242 species in the run recorded below).
df = get_sightings(species, **params)

100%|████████████████████████████████████████████████████████████████████████████████| 242/242 [04:17<00:00,  1.07s/it]


In [102]:
# Number of sighting records (rows) per common name. howMany is not taken
# into account here, so this is not a count of individual birds.
df['comName'].value_counts()

comName
House Finch                    826
Anna's Hummingbird             817
California Towhee              760
American Crow                  724
Black Phoebe                   723
                              ... 
Red Phalarope                    1
Greater White-fronted Goose      1
Swan Goose                       1
Solitary Sandpiper               1
Townsend's Warbler               1
Name: count, Length: 242, dtype: int64

In [103]:
# Save off this version of the df to CSV so it can be reloaded from disk later.
# (named "ebd*" so that it gets gitignore'd)
SAVED_FILE = 'ebd_nearby.csv'
df.to_csv(SAVED_FILE)

In [104]:
def decompress(df):
    """
    Decompresses a df. Removes the howMany column
    by creating new rows of duplicate entries.

    Each row is repeated howMany times (fractional counts are truncated).
    Rows with a count of 0 (or less) are dropped. A missing count is treated
    as 1, matching how get_sightings fills invalid howMany values.
    @param df: Df with a howMany column; it is not modified
    @return: New df without howMany, indexed 0..n-1
    @raise KeyError: If df has no howMany column
    """
    counts = (
        pd.to_numeric(df['howMany'])
        .fillna(1)
        .astype(int)
        .clip(lower=0)
    )

    # Repeat row *positions* rather than index labels, so this also works
    # when the incoming index contains duplicate labels.
    positions = pd.RangeIndex(len(df)).repeat(counts.to_numpy())
    expanded = df.drop(columns='howMany').iloc[positions.to_numpy()]

    # Clean up: fresh 0..n-1 index, discarding the old one whatever its name.
    return expanded.reset_index(drop=True)

In [105]:
# Reload the saved sightings; the first CSV column is used as the index.
df_read = pd.read_csv(SAVED_FILE, index_col=0)
# Replace the howMany column with duplicated rows, so the counts below
# reflect howMany rather than the number of sighting records.
df_d = decompress(df_read)
df_d['comName'].value_counts()

comName
Sooty Shearwater      158494
Western Sandpiper      42374
California Gull        12940
American Avocet         8360
Brown Pelican           6665
                       ...  
Cassin's Auklet            1
Budgerigar                 1
Rufous Hummingbird         1
Red Phalarope              1
Solitary Sandpiper         1
Name: count, Length: 242, dtype: int64