In [1]:
KEY_PATH = "../ebird_api.key"
EBIRD_KEY = ""

# If opening the file fails (e.g. FileNotFoundError), the API key is not set
# up. You MUST request your own eBird API key. The key file is intentionally
# excluded from the git repo (via gitignore).
with open(KEY_PATH) as f:
    # Strip surrounding whitespace: a key file saved with a trailing newline
    # would otherwise carry that newline into the X-eBirdApiToken header value.
    EBIRD_KEY = f.read().strip()

In [38]:
from io import StringIO

import pandas as pd
import requests
from tqdm import tqdm

In [5]:
# Get all sightings close to this location.
# Only has one entry per species.
# San Jose, 37.33 N, 121.86 W
url = "https://api.ebird.org/v2/data/obs/geo/recent?lat=37.33&lng=-121.86"

# Empty request body and the auth header. Both stay module-level because the
# per-species request cell in this notebook reuses them.
payload = {}
headers = {
    'X-eBirdApiToken': EBIRD_KEY
}

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (e.g. a rejected API key) here
# instead of as a confusing JSON parsing failure below.
http_resp = requests.get(url, headers=headers, data=payload, timeout=30)
http_resp.raise_for_status()

# Wrap the body in StringIO: passing a literal JSON string to read_json is
# deprecated in recent pandas releases.
resp_recent = pd.read_json(StringIO(http_resp.text))

In [32]:
# Pull out the species-code column and report how many entries it holds.
species = resp_recent.loc[:, 'speciesCode']
n_species = species.shape[0]
print('Species in the list:', n_species)
species.head()

Species in the list: 176


0    moudov
1    houspa
2    houfin
3    daejun
4    sonspa
Name: speciesCode, dtype: object

In [40]:
# Get all individual sightings per species.
df_list = []

# Iterate over the species codes directly; tqdm takes the total from the
# length of `species`, so no index counter is needed.
for s in tqdm(species):
    url = f"https://api.ebird.org/v2/data/obs/geo/recent/{s}?lat=37.33&lng=-121.86"
    # The timeout bounds each request so one stalled connection cannot hang
    # the whole loop.
    response = requests.get(url, headers=headers, data=payload, timeout=30)
    # Fail fast on an HTTP error rather than trying to parse an error body
    # as sightings data.
    response.raise_for_status()
    # Wrap the body in StringIO: passing a literal JSON string to read_json
    # is deprecated in recent pandas releases.
    df_list.append(pd.read_json(StringIO(response.text)))

100%|████████████████████████████████████████████████████████████████████████████████| 176/176 [02:40<00:00,  1.09it/s]

Done





In [167]:
# Combine the per-species frames into a single table. ignore_index=True gives
# every row a unique label; without it each frame's own row labels repeat
# across species, which makes label-based selection ambiguous.
df = pd.concat(df_list, ignore_index=True)
# Replace invalid (missing) values for howMany with 1's
df = df.fillna({'howMany': 1})
df['comName'].value_counts()

comName
House Finch             211
Anna's Hummingbird      183
Lesser Goldfinch        177
American Crow           176
California Towhee       168
                       ... 
Cassin's Kingbird         1
Red Knot                  1
Glaucous-winged Gull      1
Least Bittern             1
Rock Wren                 1
Name: count, Length: 176, dtype: int64

In [131]:
# Write the current sightings table to disk.
# The file name starts with "ebd" so that it is matched by the gitignore
# rule and stays out of the repo.
df.to_csv(path_or_buf='ebd_nearby.csv')

In [147]:
def decompress(df):
    """
    Decompresses a df. Removes the howMany column
    by creating new rows of duplicate entries.

    Each input row is repeated ``int(howMany)`` times, so a row with
    howMany == 3 becomes three identical rows and a row with howMany == 0
    contributes no rows. The input frame is left unmodified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'howMany' column whose values can be converted to
        non-negative integers (missing values must be filled beforehand).

    Returns
    -------
    pandas.DataFrame
        New frame without the 'howMany' column, with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If df has no 'howMany' column.
    ValueError
        If a howMany value cannot be converted to a non-negative integer.
    """
    # Truncate to whole numbers, matching int(); this never writes to df.
    counts = df['howMany'].astype(int).to_numpy()

    # Work on a positional index so duplicate or named index labels in the
    # input cannot affect which rows get repeated.
    rows = df.drop(columns='howMany').reset_index(drop=True)

    # Repeat each row label `count` times and select those rows.
    expanded = rows.loc[rows.index.repeat(counts)]
    return expanded.reset_index(drop=True)

In [169]:
# Reload the saved sightings, expand them with decompress(), and tally the
# resulting rows per common name.
df = pd.read_csv(filepath_or_buffer='ebd_nearby.csv', index_col=0)
df_d = decompress(df)
df_d.loc[:, 'comName'].value_counts()

comName
California Gull     4613
American Avocet     1543
House Finch         1528
Mallard             1363
Canada Goose        1235
                    ... 
Pacific Wren           1
Wood Duck              1
Western Gull           1
Northern Pintail       1
Rock Wren              1
Name: count, Length: 176, dtype: int64