# eBird New Mexico Observations: Seasonal Counts, Polynomial Fits and Inflection Points

In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go


## Data Loading

In [2]:
# Load the eBird observations CSV into a DataFrame. The path is relative to the
# notebook's working directory. The file is large (about 9 million rows in the
# recorded run), so this cell is the slow step of a full re-run.
ebird_data = pd.read_csv('ebird_nm_2022.csv')


In [3]:
ebird_data.head()


Unnamed: 0.1,Unnamed: 0,COMMON_NAME,LATITUDE,LONGITUDE,OBSERVATION_DATE
0,0,Mallard/Mexican Duck,35.158855,-106.633675,2015-04-11
1,1,Common Merganser,33.802,-106.88,2005-12-27
2,2,Long-billed Dowitcher,33.802,-106.88,2005-02-03
3,3,American Avocet,33.802,-106.88,2007-04-14
4,4,Great Egret,33.802,-106.88,2003-05-22


In [4]:
ebird_data.shape


(9009593, 5)

## Quick Data Cleaning

In [5]:
ebird_data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9009593 entries, 0 to 9009592
Data columns (total 5 columns):
 #   Column            Dtype  
---  ------            -----  
 0   Unnamed: 0        int64  
 1   COMMON_NAME       object 
 2   LATITUDE          float64
 3   LONGITUDE         float64
 4   OBSERVATION_DATE  object 
dtypes: float64(2), int64(1), object(2)
memory usage: 343.7+ MB


In [6]:
# Normalise the column names to lowercase; every later cell and helper function
# refers to the columns by their lowercase names (e.g. 'common_name').
ebird_data.columns = ebird_data.columns.str.lower()


In [7]:
ebird_data.columns


Index(['unnamed: 0', 'common_name', 'latitude', 'longitude',
       'observation_date'],
      dtype='object')

In [8]:
# Drop the first column ('unnamed: 0'); in the preview it only repeats the row
# number (presumably an index saved by an earlier export - TODO confirm).
# NOTE: this cell is not re-runnable on its own - a second run raises KeyError
# because the column is already gone.
ebird_data = ebird_data.drop(columns=['unnamed: 0'])


In [9]:
# Parse the date strings (loaded as dtype 'object') into datetimes so that the
# `.dt` accessor can be used to derive the calendar columns afterwards.
ebird_data['observation_date'] = pd.to_datetime(ebird_data['observation_date'])


In [10]:
# Derive the calendar columns that the aggregation helpers group by.
observation_dates = ebird_data['observation_date']

ebird_data['year'] = observation_dates.dt.year
ebird_data['month'] = observation_dates.dt.month
# 'day' is the day of the month (1-31), not the day of the year.
ebird_data['day'] = observation_dates.dt.day
# ISO-8601 week number. NOTE: 'year' above is the calendar year, so the first
# days of January can carry week 52/53 and the last days of December week 1.
ebird_data['week of year'] = observation_dates.dt.isocalendar().week



In [11]:
ebird_data.head() 


Unnamed: 0,common_name,latitude,longitude,observation_date,year,month,day,week of year
0,Mallard/Mexican Duck,35.158855,-106.633675,2015-04-11,2015,4,11,15
1,Common Merganser,33.802,-106.88,2005-12-27,2005,12,27,52
2,Long-billed Dowitcher,33.802,-106.88,2005-02-03,2005,2,3,5
3,American Avocet,33.802,-106.88,2007-04-14,2007,4,14,15
4,Great Egret,33.802,-106.88,2003-05-22,2003,5,22,21


In [12]:
# Keep an independent copy of the cleaned frame.
# NOTE(review): `ebird_data_copy` is not used by any cell visible in this
# notebook, and it duplicates a multi-million-row frame in memory - confirm it
# is still needed.
ebird_data_copy = ebird_data.copy()


## Functions

### Species Observations Over Time

In [64]:
def species_observations_over_time(data: pd.DataFrame, species: str, time_units: str) -> pd.DataFrame:
    """
    Count the observations of one species per year and per unit of time,
    i.e. observations of the American Crow per month, week, or day.

    Args:
        data (pd.DataFrame): eBird dataframe. Must contain the columns 'common_name',
            'year' and the column of the requested unit ('month', 'week of year' or 'day').
        species (str): Bird species of interest, compared for exact equality with 'common_name'.
        time_units (str): Unit of time to aggregate observations. Options: 'month', 'week', 'day'.
            Matching is case-insensitive and ignores surrounding whitespace, so 'Month' works too.

    Raises:
        ValueError: If time unit is not supported
        ValueError: If species is not found in data

    Returns:
        pd.DataFrame: One row per (year, time unit) combination that has at least one
            observation, with columns 'year', the time column and 'observations'.
    """

    # Maps the user-facing unit name to the dataframe column that holds it.
    time_dict = {
        'month': 'month',
        'week': 'week of year',
        'day': 'day',
    }

    # Normalise the unit so that the capitalised spellings ('Month', 'Week', 'Day')
    # are accepted instead of being rejected by the lowercase lookup.
    unit_key = time_units.strip().lower() if isinstance(time_units, str) else time_units

    if unit_key not in time_dict:
        raise ValueError(f"Time unit {time_units} not supported")

    species_data = data[data['common_name'] == species]

    if species_data.empty:
        raise ValueError(f"Species {species} not found in data")

    # size() counts rows per group; reset_index turns the group keys back into columns.
    species_observations = (
        species_data
        .groupby(['year', time_dict[unit_key]])
        .size()
        .reset_index(name='observations')
    )

    return species_observations


Example Usage

In [37]:
# Use the lowercase unit key ('month'): species_observations_over_time looks the
# unit up in a dict whose keys are lowercase.
american_crow_monthly = species_observations_over_time(ebird_data, 'American Crow', 'month')
american_crow_monthly.head(12)


Unnamed: 0,year,month,observations
0,1990,1,21
1,1990,2,21
2,1990,3,13
3,1990,4,4
4,1990,5,5
5,1990,6,1
6,1990,7,1
7,1990,8,15
8,1990,9,14
9,1990,10,15


In [44]:
american_crow_monthly['year'].unique()


array([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
       2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
       2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022],
      dtype=int64)

## Polynomial Fit

In [109]:
def calculate_fitted_polynomials(data: pd.DataFrame, time_units: str, degree: int) -> np.ndarray:
    """
    Fit one least-squares polynomial per year to the observation counts and return
    all coefficients in a single flat array.

    Args:
        data (pd.DataFrame): Aggregate dataframe of species observations over time.
            Must contain the columns 'year', 'observations' and the time column.
        time_units (str): Either the name of the time column in `data`
            (e.g. 'month', 'week of year', 'day') or one of the unit names
            'month', 'week', 'day' (case-insensitive), which are mapped to their column.
        degree (int): Degree of the polynomial to fit the data

    Raises:
        KeyError: If the time column cannot be found in `data`.

    Returns:
        np.ndarray: 1-D array holding the coefficients of every year back to back, in the
            order the years first appear in `data`. Each year contributes `degree + 1`
            values, highest power first, so `result.reshape(-1, degree + 1)` gives one
            row per year.
    """
    # Unit names used elsewhere in the notebook, mapped to their dataframe column.
    unit_columns = {
        'month': 'month',
        'week': 'week of year',
        'day': 'day',
    }

    # Prefer an exact column match; otherwise translate a unit name to its column
    # ('week' -> 'week of year') instead of failing on the missing column.
    if time_units in data.columns:
        time_column = time_units
    else:
        unit_key = time_units.strip().lower() if isinstance(time_units, str) else time_units
        time_column = unit_columns.get(unit_key, unit_key)
        if time_column not in data.columns:
            raise KeyError(f"Column {time_column!r} not found in data")

    # Collect the per-year coefficient arrays and join them once at the end;
    # calling np.append inside the loop would re-copy the whole array every iteration.
    coefficient_sets = []

    for year in data['year'].unique():
        species_year = data[data['year'] == year]
        # Convert explicitly to float: the ISO week column can use pandas' nullable
        # integer dtype, which does not always convert to a numeric numpy array implicitly.
        x = species_year[time_column].to_numpy(dtype=float)
        y = species_year['observations'].to_numpy(dtype=float)
        coefficient_sets.append(np.polyfit(x, y, degree))

    if not coefficient_sets:
        return np.array([])

    return np.concatenate(coefficient_sets)
    
    



In [110]:
# Fit a degree-5 polynomial to each year's monthly counts.
# The result is one flat 1-D array: np.append concatenates every year's
# coefficients, and np.polyfit returns degree + 1 = 6 values per year
# (highest power first), so consecutive groups of 6 belong to one year.
american_crow_polynomials = calculate_fitted_polynomials(american_crow_monthly, 'month', 5)
american_crow_polynomials


array([ 2.61123680e-03, -1.19866653e-01,  1.81225576e+00, -1.05801025e+01,
        1.91320170e+01,  1.11818182e+01, -5.77865762e-03,  1.55063074e-01,
       -1.30977050e+00,  3.17129439e+00,  3.92512341e+00, -3.09090909e+00,
        7.71116139e-03, -2.70280577e-01,  3.46525778e+00, -1.94917001e+01,
        4.46251903e+01, -2.00454545e+01, -2.53582202e-03,  4.27507541e-02,
       -5.07292952e-02, -1.41955043e+00,  2.22541650e+00,  1.82272727e+01,
       -2.14932127e-03,  3.53978130e-02, -6.83789250e-02, -1.09005639e+00,
        6.51237317e+00,  8.68181818e+00,  3.20512821e-04, -6.46853147e-02,
        1.28685897e+00, -8.48951049e+00,  2.27110723e+01, -1.63636364e+00,
       -1.17552790e-02,  3.32257816e-01, -3.31280423e+00,  1.41853275e+01,
       -2.48186943e+01,  2.97727273e+01, -1.60256410e-04, -4.35606061e-02,
        1.09695513e+00, -8.36115967e+00,  2.24110723e+01, -3.54545455e+00,
       -1.45456259e-02,  4.39552482e-01, -4.73214469e+00,  2.18896330e+01,
       -4.16059423e+01,  

## Calculate Inflection Points

In [67]:
def calculate_inflection_points(polynomial: np.ndarray) -> np.ndarray:
    """
    Return the x-positions at which the second derivative of a polynomial is zero.

    Only real roots are returned: complex roots of the second derivative do not
    correspond to any point on the real curve and cannot be plotted.

    Args:
        polynomial (np.ndarray): Coefficients of a single polynomial, highest power first
            (the layout produced by np.polyfit).

    Returns:
        np.ndarray: Sorted float array of the real roots of the second derivative.
            Empty when the polynomial has degree lower than 3 or when every root is complex.
    """
    p = np.poly1d(polynomial)

    # calculate second derivative of polynomial
    second_derivative = np.polyder(p, 2)

    # candidate inflection points: every root of the second derivative (may be complex)
    candidate_roots = np.atleast_1d(np.roots(second_derivative.coeffs))

    # keep the roots whose imaginary part is numerically zero and drop the complex dtype
    is_real = np.isclose(candidate_roots.imag, 0.0)
    inflection_points = np.sort(candidate_roots[is_real].real)

    return inflection_points
    


In [107]:
def plot_obervations_with_polynomial_fit(data: pd.DataFrame, species: str, time_units: str, degree: int) -> None:
    """
    Plot the number of observations of a species per unit of time (one line per year)
    together with a polynomial fit for every year and the inflection points of those fits.

    Args:
        data (pd.DataFrame): eBird dataframe
        species (str): Bird Species of interest
        time_units (str): Unit of time to aggregate observations. Options: 'month', 'week', 'day'
            (case-insensitive).
        degree (int): Degree of the polynomial to fit the data

    Raises:
        ValueError: If time unit is not supported
        ValueError: If species is not found in data

    Returns:
        None
    """
    # The aggregated frame names its time column after the dataframe column
    # ('week' -> 'week of year'), so resolve that name once and use it for every lookup.
    unit_columns = {
        'month': 'month',
        'week': 'week of year',
        'day': 'day',
    }
    unit_key = time_units.strip().lower() if isinstance(time_units, str) else time_units
    if unit_key not in unit_columns:
        raise ValueError(f"Time unit {time_units} not supported")
    time_column = unit_columns[unit_key]

    species_observations = species_observations_over_time(data, species, unit_key)

    # plot observations
    fig = px.line(species_observations,
                  x=time_column,
                  y='observations',
                  color='year',
                  markers=False,
                  title=f'Observation Count for {species} by {unit_key.title()}')

    inflection_x = []
    inflection_y = []

    # Fit each year separately and add its curve inside the loop, so that every
    # year gets a trace (hidden until toggled in the legend).
    for year, year_data in species_observations.groupby('year'):
        x_values = year_data[time_column].to_numpy(dtype=float)
        y_values = year_data['observations'].to_numpy(dtype=float)

        coefficients = np.polyfit(x_values, y_values, degree)
        polynomial = np.poly1d(coefficients)

        fig.add_scatter(x=x_values,
                        y=polynomial(x_values),
                        mode='lines', line=dict(dash='dash'),
                        visible='legendonly',
                        name=f'Polynomial Approximation ({year})'
                        )

        # Inflection points are computed per year from that year's own coefficients.
        # Keep only real roots that lie inside the fitted range; the polynomial is
        # not meaningful outside the observed x-values.
        roots = np.atleast_1d(calculate_inflection_points(coefficients))
        real_roots = roots[np.isclose(roots.imag, 0.0)].real
        in_range = real_roots[(real_roots >= x_values.min()) & (real_roots <= x_values.max())]

        inflection_x.extend(in_range.tolist())
        # Place each marker on its fitted curve, i.e. at y = polynomial(x).
        inflection_y.extend(polynomial(in_range).tolist())

    # plot inflection points
    fig.add_scatter(x=inflection_x, y=inflection_y, mode='markers', name='Inflection Points')

    fig.show()


# Correctly spelled alias; the original (misspelled) name is kept for existing callers.
plot_observations_with_polynomial_fit = plot_obervations_with_polynomial_fit
    
   


In [108]:
# Monthly American Crow counts, one line per year, with degree-5 polynomial fits.
plot_obervations_with_polynomial_fit(ebird_data, 'American Crow', 'month', 5)
