<a href="https://colab.research.google.com/github/brooksburkhead/Bird-Migration-Analysis/blob/main/Inflection_Point_Plotting_Workflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# Data Loading

In [None]:
from google.colab import auth
auth.authenticate_user()

In [None]:
%%bigquery full_data --project c11-capstone
SELECT COMMON_NAME, LATITUDE, LONGITUDE, OBSERVATION_DATE
FROM `c11-capstone.nm_ebird.ebd_NM_relSep_2023`
WHERE OBSERVATION_DATE BETWEEN '1990-01-01' AND '2022-12-31'

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
full_data.head()

Unnamed: 0,COMMON_NAME,LATITUDE,LONGITUDE,OBSERVATION_DATE
0,Tree Swallow,32.717147,-103.308434,1997-03-30
1,Lark Sparrow,32.857101,-103.793807,2016-09-16
2,Scaled Quail,32.522327,-103.569779,2019-04-24
3,Swainson's Hawk,32.842065,-103.1708,2017-05-31
4,Lark Sparrow,32.842001,-103.196461,2010-05-31


In [None]:
full_data.shape

(9009593, 4)

##  Quick Data Cleaning

In [None]:
# Swap underscores for spaces in the column names (COMMON_NAME -> COMMON NAME);
# the functions defined in later cells index the frame by the spaced names.
full_data.columns = full_data.columns.str.replace('_', ' ')

In [None]:
full_data.columns

Index(['COMMON NAME', 'LATITUDE', 'LONGITUDE', 'OBSERVATION DATE'], dtype='object')

In [None]:
# Convert the observation dates to datetimes so the `.dt` accessor can be used on the column.
full_data['OBSERVATION DATE'] = pd.to_datetime(full_data['OBSERVATION DATE'])

In [None]:
def add_datetime_components(df, datetime_column_name='OBSERVATION DATE'):
    '''Add MONTH, YEAR, WEEK and DAY columns derived from a datetime column.

    The frame is modified in place and nothing is returned.

    Args:
        df: DataFrame holding the datetime column.
        datetime_column_name: name of the datetime64 column to decompose.

    Columns written:
        MONTH: calendar month (1-12).
        YEAR:  calendar year.
        WEEK:  week of the calendar year (1-52), counted in 7-day blocks
               starting on 1 January; day 365/366 is folded into week 52.
        DAY:   day of the year (1-366).
    '''
    stamps = df[datetime_column_name].dt
    day_of_year = stamps.dayofyear

    df['MONTH'] = stamps.month
    df['YEAR'] = stamps.year
    # ISO week numbers belong to the ISO year, not the calendar year: 1 Jan can be
    # ISO week 52/53 and 30-31 Dec can be ISO week 1. Pairing them with the calendar
    # YEAR would file those days at the wrong end of the year, so the week is derived
    # from the day of the year instead.
    df['WEEK'] = ((day_of_year - 1) // 7 + 1).clip(upper=52)
    df['DAY'] = day_of_year

# Add the MONTH, YEAR, WEEK and DAY columns to full_data (the function modifies the frame in place).
add_datetime_components(full_data)


In [None]:
full_data['COMMON NAME'].value_counts()

House Finch                         299801
Dark-eyed Junco                     223502
Mourning Dove                       192101
Common Raven                        188503
American Robin                      187926
                                     ...  
Arctic/Pacific Loon                      1
Red-naped/Red-breasted Sapsucker         1
Golden Pheasant                          1
Green Jay                                1
Haemorhous sp.                           1
Name: COMMON NAME, Length: 801, dtype: int64

In [None]:
# Independent copy of the prepared frame; the plotting calls in later cells read from this copy.
full_data_copy = full_data.copy()

In [None]:
def _inflection_points_in_range(polynomial, floor, ceiling):
    """Return the real roots of the 2nd derivative of `polynomial` that lie in [floor, ceiling]."""
    second_derivative = np.polyder(polynomial, m=2)
    roots = np.roots(second_derivative)
    # Discard complex roots first so the bounds are compared against plain real numbers.
    real_roots = roots[np.isreal(roots)].real
    return [float(root) for root in real_roots if floor <= root <= ceiling]


def plot_bird_data(data, bird_name, time, deg = 6, floor = 0, ceiling = 365):
    """
    Plot per-year observation counts for one species and mark the inflection points
    of a polynomial fitted to each year.

    Steps:
      1. Count observations of `bird_name` per (YEAR, `time`) and draw one line per year.
      2. Fit a degree-`deg` polynomial to each year's counts and add it as a dashed
         trace (hidden until enabled from the legend).
      3. Take the 2nd derivative of each polynomial, keep its real roots that fall
         within [floor, ceiling], and draw each as a dashed vertical line in the
         colour of the matching year.

    Args:
        data: DataFrame with 'COMMON NAME' and 'YEAR' columns plus the column named by `time`.
        bird_name: value of 'COMMON NAME' to plot.
        time: period column to aggregate on; one of 'MONTH', 'WEEK' or 'DAY'.
        deg: degree of the fitted polynomials. Default = 6.
        floor: smallest inflection point (in units of `time`) to keep.
        ceiling: largest inflection point (in units of `time`) to keep.

    Returns:
        None. The figure is shown. If the species or the period is not recognised a
        message is printed and nothing else happens.

    Side effects:
        Rebinds the module-level names `bird_timely_counts` (the count table),
        `polynomials` (list of fitted np.poly1d) and `inflection_points`
        (list of (inflection_point, year, colour) tuples). `inflection_scatter`
        reads `inflection_points`.

    Years with no more than `deg` data points are skipped: a degree-`deg` fit needs
    at least `deg + 1` points, otherwise the fit is rank deficient and its
    inflection points are meaningless.
    """
    global bird_timely_counts, polynomials, inflection_points

    # Filter data for the specific bird
    bird_data = data[data['COMMON NAME'] == bird_name]
    if bird_data.empty:
        print(f"No data found for the bird: {bird_name}")
        return

    if time not in ('MONTH', 'WEEK', 'DAY'):
        print(f"No data found for the time period: {time}")
        return

    # One row per (year, period) with the number of observations in it
    counts = bird_data.groupby(['YEAR', time]).size().reset_index(name='Observation Count')
    counts[time] = counts[time].astype('int64')
    years = counts['YEAR'].unique()

    # Step 1: Plot observations over time, one line per year
    fig = px.line(counts,
                  x=time,
                  y='Observation Count',
                  color='YEAR',
                  markers=False,
                  title=f'Observation Count for {bird_name} by {time.title()}')

    # Record each year's line colour now, while the figure holds only the per-year
    # traces (in the same order as `years`), before any further traces are appended.
    year_colors = {year: trace.line.color for year, trace in zip(years, fig.data)}

    # Step 2: Fit a polynomial for every year that has enough points
    fitted = []  # (year, polynomial) pairs
    for year in years:
        year_data = counts[counts['YEAR'] == year]
        if len(year_data) <= deg:
            print(f"Skipping {year}: {len(year_data)} data point(s) cannot "
                  f"determine a degree-{deg} polynomial")
            continue

        polynomial = np.poly1d(np.polyfit(year_data[time], year_data['Observation Count'], deg))
        fitted.append((year, polynomial))

        # Add the polynomial curve to the plot with dashed lines
        fig.add_scatter(x=year_data[time],
                        y=polynomial(year_data[time]),
                        mode='lines', line=dict(dash='dash'),
                        visible='legendonly',
                        name=f'Polynomial Approximation ({year})'
                        )

    # Step 3: Inflection points inside [floor, ceiling], tagged with year and colour
    points = [
        (inflection_point, year, year_colors[year])
        for year, polynomial in fitted
        for inflection_point in _inflection_points_in_range(polynomial, floor, ceiling)
    ]

    # Publish the results together so the three globals always describe the same call
    bird_timely_counts = counts
    polynomials = [polynomial for _, polynomial in fitted]
    inflection_points = points

    # Plot vertical lines at inflection points, spanning the full count range
    peak_count = counts['Observation Count'].max()
    for inflection_point, year, year_color in points:
        fig.add_trace(go.Scatter(
            x=[inflection_point, inflection_point],
            y=[0, peak_count],
            mode='lines',
            line=dict(color=year_color, dash="dash"),
            name=f'Inflection Point ({year})',
        ))

    # Show the plot
    fig.update_layout(
        width=1500,  # Set the width of the figure
        height=800,  # Set the height of the figure
    )
    fig.show()




In [None]:
plot_bird_data(full_data_copy, 'Sandhill Crane', time = 'WEEK', deg = 6, floor = 26 , ceiling= 43 ) #FLOOR 26 WEEKS CEILING 43 WEEKS

In [None]:
def search_name(data, col, name: str) -> pd.DataFrame:
    '''Return the rows of `data` whose `col` value contains `name`.

    Args:
        data: DataFrame to search.
        col: name of a string column in `data`.
        name: text to look for. It is passed to `Series.str.contains`, which
            treats it as a regular expression by default.

    Returns:
        The matching rows, with their original index. Rows where `col` is
        missing never match.
    '''
    # na=False: a missing value would otherwise put NaN into the mask, and
    # boolean indexing with a mask containing NaN raises.
    matches = data[col].str.contains(name, na=False)
    return data[matches]

In [None]:
def inflection_scatter(name):
    """Scatter the inflection points of the latest `plot_bird_data` call against year.

    Reads the module-level `inflection_points` list of
    (inflection_point, year, colour) tuples that `plot_bird_data` publishes, so
    `plot_bird_data` must have been run for the species first. An ordinary
    least squares trendline is drawn through the points.

    Args:
        name: species name used in the figure title.

    Returns:
        None. The figure is shown, or a message is printed when there are no
        inflection points to plot.
    """
    # Look the list up explicitly so a missing prerequisite gives a clear message
    # instead of a NameError.
    points = globals().get('inflection_points')
    if not points:
        print(f"No inflection points to plot for {name}; run plot_bird_data first "
              f"or widen its floor/ceiling range")
        return

    # One record per inflection point; every tuple already carries its year.
    scatter_data = [
        {'Year': year, 'Inflection Point': inflection_point}
        for inflection_point, year, _ in points
    ]

    # Create a scatter plot using Plotly Express with a trendline
    fig = px.scatter(scatter_data, x='Year', y='Inflection Point',
                     title=f'Inflection Points by Year for {name}',
                     trendline='ols')  # Ordinary Least Squares (OLS) regression trendline

    fig.update_layout(
        xaxis=dict(title=dict(text='Year', font=dict(size=18))),
        yaxis=dict(title=dict(text='Inflection Point', font=dict(size=18))),
    )
    fig.show()


In [None]:
inflection_scatter('Sandhill Crane')

In [None]:
plot_bird_data(full_data_copy, 'Black-chinned Hummingbird', time = 'WEEK', deg = 6, floor = 8.95, ceiling = 19.80)

In [None]:
inflection_scatter('Black-chinned Hummingbird')

In [None]:
plot_bird_data(full_data_copy, 'Snow Goose', time = 'WEEK', deg = 6, floor = 30, ceiling = 45)

In [None]:
inflection_scatter('Snow Goose')

In [None]:
plot_bird_data(full_data_copy, 'Greater Roadrunner', time = 'WEEK', deg = 3, floor = 0, ceiling = 52)

In [None]:
inflection_scatter('Greater Roadrunner')

In [None]:
plot_bird_data(full_data_copy, 'Dark-eyed Junco', time = 'WEEK', deg = 6, floor = 30.9, ceiling = 47)

In [None]:
# `name` is a required argument; pass the species plotted in the previous cell.
inflection_scatter('Dark-eyed Junco')

In [None]:
plot_bird_data(full_data_copy, 'Mourning Dove', time = 'WEEK', deg = 6, floor = 8.8, ceiling = 20.999)

In [None]:
# `name` is a required argument; pass the species plotted in the previous cell.
inflection_scatter('Mourning Dove')

In [None]:
plot_bird_data(full_data_copy, 'Broad-tailed Hummingbird' , time = 'WEEK', deg = 6, floor = 8.95, ceiling = 19.8)

In [None]:
inflection_scatter('Broad-tailed Hummingbird')

In [None]:
plot_bird_data(full_data_copy, 'Canada Goose' , time = 'WEEK', deg = 6, floor = 28.2, ceiling = 44)