# Rose-Breasted Grosbeak Population Challenges

# <u>Background</u>
## The Rose-Breasted Grosbeak, <i>Pheucticus ludovicianus</i>, is a 
## priority species in North Carolina according to the Audobon Society.
## While there is evidence that our changing climate has an impact, 
## due to changes in rainfall and temperatures, habitat loss is also an
## issue which can be address to prevent an endangered listing for this
## species.

# <u>Methods</u>
## Data to illustrate the habitats used in various ecoregions during 
## migration was obtained from the Global Biodiversity Information 
## Facility and included 21 datasets from 20 publishers in 6 countries.
### The downloaded species data was normalized by according to monthly 
### occurence by land area and spatially combined with data from the 
### Global Land Cover by National Mapping Organizations which 
### classifies land cover based on NASA MODIS data of the whole globe 
### into 20 categories or ecoregions.

<embed type="text/html",
 src="rbg_migration.html" width="600" height="600">


## Migration of the Rose-Breasted Grosbeak through Forested Habitats

## Information Sources
* GBIF.org (30 October 2024) Download https://doi.org/10.15468/dl.c2qewf
* Priority Bird Profile: Rose-breasted Grosbeak
https://nc.audubon.org/news/priority-bird-profile-rose-breasted-grosbeak
* Global change and the distributional dynamics of migratory bird
 populations wintering in Central America
https://doi.org/10.1111/gcb.13794


In [1]:
# Import packages to work with tabular & geospatial data
import os
import pathlib

import geopandas as gpd
import pandas as pd

In [2]:
# Import packages to extract work from tabular data
import time
import zipfile
from getpass import getpass
from glob import glob

import pygbif.occurrences as occ
import pandas as pd
import pygbif.species as species

In [3]:
# Get month names
import calendar
import warnings

# Libraries for Dynamic mapping
import cartopy.crs as ccrs
import hvplot.pandas
import panel as pn

warnings.filterwarnings('ignore', category=FutureWarning)


In [4]:
# Create data directory in the home folder
data_dir = os.path.join(
    # Home directory
    pathlib.Path.home(),
    # Earth analytics data directory
    'earth-analytics',
    'data',
    # Project directory
    'species-project',
)
os.makedirs(data_dir, exist_ok=True)
data_dir


'/home/jovyan/earth-analytics/data/species-project'

In [5]:
# Define study area from downloaded googleapis.com/teow2016
# Set up a path to save the data on your machine
ecoregions_dir = os.path.join(data_dir, 'ecoregions')
# Make ecoregions directory
os.makedirs(ecoregions_dir, exist_ok=True)

# Join ecoregion shapefile path
ecoregions_path = os.path.join(ecoregions_dir, 'ecoregions.shp')

# Only download once
if not os.path.exists(ecoregions_path):
    my_gdf = gpd.read_file(ecoregions_url)
    my_gdf.to_file(ecoregions_path)


In [18]:
# Open up the ecoregions boundaries 1st import
region_gdf = (
    gpd.read_file(ecoregions_path)
    [['OBJECTID', 'ECO_NAME', 'SHAPE_AREA', 'geometry']]
    .rename(columns={
        'OBJECTID': 'ecoregion_id',
        'ECO_NAME': 'name',
        'SHAPE_AREA':'area',
    })
    .set_index('ecoregion_id')
    
)
region_gdf.hvplot(geo=True, crs=ccrs.Mercator())

KeyboardInterrupt: 

In [7]:
# Verify the directory to store gbif data
os.makedirs(data_dir, exist_ok=True)
# Define the directory name for GBIF data
gbif_dir = os.path.join(data_dir, 'gbif')

In [8]:
# Use GitHub to authenticate GBIF account
reset_credentials = True
# GBIF needs a username, password, and email
credentials = dict(
    GBIF_USER=(input, 'GBIF username:'),
    GBIF_PWD=(getpass, 'GBIF password:'),
    GBIF_EMAIL=(input, 'GBIF email:'),
)
for env_variable, (prompt_func, prompt_text) in credentials.items():
    # Delete credential from environment if requested
    if reset_credentials and (env_variable in os.environ):
        os.environ.pop(env_variable)
    # Ask for credential and save to environment
    if not env_variable in os.environ:
        os.environ[env_variable] = prompt_func(prompt_text)

In [9]:
# Query species
species_info = species.name_lookup('Pheucticus ludovicianus', 
                                   rank='SPECIES')

# Get the first result
first_result = species_info['results'][0]

# Get the species key (nubKey)
species_key = first_result['nubKey']

# Check the result
first_result['species'], species_key

('Pheucticus ludovicianus', 2490494)

In [10]:
# Only download once
gbif_pattern = os.path.join(gbif_dir, '*.csv')
if not glob(gbif_pattern):
    # Only submit one request
    if not 'GBIF_DOWNLOAD_KEY' in os.environ:
        # Submit query to GBIF
        gbif_query = occ.download([
            f"speciesKey = { species_key }",
            "hasCoordinate = TRUE",
            "year = 2023",
        ])
        os.environ['GBIF_DOWNLOAD_KEY'] = gbif_query[0]

    # Wait for the download to build
    download_key = os.environ['GBIF_DOWNLOAD_KEY']
    wait = occ.download_meta(download_key)['status']
    while not wait=='SUCCEEDED':
        wait = occ.download_meta(download_key)['status']
        time.sleep(5)

    # Download GBIF data
    download_info = occ.download_get(
        os.environ['GBIF_DOWNLOAD_KEY'], 
        path=data_dir)

    # Unzip GBIF data
    with zipfile.ZipFile(download_info['path']) as download_zip:
        download_zip.extractall(path=gbif_dir)

# Find the extracted .csv file path (take the first result)
gbif_path = glob(gbif_pattern)[0]
gbif_path

'/home/jovyan/earth-analytics/data/species-project/gbif/0012167-241024112534372.csv'

In [11]:
# Load the GBIF data
gbif_df = pd.read_csv(
    gbif_path, 
    delimiter='\t',
    index_col='gbifID',
    usecols=['gbifID', 'month', 'decimalLatitude', 'decimalLongitude',]
)
gbif_df.head()

Unnamed: 0_level_0,decimalLatitude,decimalLongitude,month
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4746617534,45.369354,-86.911354,5
4829159730,43.960793,-79.54668,5
4838481129,45.011086,-78.283424,6
4775395086,39.77152,-82.297165,7
4684959135,42.230007,-90.72049,9


In [12]:
# Convert species dataframe into GeoDataFrame
gbif_gdf = (
    gpd.GeoDataFrame(
        gbif_df, 
        geometry=gpd.points_from_xy(
            gbif_df.decimalLongitude, 
            gbif_df.decimalLatitude), 
        crs="EPSG:4326")
    # Select the desired columns
    [['month', 'geometry']]
)
gbif_gdf

Unnamed: 0_level_0,month,geometry
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1
4746617534,5,POINT (-86.91135 45.36935)
4829159730,5,POINT (-79.54668 43.96079)
4838481129,6,POINT (-78.28342 45.01109)
4775395086,7,POINT (-82.29716 39.77152)
4684959135,9,POINT (-90.72049 42.23001)
...,...,...
4704614748,8,POINT (-93.76713 45.2357)
4678412469,5,POINT (-82.34081 33.61933)
4664684075,5,POINT (-82.57573 43.0365)
4674846650,5,POINT (-87.0921 41.6407)


In [13]:
region_gdf.head()

Unnamed: 0_level_0,name,area,geometry
ecoregion_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,Adelie Land tundra,0.038948,"MULTIPOLYGON (((158.7141 -69.60657, 158.71264 ..."
2.0,Admiralty Islands lowland rain forests,0.170599,"MULTIPOLYGON (((147.28819 -2.57589, 147.2715 -..."
3.0,Aegean and Western Turkey sclerophyllous and m...,13.844952,"MULTIPOLYGON (((26.88659 35.32161, 26.88297 35..."
4.0,Afghan Mountains semi-desert,1.355536,"MULTIPOLYGON (((65.48655 34.71401, 65.52872 34..."
5.0,Ahklun and Kilbuck Upland Tundra,8.196573,"MULTIPOLYGON (((-160.26404 58.64097, -160.2673..."


In [14]:
gbif_gdf.head()

Unnamed: 0_level_0,month,geometry
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1
4746617534,5,POINT (-86.91135 45.36935)
4829159730,5,POINT (-79.54668 43.96079)
4838481129,6,POINT (-78.28342 45.01109)
4775395086,7,POINT (-82.29716 39.77152)
4684959135,9,POINT (-90.72049 42.23001)


In [15]:
occurrence_df = (
    region_gdf
    # Reset index
    .reset_index()
    # For each ecoregion
    .groupby(['ecoregion_id', 'area'])
    # ...count the number of occurrences
    .agg(
        occurrences=('ecoregion_id', 'count'),
         area=('area', 'first'))
)

# Normalize by area
occurrence_df['density'] = (
    occurrence_df.occurrences
    / occurrence_df.area
)

# Get rid of rare observations (possible misidentification?)
occurrence_df = occurrence_df[occurrence_df.occurrences>1]

In [16]:
occurrence_df

Unnamed: 0_level_0,Unnamed: 1_level_0,occurrences,area,density
ecoregion_id,area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [17]:
# Simplify ecoregions to create a smaller file and change Coordinate 
# Reference System to Mercator
# original_geometry = region_gdf.geometry.copy()
# simplified_gdf = region_gdf.copy()
# region_gdf.geometry = region_gdf.simplify(.1,
  #  preserve_topology=False)

# Change the CRS to Mercator for mapping
simplified_gdf = simplified_gdf.to_crs(ccrs.Mercator ())


# Check that the plot runs in a reasonable amount of time
# region_gdf.hvplot(geo=True, crs=ccrs.Mercator())


NameError: name 'simplified_gdf' is not defined

In [None]:
# Join the occurrences with the plotting GeoDataFrame
occurrence_gdf = region_gdf.join(
    occurrence_df[['norm_occurrences']])

# Get the plot bounds so they don't change with the slider
xmin, ymin, xmax, ymax = gbif_gdf.to_crs(ccrs.Mercator()).total_bounds

# Define the slider widget
slider = pn.widgets.DiscreteSlider(name='month', 
    options={calendar.month_name[i]: i for i in range(1, 13)}
)

# Plot occurrence by ecoregion and month
rbg_migration_plot = (
    occurrence_gdf
    .hvplot(
        c='norm_occurrences',
        groupby='month',
        # Use background tiles
        geo=True, crs=ccrs.Mercator(), tiles='CartoLight',
        title="Rose-breasted Grosbeak Migration",
        xlim=(xmin, xmax), ylim=(ymin, ymax),
        frame_height=600,
        widget_location='bottom'
    )
)

# Save the plot
rbg_migration_plot.save('rbg_migration.html', embed=True)

# Show the plot
rbg_migration_plot

KeyError: "None of [Index(['norm_occurrences'], dtype='object')] are in the [columns]"