# Examining Migration of the Pacific Loon (Gavia pacifica) in 2023 using data from the Global Biodiversity Information Facility (GBIF)

Some narrative here. A couple cites.

In [1]:
%store -r

import os
import pathlib
import time
import calendar 
import zipfile
from getpass import getpass
from glob import glob

import geopandas as gpd
import pandas as pd
import pygbif.occurrences as occ
import pygbif.species as species

#dynamic mapping
import hvplot.pandas
import cartopy.crs as ccrs
import panel as pn

In [2]:
#Get the ecoregions shapefile
ecoreg_shp_dir = os.path.join(
    # Home directory
    pathlib.Path.home(),
    # Earth analytics data directory
    'earth-analytics',
    'data',
    # Project directory
    'species_dist_coding_assign',
    'ecoregions_dirname'
)
os.makedirs(ecoreg_shp_dir, exist_ok=True)
ecoregion_shppath = os.path.join(ecoreg_shp_dir, 'ecoregions_filename.shp')
# Open up the ecoregions boundaries
ecoreg_gdf = gpd.read_file(ecoregion_shppath)

# Name the index so it will match the other data later on
ecoreg_gdf.index.name = 'ecoregion'
#ecoreg_gdf.crs

In [3]:
ecoreg_gdf

Unnamed: 0_level_0,OBJECTID,ECO_NAME,BIOME_NUM,BIOME_NAME,REALM,ECO_BIOME_,NNH,ECO_ID,SHAPE_LENG,SHAPE_AREA,NNH_NAME,COLOR,COLOR_BIO,COLOR_NNH,LICENSE,geometry
ecoregion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,Adelie Land tundra,11.0,Tundra,Antarctica,AN11,1,117,9.749780,0.038948,Half Protected,#63CFAB,#9ED7C2,#257339,CC-BY 4.0,"MULTIPOLYGON (((158.7141 -69.60657, 158.71264 ..."
1,2.0,Admiralty Islands lowland rain forests,1.0,Tropical & Subtropical Moist Broadleaf Forests,Australasia,AU01,2,135,4.800349,0.170599,Nature Could Reach Half Protected,#70A800,#38A700,#7BC141,CC-BY 4.0,"MULTIPOLYGON (((147.28819 -2.57589, 147.2715 -..."
2,3.0,Aegean and Western Turkey sclerophyllous and m...,12.0,"Mediterranean Forests, Woodlands & Scrub",Palearctic,PA12,4,785,162.523044,13.844952,Nature Imperiled,#FF7F7C,#FE0000,#EE1E23,CC-BY 4.0,"MULTIPOLYGON (((26.88659 35.32161, 26.88297 35..."
3,4.0,Afghan Mountains semi-desert,13.0,Deserts & Xeric Shrublands,Palearctic,PA13,4,807,15.084037,1.355536,Nature Imperiled,#FA774D,#CC6767,#EE1E23,CC-BY 4.0,"MULTIPOLYGON (((65.48655 34.71401, 65.52872 34..."
4,5.0,Ahklun and Kilbuck Upland Tundra,11.0,Tundra,Nearctic,NE11,1,404,22.590087,8.196573,Half Protected,#4C82B6,#9ED7C2,#257339,CC-BY 4.0,"MULTIPOLYGON (((-160.26404 58.64097, -160.2673..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
842,848.0,Sulawesi lowland rain forests,1.0,Tropical & Subtropical Moist Broadleaf Forests,Australasia,AU01,2,156,150.744361,9.422097,Nature Could Reach Half Protected,#70A800,#38A700,#7BC141,CC-BY 4.0,"MULTIPOLYGON (((117.33111 -7.53306, 117.30525 ..."
843,212.0,East African montane forests,1.0,Tropical & Subtropical Moist Broadleaf Forests,Afrotropic,AF01,3,8,157.848926,5.010930,Nature Could Recover,#13ED00,#38A700,#F9A91B,CC-BY 4.0,"MULTIPOLYGON (((36.7375 -3.13, 36.7375 -3.1316..."
844,224.0,Eastern Arc forests,1.0,Tropical & Subtropical Moist Broadleaf Forests,Afrotropic,AF01,3,9,34.153333,0.890325,Nature Could Recover,#267400,#38A700,#F9A91B,CC-BY 4.0,"MULTIPOLYGON (((36.38 -8.96583, 36.38 -8.96667..."
845,79.0,Borneo montane rain forests,1.0,Tropical & Subtropical Moist Broadleaf Forests,Indomalayan,IN01,2,220,38.280990,9.358407,Nature Could Reach Half Protected,#23DB01,#38A700,#7BC141,CC-BY 4.0,"MULTIPOLYGON (((112.82394 -0.5066, 112.82298 -..."


In [4]:
# Simplify the geometry to speed up processing
ecoreg_gdf.geometry = ecoreg_gdf.simplify(.1, preserve_topology=False)
# Change the CRS to Mercator for mapping
ecoreg_gdf = ecoreg_gdf.to_crs(ccrs.Mercator())
# Check that the plot runs in a reasonable amount of time
#ecoreg_gdf.hvplot(geo=True, crs=ccrs.Mercator())

In [5]:
#convert the pacific loon occurrence data to a geodataframe
gaviapac_gbif_gdf = (
    gpd.GeoDataFrame(
        gaviapac_gbif_df, 
        geometry=gpd.points_from_xy(
            gaviapac_gbif_df.decimalLongitude, 
            gaviapac_gbif_df.decimalLatitude), 
        crs="EPSG:4326")
    # Select the desired columns
    #[['gbifID', 'decimalLatitude', 'decimalLongitude', 'month']]
)
gaviapac_gbif_gdf = gaviapac_gbif_gdf.to_crs(ccrs.Mercator())
gaviapac_gbif_gdf

Unnamed: 0_level_0,occurrenceID,species,scientificName,countryCode,occurrenceStatus,individualCount,decimalLatitude,decimalLongitude,month,year,speciesKey,basisOfRecord,geometry
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
4953151418,https://www.inaturalist.org/observations/17616...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",CA,PRESENT,,58.765791,-94.122485,8,2023,2481955,HUMAN_OBSERVATION,POINT (-10477667.102 8093368.02)
4950273871,https://www.inaturalist.org/observations/24385...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,32.758759,-117.245769,11,2023,2481955,HUMAN_OBSERVATION,POINT (-13051739.303 3840207.905)
4946632056,https://www.inaturalist.org/observations/23034...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,36.909267,-122.026831,11,2023,2481955,HUMAN_OBSERVATION,POINT (-13583964.69 4400804.577)
4937187752,https://www.inaturalist.org/observations/14980...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,49.000610,-123.167541,2,2023,2481955,HUMAN_OBSERVATION,POINT (-13710947.946 6242699.205)
4936192321,https://www.inaturalist.org/observations/19074...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,34.725271,-118.167093,11,2023,2481955,HUMAN_OBSERVATION,POINT (-13154300.621 4102268.936)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4018248179,https://www.inaturalist.org/observations/14657...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",CA,PRESENT,,48.315404,-123.650051,1,2023,2481955,HUMAN_OBSERVATION,POINT (-13764660.714 6127561.023)
4018104911,https://www.inaturalist.org/observations/14636...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,36.959997,-122.018938,1,2023,2481955,HUMAN_OBSERVATION,POINT (-13583086.045 4407839.324)
4015258054,https://www.inaturalist.org/observations/14594...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,57.082025,-135.379996,1,2023,2481955,HUMAN_OBSERVATION,POINT (-15070432.218 7741003.326)
4011669284,https://www.inaturalist.org/observations/14571...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,39.517589,-83.990892,1,2023,2481955,HUMAN_OBSERVATION,POINT (-9349823.329 4768891.522)


In [6]:
gaviapac_gbif_gdf.crs

In [7]:

gaviapac_ecoregion_gdf = (
    ecoreg_gdf
    # Match the CRS of the GBIF data and the ecoregions
    .to_crs(gaviapac_gbif_gdf.crs)
    # Find ecoregion for each observation
    .sjoin(
        gaviapac_gbif_gdf,
        how='inner', 
        predicate='contains')
    # Select the required columns
    [['OBJECTID', 'gbifID', 'ECO_NAME','BIOME_NUM','BIOME_NAME', 'month', 'SHAPE_AREA']]
)
#gaviapac_ecoregion_gdf

In [8]:
# Aggregate the occurrences to ecoregion and month
gaviapac_occ_df = (
    gbif_ecoregion_gdf
    #.reset index()
    # For each ecoregion, for each month...
    .groupby(['ecoregion', 'month'])
    # ...count the number of occurrences
    .agg(occurrences=('gbifID', 'count'),
         area=('SHAPE_AREA', 'first'))
)
# Get rid of rare observations (possible misidentification?)
gaviapac_occ_df = occurrence_df[occurrence_df.occurrences > 1]

#Normalize by area
gaviapac_occ_df['density'] = (
    gaviapac_occ_df.occurrences / gaviapac_occ_df.area
)

#gaviapac_occ_df

In [9]:
#check the monthly data values across all ecoregions
# gaviapac_occ_df.groupby('month').mean()

In [10]:
#check the data given plotting issues
# #ecoregion_mean = gaviapac_occ_df.groupby('ecoregion').mean()
#ecoregion_mean
#ecoregion_mean.to_csv('ecoregion_means.csv')
#gaviapac_occ_df.to_csv('gaviapacifica_occur_df.csv')

In [11]:
#gaviapac_occ_df.crs
gaviapac_ecoreg_gdf = ecoreg_gdf.join(gaviapac_occ_df)

In [12]:
# setup slider widget to be labeled as the month name
mon_widget = pn.widgets.DiscreteSlider(
            options={calendar.month_name[month_num]: month_num 
                     for month_num in range(1,13) }
        )
#mon_widget

In [13]:
# Get the plot bounds so they don't change with the slider
xmin, ymin, xmax, ymax = gaviapac_ecoreg_gdf.to_crs(ccrs.Mercator()).total_bounds

# Plot occurrence by ecoregion and month
gaviapac_migration_plot = (
    gaviapac_ecoreg_gdf
    .hvplot(
        c='norm_occurrences',
        groupby='month',
        # Use background tiles
        geo=True, crs=ccrs.Mercator(), tiles='CartoLight',
        title="Pacific Loon (Gavia pacifica) Migration Across Ecoregions in 2023",
        xlim=(xmin, xmax), ylim=(ymin, ymax),
        frame_height=600,
        widgets = {'month': mon_widget},
        widget_location='bottom'
    )
)

# Save the plot
gaviapac_migration_plot.save('gaviapac_migration_plot_migration.html', 
                                            embed=True)

# Show the plot
gaviapac_migration_plot

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



                                               





BokehModel(combine_events=True, render_bundle={'docs_json': {'11615e4d-000b-4e26-9a14-cbfecfef5155': {'version…

2024-11-07 20:36:49,345 ERROR: panel.reactive - Callback failed for object named '' changing property {'value': 10} 
Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/site-packages/holoviews/plotting/bokeh/element.py", line 2222, in _update_glyphs
    self._update_glyph(renderer, properties, mapping, glyph, source, data)
  File "/opt/conda/lib/python3.11/site-packages/holoviews/plotting/bokeh/element.py", line 1992, in _update_glyph
    glyph.update(**update)
  File "/opt/conda/lib/python3.11/site-packages/bokeh/core/has_props.py", line 483, in update
    setattr(self, k, v)
  File "/opt/conda/lib/python3.11/site-packages/bokeh/core/has_props.py", line 336, in __setattr__
    return super().__setattr__(name, value)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.11/site-packages/bokeh/core/property/descriptors.py", line 330, in __set__
    value = self.property.prepare_value(obj, self.name, value)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

AbbreviatedException: ValueError: failed to validate MultiPolygons(id='p1110', ...).fill_color: expected an element of either String, Nullable(Color), Instance(Value), Instance(Field), Instance(Expr), Struct(value=Nullable(Color), transform=Instance(Transform)), Struct(field=String, transform=Instance(Transform)) or Struct(expr=Instance(Expression), transform=Instance(Transform)), got dim('norm_occurrences')

To view the original traceback, catch this exception and call print_traceback() method.

In [14]:
gaviapac_ecoreg_gdf.to_csv('gavia_pacifica_ecoreg_gdf.csv')