# Examining Migration of the Pacific Loon (Gavia pacifica) in 2023 Using Data from the Global Biodiversity Information Facility (GBIF)

**TODO:** Add the project narrative here, along with a couple of citations.

In [4]:
%store -r

import os
import pathlib
import time
import zipfile
from getpass import getpass
from glob import glob

import geopandas as gpd
import hvplot.pandas
import pandas as pd
import pygbif.occurrences as occ
import pygbif.species as species

In [6]:
# Load the ecoregion boundaries from the local earth-analytics data folder.
# NOTE(review): 'ecoregions_dirname' and 'ecoregions_filename.shp' read like
# unfilled template placeholders -- confirm they match the files on disk.
ecoreg_shp_dir = str(
    pathlib.Path.home()              # home directory
    / 'earth-analytics' / 'data'     # earth analytics data directory
    / 'species_dist_coding_assign'   # project directory
    / 'ecoregions_dirname'
)
# Make sure the directory exists (no error if it is already there)
os.makedirs(ecoreg_shp_dir, exist_ok=True)

# Full path to the shapefile, then read the ecoregion boundaries from it
ecoregion_shppath = os.path.join(ecoreg_shp_dir, 'ecoregions_filename.shp')
ecoreg_gdf = gpd.read_file(ecoregion_shppath)

# The index is named 'ecoregion' so it will match the other data later on
ecoreg_gdf.index.name = 'ecoregion'

In [8]:
# Show the CRS of the ecoregion boundaries. The saved output below this cell
# is the CRS repr, so the expression has to be live for the cell to
# reproduce that output on a fresh run.
ecoreg_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [7]:
# Turn the Pacific loon occurrence table into a GeoDataFrame of points.
# NOTE(review): gaviapac_gbif_df is not created in this notebook; presumably
# it is restored by `%store -r` in the first cell -- confirm.
gaviapac_gbif_gdf = gpd.GeoDataFrame(
    gaviapac_gbif_df,
    # One point per record: x is longitude, y is latitude
    geometry=gpd.points_from_xy(
        x=gaviapac_gbif_df['decimalLongitude'],
        y=gaviapac_gbif_df['decimalLatitude'],
    ),
    crs="EPSG:4326",
)
# All columns are kept; no column subset is selected here.
# gaviapac_gbif_gdf  # uncomment to preview the table


Unnamed: 0_level_0,occurrenceID,species,scientificName,countryCode,occurrenceStatus,individualCount,decimalLatitude,decimalLongitude,month,year,speciesKey,basisOfRecord,geometry
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
4953151418,https://www.inaturalist.org/observations/17616...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",CA,PRESENT,,58.765791,-94.122485,8,2023,2481955,HUMAN_OBSERVATION,POINT (-94.12248 58.76579)
4950273871,https://www.inaturalist.org/observations/24385...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,32.758759,-117.245769,11,2023,2481955,HUMAN_OBSERVATION,POINT (-117.24577 32.75876)
4946632056,https://www.inaturalist.org/observations/23034...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,36.909267,-122.026831,11,2023,2481955,HUMAN_OBSERVATION,POINT (-122.02683 36.90927)
4937187752,https://www.inaturalist.org/observations/14980...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,49.000610,-123.167541,2,2023,2481955,HUMAN_OBSERVATION,POINT (-123.16754 49.00061)
4936192321,https://www.inaturalist.org/observations/19074...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,34.725271,-118.167093,11,2023,2481955,HUMAN_OBSERVATION,POINT (-118.16709 34.72527)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4018248179,https://www.inaturalist.org/observations/14657...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",CA,PRESENT,,48.315404,-123.650051,1,2023,2481955,HUMAN_OBSERVATION,POINT (-123.65005 48.3154)
4018104911,https://www.inaturalist.org/observations/14636...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,36.959997,-122.018938,1,2023,2481955,HUMAN_OBSERVATION,POINT (-122.01894 36.96)
4015258054,https://www.inaturalist.org/observations/14594...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,57.082025,-135.379996,1,2023,2481955,HUMAN_OBSERVATION,POINT (-135.38 57.08202)
4011669284,https://www.inaturalist.org/observations/14571...,Gavia pacifica,"Gavia pacifica (Lawrence, 1858)",US,PRESENT,,39.517589,-83.990892,1,2023,2481955,HUMAN_OBSERVATION,POINT (-83.99089 39.51759)


In [9]:
# Spatially join the occurrences to the ecoregions: every output row pairs
# one ecoregion polygon with one GBIF observation that it contains.
gaviapac_ecoregion_gdf = gpd.sjoin(
    # Ecoregions, reprojected to match the CRS of the GBIF data
    ecoreg_gdf.to_crs(gaviapac_gbif_gdf.crs),
    # Observations to place inside the ecoregions
    gaviapac_gbif_gdf,
    how='inner',
    predicate='contains',
)[
    # Keep only the columns needed downstream
    ['OBJECTID', 'gbifID', 'ECO_NAME', 'BIOME_NUM', 'BIOME_NAME', 'month', 'SHAPE_AREA']
]
# gaviapac_ecoregion_gdf  # uncomment to preview the joined table

Unnamed: 0_level_0,OBJECTID,gbifID,ECO_NAME,BIOME_NUM,BIOME_NAME,month,SHAPE_AREA
ecoregion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9,10.0,4818305950,Alaska-St. Elias Range tundra,11.0,Tundra,6,28.388010
9,10.0,4703567487,Alaska-St. Elias Range tundra,11.0,Tundra,9,28.388010
9,10.0,4655774065,Alaska-St. Elias Range tundra,11.0,Tundra,6,28.388010
9,10.0,4818376050,Alaska-St. Elias Range tundra,11.0,Tundra,5,28.388010
9,10.0,4679977514,Alaska-St. Elias Range tundra,11.0,Tundra,6,28.388010
...,...,...,...,...,...,...,...
833,839.0,4724514514,Northern Rockies conifer forests,5.0,Temperate Conifer Forests,9,35.905513
833,839.0,4678994611,Northern Rockies conifer forests,5.0,Temperate Conifer Forests,5,35.905513
833,839.0,4734672195,Northern Rockies conifer forests,5.0,Temperate Conifer Forests,11,35.905513
833,839.0,4740547144,Northern Rockies conifer forests,5.0,Temperate Conifer Forests,11,35.905513


In [10]:
# Count occurrences per ecoregion and month and normalize them by area.
#
# The input is gaviapac_ecoregion_gdf, the spatial-join result built earlier
# in this notebook. The previous version read `gbif_ecoregion_gdf` and
# modified `occurrence_df`, neither of which is defined in this notebook, so
# the cell either raised NameError or silently picked up stale variables
# restored by `%store -r`.
gaviapac_occ_df = (
    gaviapac_ecoregion_gdf
    # Turn the 'ecoregion' index into a column so it can be grouped on
    .reset_index()
    # For each ecoregion, for each month...
    .groupby(['ecoregion', 'month'])
    # ...count the number of occurrences and carry the ecoregion area along
    .agg(occurrences=('gbifID', 'count'),
         area=('SHAPE_AREA', 'first'))
    # Normalize by area. Bracket access avoids any clash with the geometric
    # `.area` property that GeoDataFrames expose.
    # NOTE(review): SHAPE_AREA looks like it is in the shapefile's native
    # units (square degrees for EPSG:4326) -- confirm before interpreting.
    .assign(density=lambda counts: counts['occurrences'] / counts['area'])
)

# Get rid of rare observations (possible misidentification?).
# `occurrence_df` is the filtered table; `gaviapac_occ_df` keeps every
# ecoregion/month pair. `.copy()` makes the filtered table independent so
# later column assignments do not warn about writing to a slice.
occurrence_df = gaviapac_occ_df[gaviapac_occ_df['occurrences'] > 1].copy()