In [1]:
%%bash
pip install pygbif



In [2]:
import os
import pathlib
import time
import zipfile

from getpass import getpass
from glob import glob

import geopandas as gpd
import pandas as pd
import pygbif.occurrences as occ
import pygbif.species as species

In [3]:
# Project data lives under <home>/earth-analytics/data/<project name>
project_parts = ('earth-analytics', 'data', 'species_distribution_bobcat')
data_dir_bobcat = os.path.join(pathlib.Path.home(), *project_parts)

# Create the project directory (and any missing parents) if it is not there yet
os.makedirs(data_dir_bobcat, exist_ok=True)

# Sub-directory name for the GBIF data (only the path is built here)
gbif_dir_bobcat = os.path.join(data_dir_bobcat, 'gbif_bobcat')

In [4]:
# Display the resolved project data directory so its location can be checked
data_dir_bobcat

'/home/codespace/earth-analytics/data/species_distribution_bobcat'

In [5]:
# Set to True to discard any stored credentials and be prompted again
reset_credentials = False

# GBIF needs a username, password, and email.
# Each environment variable maps to (prompt function, prompt text); the
# password uses getpass so it is not echoed while typing.
credentials = {
    'GBIF_USER': (input, 'GBIF username:'),
    'GBIF_PWD': (getpass, 'GBIF password'),
    'GBIF_EMAIL': (input, 'GBIF email'),
}

for env_name, (ask, label) in credentials.items():
    # Forget the stored value when a reset was requested
    if reset_credentials:
        os.environ.pop(env_name, None)
    # Prompt only for credentials that are not in the environment yet
    if env_name not in os.environ:
        os.environ[env_name] = ask(label)

In [7]:
# Look up the stored GBIF username to confirm the credential cell worked
os.environ['GBIF_USER']

'brglea'

In [8]:
# Print GBIF_USER from a shell command to check the variable is also visible
# to subprocesses started from the notebook
!echo $GBIF_USER

brglea


In [9]:
# Check that a password is stored in the environment without displaying it
'GBIF_PWD' in os.environ

True

In [10]:
# Query species
species_info = species.name_lookup('lynx rufus', rank='SPECIES')

# Get the first result
first_result = species_info['results'][0]

# Get the species key (nubKey)
species_key = first_result['nubKey']

# Check the result
first_result['species'], species_key

('Lynx rufus', 2435246)

In [11]:
# Only download once: everything inside the `if` is skipped when a CSV has
# already been extracted into the GBIF directory
gbif_pattern = os.path.join(gbif_dir_bobcat, '*.csv')
if not glob(gbif_pattern):
    # Only submit one request: reuse the key stored in the environment by an
    # earlier run in this session instead of queueing a duplicate download
    if 'GBIF_DOWNLOAD_KEY' not in os.environ:
        # Submit query to GBIF, using the key found by the species lookup
        # rather than a hard-coded copy of it
        gbif_query = occ.download([
            f"speciesKey = {species_key}",
            "hasCoordinate = True",
            "year = 2023",
        ])
        # occ.download returns (download key, request payload)
        os.environ['GBIF_DOWNLOAD_KEY'] = gbif_query[0]

    # Wait for the download to build
    download_key = os.environ['GBIF_DOWNLOAD_KEY']
    # Terminal states that will never turn into SUCCEEDED; polling on them
    # would loop forever
    failed_statuses = {'CANCELLED', 'FAILED', 'KILLED', 'FILE_ERASED'}
    while True:
        download_status = occ.download_meta(download_key)['status']
        if download_status == 'SUCCEEDED':
            break
        if download_status in failed_statuses:
            # Drop the dead key so re-running the cell submits a new request
            os.environ.pop('GBIF_DOWNLOAD_KEY', None)
            raise RuntimeError(
                f"GBIF download {download_key} ended with status "
                f"{download_status}")
        # Still being prepared: pause before asking again
        time.sleep(5)

    # Download GBIF data (a zip archive saved into the project directory)
    download_info = occ.download_get(download_key, path=data_dir_bobcat)

    # Unzip GBIF data
    with zipfile.ZipFile(download_info['path']) as download_zip:
        download_zip.extractall(path=gbif_dir_bobcat)

# Find the extracted .csv file path (take the first result); sorting makes
# the choice deterministic if more than one CSV is present
gbif_csv_paths = sorted(glob(gbif_pattern))
if not gbif_csv_paths:
    raise FileNotFoundError(
        f"No GBIF .csv file found matching {gbif_pattern}")
gbif_path = gbif_csv_paths[0]

INFO:Your download key is 0014118-241007104925546
INFO:Download file size: 247369 bytes
INFO:On disk at /home/codespace/earth-analytics/data/species_distribution_bobcat/0014118-241007104925546.zip


In [12]:
# Display the path of the extracted GBIF CSV that the following cells read
gbif_path

'/home/codespace/earth-analytics/data/species_distribution_bobcat/gbif_bobcat/0014118-241007104925546.csv'

In [13]:
# `gbif_query` is only bound when this kernel session submitted the download
# request. After a restart with the CSV already cached the name does not
# exist, so fall back to the download key instead of raising NameError.
try:
    download_request = gbif_query
except NameError:
    # NOTE(review): assumes the extracted CSV is named `<download key>.csv`
    # — confirm against the GBIF archive layout
    download_request = pathlib.Path(gbif_path).stem
download_request

('0014118-241007104925546',
 {'creator': 'brglea',
  'notification_address': ['briglea@gmail.com'],
  'sendNotification': True,
  'predicate': {'type': 'and',
   'predicates': [{'type': 'equals', 'key': 'SPECIES_KEY', 'value': '2435246'},
    {'type': 'equals', 'key': 'HAS_COORDINATE', 'value': 'True'},
    {'type': 'equals', 'key': 'YEAR', 'value': '2023'}]},
  'format': 'SIMPLE_CSV'})

In [14]:
!head -n 2 $gbif_path 

gbifID	datasetKey	occurrenceID	kingdom	phylum	class	order	family	genus	species	infraspecificEpithet	taxonRank	scientificName	verbatimScientificName	verbatimScientificNameAuthorship	countryCode	locality	stateProvince	occurrenceStatus	individualCount	publishingOrgKey	decimalLatitude	decimalLongitude	coordinateUncertaintyInMeters	coordinatePrecision	elevation	elevationAccuracy	depth	depthAccuracy	eventDate	day	month	year	taxonKey	speciesKey	basisOfRecord	institutionCode	collectionCode	catalogNumber	recordNumber	identifiedBy	dateIdentified	license	rightsHolder	recordedBy	typeStatus	establishmentMeans	lastInterpreted	mediaType	issue
4953158569	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/151699352	Animalia	Chordata	Mammalia	Carnivora	Felidae	Lynx	Lynx rufus		SPECIES	Lynx rufus (Schreber, 1777)	Lynx rufus		US		California	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	34.205588	-118.36292	28846.0						2023-03-16T03:21	16	3	2023	2435246	2435246	HUMAN_OBSERVATION

In [15]:
# Load the GBIF data
gbif_df = pd.read_csv(
    gbif_path, 
    delimiter='\t',
    index_col='gbifID',
    #usecols=[]
)
gbif_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3657 entries, 4953158569 to 4011547228
Data columns (total 49 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   datasetKey                        3657 non-null   object 
 1   occurrenceID                      3657 non-null   object 
 2   kingdom                           3657 non-null   object 
 3   phylum                            3657 non-null   object 
 4   class                             3657 non-null   object 
 5   order                             3657 non-null   object 
 6   family                            3657 non-null   object 
 7   genus                             3657 non-null   object 
 8   species                           3657 non-null   object 
 9   infraspecificEpithet              145 non-null    object 
 10  taxonRank                         3657 non-null   object 
 11  scientificName                    3657 non-null   object 
 

In [17]:
# Load the GBIF data
gbif_df = pd.read_csv(
    gbif_path, 
    delimiter='\t',
    index_col='gbifID',
    usecols=['gbifID', 'month', 'decimalLatitude', 'decimalLongitude']
)
gbif_df.head()

Unnamed: 0_level_0,decimalLatitude,decimalLongitude,month
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4953158569,34.205588,-118.36292,3
4953055247,36.537634,-121.890603,10
4953008628,44.12236,-119.848505,11
4952902566,34.270494,-118.320036,9
4952869276,41.546645,-72.60872,1
