### Steps for downloading GBIF data for another species for your portfolio post

#### Step 1: choose a terrestrial species that migrates (and find its scientific/Latin name)
- Western Tanager (Piranga ludoviciana)

#### Step 2: make an account on [gbif.org](https://www.gbif.org/)
- raini.helmstadter@colorado.edu

#### Step 3: Install the pygbif package in your Python environment

If you're on a Mac, just run this code chunk:

In [None]:
# %% bash
# pip install pygbif

If you're on a PC, open Git Bash in the Terminal at the bottom of the Jupyter notebook, and enter this: 

`pip install pygbif`

#### Step 4: load the packages you'll need

In [15]:
import pathlib
import os
import time
import zipfile

from getpass import getpass
from glob import glob

import geopandas as gpd
import pygbif.occurrences as occ
import pandas as pd
import pygbif.species as species

#### Step 5: make a folder for your GBIF data
You'll need to modify this code to match your folder structure:

In [12]:
# Folder names leading from the home directory down to this project's data
project_parts = (
    # Earth Analytics data directory
    'Documents',
    'Graduate_School',
    'EDA_Certificate',
    'data',
    # Project directory
    'migration-portfolio',
)

# Project data directory, rooted at the user's home directory
wtan_dir = os.path.join(pathlib.Path.home(), *project_parts)

# GBIF downloads get their own subfolder inside the project directory
gbif_dir = os.path.join(wtan_dir, 'gbif_downloads')

# Create both folders; exist_ok=True keeps re-runs from raising
for data_dir in (wtan_dir, gbif_dir):
    os.makedirs(data_dir, exist_ok=True)

gbif_dir

'C:\\Users\\raini\\Documents\\Graduate_School\\EDA_Certificate\\data\\migration-portfolio\\gbif_downloads'

#### Step 6: link GBIF to your Jupyter notebook
- Change to `reset = True` the first time you run it. When you run it, you'll be prompted to enter your GBIF username, password, and associated email address
- Then change it back to `reset = False` so you can re-run the chunk without having to reconnect to GBIF
- You don’t need to modify this code chunk in any other way


In [None]:
####--------------------------####
#### DO NOT MODIFY THIS CODE! ####
####--------------------------####
# Prompts for your GBIF credentials and keeps them in environment
# variables for the rest of the session.
# NEVER type your credentials directly into your code!!!!

# GBIF needs a username, password, and email, and all three must
# belong to the same account.
# Set to True to be prompted again even if credentials are already stored.
reset = False

# (environment variable, prompt function, prompt text), in asking order.
# getpass is used for the password so it is not echoed while typing.
credential_prompts = (
    ('GBIF_USER', input, 'GBIF username:'),
    ('GBIF_PWD', getpass, 'GBIF password:'),
    ('GBIF_EMAIL', input, 'GBIF email:'),
)

for env_name, ask, prompt_text in credential_prompts:
    # Only ask when the value is missing or a reset was requested
    if reset or env_name not in os.environ:
        os.environ[env_name] = ask(prompt_text)

#### Step 7: Get the species key for your species from GBIF using the `name_backbone` command
You'll need to modify this code with the scientific name of your species

In [6]:
### look up the species in the GBIF backbone by its scientific name
backbone = species.name_backbone(name='Piranga ludoviciana')

### display the matched record
backbone

{'usageKey': 2488484,
 'scientificName': 'Piranga ludoviciana (A.Wilson, 1811)',
 'canonicalName': 'Piranga ludoviciana',
 'rank': 'SPECIES',
 'status': 'ACCEPTED',
 'confidence': 99,
 'matchType': 'EXACT',
 'kingdom': 'Animalia',
 'phylum': 'Chordata',
 'order': 'Passeriformes',
 'family': 'Cardinalidae',
 'genus': 'Piranga',
 'species': 'Piranga ludoviciana',
 'kingdomKey': 1,
 'phylumKey': 44,
 'classKey': 212,
 'orderKey': 729,
 'familyKey': 9285,
 'genusKey': 2488483,
 'speciesKey': 2488484,
 'class': 'Aves'}

In [9]:

### pull out the species key: the backbone match stores it under 'usageKey'
### (it is interpolated into the GBIF download query as `speciesKey`)
species_key = backbone['usageKey']

### check it out
species_key

2488484

#### Step 8: Download Data via the GBIF API


In [18]:
# Only download once: everything below is skipped when a .csv file
# already exists in gbif_dir
gbif_pattern = os.path.join(gbif_dir, '*.csv')

if not glob(gbif_pattern):
    # Only submit one request: the download key is cached in an
    # environment variable so re-running this cell reuses the request
    if 'GBIF_DOWNLOAD_KEY' not in os.environ:
        # Submit query to GBIF
        gbif_query = occ.download([
            f'speciesKey = {species_key}',
            'hasCoordinate = True',
            'year = 2024',
        ])
        # Take first result (stored as the download key)
        os.environ['GBIF_DOWNLOAD_KEY'] = gbif_query[0]

    # Wait for the download to build
    dld_key = os.environ['GBIF_DOWNLOAD_KEY']
    # Statuses after which the request will never reach 'SUCCEEDED';
    # without this check the loop below would poll forever
    failed_statuses = {'FAILED', 'KILLED', 'CANCELLED', 'FILE_ERASED'}
    wait = occ.download_meta(dld_key)['status']
    while wait != 'SUCCEEDED':
        if wait in failed_statuses:
            # Forget the dead request so the next run submits a new one
            del os.environ['GBIF_DOWNLOAD_KEY']
            raise RuntimeError(
                f'GBIF download {dld_key} ended with status {wait!r}; '
                're-run this cell to submit a new request.'
            )
        # Pause between polls, then check the status again
        time.sleep(5)
        wait = occ.download_meta(dld_key)['status']

    # Download GBIF data
    dld_info = occ.download_get(dld_key, path=gbif_dir)
    dld_path = dld_info['path']

    # Unzip GBIF data
    with zipfile.ZipFile(dld_path) as dld_zip:
        dld_zip.extractall(path=gbif_dir)

    # Clean up the .zip file
    os.remove(dld_path)

# Find the extracted .csv file path (first result)
gbif_csv_paths = glob(gbif_pattern)
if not gbif_csv_paths:
    # Fail with a clear message instead of an IndexError on [0]
    raise FileNotFoundError(
        f'No .csv file found in {gbif_dir} after the GBIF download.'
    )
original_gbif_path = gbif_csv_paths[0]
original_gbif_path

'C:\\Users\\raini\\Documents\\Graduate_School\\EDA_Certificate\\data\\migration-portfolio\\gbif_downloads\\0049809-251009101135966.csv'

#### Step 9: Load the GBIF Data into Python

In [19]:
# Shell out to `head` to preview the first two lines of the downloaded
# file (the column header row and the first record)
!head -n 2 $original_gbif_path

gbifID	datasetKey	occurrenceID	kingdom	phylum	class	order	family	genus	species	infraspecificEpithet	taxonRank	scientificName	verbatimScientificName	verbatimScientificNameAuthorship	countryCode	locality	stateProvince	occurrenceStatus	individualCount	publishingOrgKey	decimalLatitude	decimalLongitude	coordinateUncertaintyInMeters	coordinatePrecision	elevation	elevationAccuracy	depth	depthAccuracy	eventDate	day	month	year	taxonKey	speciesKey	basisOfRecord	institutionCode	collectionCode	catalogNumber	recordNumber	identifiedBy	dateIdentified	license	rightsHolder	recordedBy	typeStatus	establishmentMeans	lastInterpreted	mediaType	issue
5835936327	e635240a-3cb1-4d26-ab87-57d8c7afdfdb	b7ed2b96-3213-4ffe-87ed-88174e6612aa	Animalia	Chordata	Aves	Passeriformes	Cardinalidae	Piranga	Piranga ludoviciana		SPECIES	Piranga ludoviciana (A.Wilson, 1811)	Piranga ludoviciana		US	Gilbert-Baker Wildlife Management Area	Nebraska	PRESENT		b554c320-0560-11d8-b851-b8a03c50a862	42.770317	-103.939122	3.0						2024-0

In [25]:
# Only the record ID, observation month, and coordinates are loaded
gbif_columns = ['gbifID', 'month', 'decimalLatitude', 'decimalLongitude']

# The GBIF file is tab-separated; rows are indexed by GBIF record ID
gbif_df = pd.read_csv(
    original_gbif_path,
    sep='\t',
    usecols=gbif_columns,
    index_col='gbifID',
)

gbif_df.head()

Unnamed: 0_level_0,decimalLatitude,decimalLongitude,month
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5835936327,42.770317,-103.939122,6
5835936343,42.765938,-103.944211,6
5835936316,42.765938,-103.944211,6
5196101425,40.043866,-105.282882,7
5196101437,40.268283,-105.353865,9
