# Get Occurrence Data

Fetches occurrence data from GBIF, for the species that require it, using `pygbif` (the Python client for the GBIF API).

In [2]:
# EXTENSIONS
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# IMPORTS
import csv
import os
from glob import glob

import numpy as np
import pandas as pd
from pygbif import occurrences as occ
from tqdm import tqdm

import ___creds_gbif as creds

In [4]:
# SETTINGS
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# GET DOWNLOAD KEYS FOR SERVER SIDE REQUESTS PER CHUNK
# Submits one download request to GBIF and stores the returned download key.
gbif_keys = ["5824863"]
download_keys = []

# Build the predicate list step by step: restrict to the requested taxon keys,
# then keep only records that have coordinates and no geospatial issue.
predicates = []
predicates.append({"type": "in", "key": "TAXON_KEY", "values": gbif_keys})
predicates.append({"type": "equals", "key": "HAS_COORDINATE", "value": "true"})
predicates.append({"type": "equals", "key": "HAS_GEOSPATIAL_ISSUE", "value": "false"})
query = {"type": "and", "predicates": predicates}

# The first element of the returned value is the download key
# (see the printed key list below the request).
dk = occ.download(
    queries=query,
    format="SIMPLE_CSV",
    user=creds.GBIF_USER,
    pwd=creds.GBIF_PWD,
    email=creds.GBIF_EMAIL,
)
download_keys.append(dk[0])
print("Download Keys =", download_keys)

INFO:Your download key is 0013072-251025141854904


Download Keys = ['0013072-251025141854904']


In [13]:
# LOAD DOWNLOADED DATA
# GBIF SIMPLE_CSV exports are tab-delimited with no quoting or escaping, so
# quote handling is disabled. With pandas' default quotechar, a literal '"'
# inside a free-text field can swallow tabs/newlines and silently merge or
# corrupt records.
data = pd.read_csv("../../db/occurrence/raw/gbif_0013072-251025141854904.csv",
                   sep="\t",
                   quoting=csv.QUOTE_NONE)
# Keep only records marked as present and drop rows that are exact duplicates.
data = data[data.occurrenceStatus == "PRESENT"].drop_duplicates()
# Require both coordinates to be present.
data = data.dropna(subset=["decimalLatitude", "decimalLongitude"])

In [17]:
# PREVIEW FILTERED DATA
data

Unnamed: 0,gbifID,datasetKey,occurrenceID,kingdom,phylum,class,order,family,genus,species,infraspecificEpithet,taxonRank,scientificName,verbatimScientificName,verbatimScientificNameAuthorship,countryCode,locality,stateProvince,occurrenceStatus,individualCount,publishingOrgKey,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,coordinatePrecision,elevation,elevationAccuracy,depth,depthAccuracy,eventDate,day,month,year,taxonKey,speciesKey,basisOfRecord,institutionCode,collectionCode,catalogNumber,recordNumber,identifiedBy,dateIdentified,license,rightsHolder,recordedBy,typeStatus,establishmentMeans,lastInterpreted,mediaType,issue
0,5868133124,50c9509d-22c7-4a22-a47d-8c48425ef4a7,https://www.inaturalist.org/observations/32118...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Rugulopteryx okamurae (E.Y.Dawson) I.K.Hwang, ...",Rugulopteryx okamurae,,FR,,Languedoc-Roussillon,PRESENT,,28eb1a3f-1c15-4a95-931a-4af90ecb574d,43.424697,3.700030,96.0,,,,,,2025-10-15,15.0,10.0,2025.0,5824863,5824863,HUMAN_OBSERVATION,iNaturalist,Observations,321186143,,Pascal GIRARD,2025-10-16T07:09:25,CC_BY_NC_4_0,Pascal GIRARD,Pascal GIRARD,,,2025-10-27T23:58:07.848Z,,COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COOR...
1,5867862883,50c9509d-22c7-4a22-a47d-8c48425ef4a7,https://www.inaturalist.org/observations/32118...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Rugulopteryx okamurae (E.Y.Dawson) I.K.Hwang, ...",Rugulopteryx okamurae,,FR,,Languedoc-Roussillon,PRESENT,,28eb1a3f-1c15-4a95-931a-4af90ecb574d,43.424697,3.700030,96.0,,,,,,2025-10-15,15.0,10.0,2025.0,5824863,5824863,HUMAN_OBSERVATION,iNaturalist,Observations,321186145,,Pascal GIRARD,2025-10-16T07:09:25,CC_BY_NC_4_0,Pascal GIRARD,Pascal GIRARD,,,2025-10-28T00:56:12.452Z,,COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COOR...
2,5844431381,50c9509d-22c7-4a22-a47d-8c48425ef4a7,https://www.inaturalist.org/observations/31983...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Rugulopteryx okamurae (E.Y.Dawson) I.K.Hwang, ...",Rugulopteryx okamurae,,FR,,Provence-Alpes-Côte d'Azur,PRESENT,,28eb1a3f-1c15-4a95-931a-4af90ecb574d,43.214032,5.325451,63.0,,,,,,2025-10-08T20:58,8.0,10.0,2025.0,5824863,5824863,HUMAN_OBSERVATION,iNaturalist,Observations,319838079,,Sylvain Le Bris,2025-10-09T21:11:57,CC_BY_NC_4_0,Sylvain Le Bris,Sylvain Le Bris,,,2025-10-27T23:56:48.064Z,StillImage,COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COOR...
3,5844122096,50c9509d-22c7-4a22-a47d-8c48425ef4a7,https://www.inaturalist.org/observations/31960...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Rugulopteryx okamurae (E.Y.Dawson) I.K.Hwang, ...",Rugulopteryx okamurae,,FR,,Provence-Alpes-Côte d'Azur,PRESENT,,28eb1a3f-1c15-4a95-931a-4af90ecb574d,43.449560,4.433542,185.0,,,,,,2025-10-07T21:04,7.0,10.0,2025.0,5824863,5824863,HUMAN_OBSERVATION,iNaturalist,Observations,319600251,,Frédéric ANDRE,2025-10-08T17:21:31,CC_BY_NC_4_0,Frédéric ANDRE,Frédéric ANDRE,,,2025-10-27T23:56:43.210Z,StillImage,COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COOR...
4,5840069636,50c9509d-22c7-4a22-a47d-8c48425ef4a7,https://www.inaturalist.org/observations/31916...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Rugulopteryx okamurae (E.Y.Dawson) I.K.Hwang, ...",Rugulopteryx okamurae,,FR,,Provence-Alpes-Côte d'Azur,PRESENT,,28eb1a3f-1c15-4a95-931a-4af90ecb574d,43.210931,5.350958,67.0,,,,,,2025-10-02T21:22,2.0,10.0,2025.0,5824863,5824863,HUMAN_OBSERVATION,iNaturalist,Observations,319164622,,Sylvain Le Bris,2025-10-06T16:08:52,CC_BY_NC_4_0,Sylvain Le Bris,Sylvain Le Bris,,,2025-10-28T00:55:51.306Z,StillImage,COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COOR...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397,1252989293,2754fff5-18b1-4706-bc8c-a647f08b665b,urn:catalog:CSIRO:CMAR_SRFME_ALGAL_BIOMASS:269...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Dilophus marginatus (Okamura) Okamura, 1915",Dilophus marginatus,J.Agardh ex J.Agardh,AU,,,PRESENT,8.0,5fa89f68-9af0-4a0d-8998-ea39695c1db9,-30.298030,115.028950,100.0,,,,6.0,0.0,2003-10-07,7.0,10.0,2003.0,7813249,5824863,HUMAN_OBSERVATION,"CSIRO, Australia",CMAR_SRFME_ALGAL_BIOMASS,269444-54025005,,,,CC_BY_NC_4_0,,,,,2025-10-08T01:00:25.566Z,,GEODETIC_DATUM_ASSUMED_WGS84;FOOTPRINT_WKT_INV...
1398,1252989287,2754fff5-18b1-4706-bc8c-a647f08b665b,urn:catalog:CSIRO:CMAR_SRFME_ALGAL_BIOMASS:269...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Dilophus marginatus (Okamura) Okamura, 1915",Dilophus marginatus,J.Agardh ex J.Agardh,AU,,,PRESENT,37.0,5fa89f68-9af0-4a0d-8998-ea39695c1db9,-33.535870,115.033220,100.0,,,,6.0,0.0,2003-09-18,18.0,9.0,2003.0,7813249,5824863,HUMAN_OBSERVATION,"CSIRO, Australia",CMAR_SRFME_ALGAL_BIOMASS,269264-54025005,,,,CC_BY_NC_4_0,,,,,2025-10-08T01:00:25.469Z,,GEODETIC_DATUM_ASSUMED_WGS84;FOOTPRINT_WKT_INV...
1399,1252989284,2754fff5-18b1-4706-bc8c-a647f08b665b,urn:catalog:CSIRO:CMAR_SRFME_ALGAL_BIOMASS:269...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Dilophus marginatus (Okamura) Okamura, 1915",Dilophus marginatus,J.Agardh ex J.Agardh,AU,,,PRESENT,25.0,5fa89f68-9af0-4a0d-8998-ea39695c1db9,-30.298030,115.028950,100.0,,,,6.0,0.0,2003-10-07,7.0,10.0,2003.0,7813249,5824863,HUMAN_OBSERVATION,"CSIRO, Australia",CMAR_SRFME_ALGAL_BIOMASS,269441-54025005,,,,CC_BY_NC_4_0,,,,,2025-10-08T01:00:25.704Z,,GEODETIC_DATUM_ASSUMED_WGS84;FOOTPRINT_WKT_INV...
1400,1252989279,2754fff5-18b1-4706-bc8c-a647f08b665b,urn:catalog:CSIRO:CMAR_SRFME_ALGAL_BIOMASS:269...,Chromista,Ochrophyta,Phaeophyceae,Dictyotales,Dictyotaceae,Rugulopteryx,Rugulopteryx okamurae,,SPECIES,"Dilophus marginatus (Okamura) Okamura, 1915",Dilophus marginatus,J.Agardh ex J.Agardh,AU,,,PRESENT,30.0,5fa89f68-9af0-4a0d-8998-ea39695c1db9,-30.298030,115.028950,100.0,,,,6.0,0.0,2003-10-07,7.0,10.0,2003.0,7813249,5824863,HUMAN_OBSERVATION,"CSIRO, Australia",CMAR_SRFME_ALGAL_BIOMASS,269445-54025005,,,,CC_BY_NC_4_0,,,,,2025-10-08T01:00:25.455Z,,GEODETIC_DATUM_ASSUMED_WGS84;FOOTPRINT_WKT_INV...


In [16]:
# SUMMARY STATISTICS OF THE YEAR COLUMN
data["year"].describe()

count    1153.000000
mean     2017.431917
std         9.228245
min      1938.000000
25%      2013.000000
50%      2021.000000
75%      2024.000000
max      2025.000000
Name: year, dtype: float64