# Using aphiaIDs to download OBIS occurrences for Bio/Eco EOVs

Created: 2024-09-13

In [3]:
library(gh)
library(readr)
library(robis)
library(dplyr)

In [5]:
# Ask the GitHub contents API for the listing of the eov_taxonomy directory
# in the ioos/marine_life_data_network repository, which is where the EOV
# taxonomy files are stored.
repo_files <- gh(
  "GET /repos/:owner/:repo/contents/:path",
  owner = "ioos",
  repo = "marine_life_data_network",
  path = "eov_taxonomy"
)

# Save each listed file into the working directory under its own name;
# entries whose type is not "file" are skipped.
for (file_info in repo_files) {
  if (file_info$type != "file") {
    next
  }
  download.file(file_info$download_url, destfile = file_info$name)
}

In [6]:
# Let's try mangroves as our first example.
# Read the mangroves taxonomy CSV downloaded above and turn each
# acceptedTaxonId LSID into a bare ID stored in the ID column.
mangroves <- read.csv("mangroves.csv")
# fixed = TRUE matches the LSID prefix literally. As a regular expression
# every "." in the prefix would be a wildcard, so the separator before the ID
# was only being removed by accident.
mangroves$ID <- gsub(
  "urn:lsid:marinespecies.org:taxname:",
  "",
  mangroves$acceptedTaxonId,
  fixed = TRUE
)
# Store the IDs as integers rather than doubles: paste() renders a double
# such as 100000 as "1e+05", which would corrupt the query string below.
mangroves$ID <- as.integer(mangroves$ID)
# Collapse the IDs into one comma-separated string, leaving out rows with no
# usable ID so the literal text "NA" never ends up in the query.
mangroveIdentifiers <- paste(
  mangroves$ID[!is.na(mangroves$ID)],
  collapse = ", "
)

In [7]:
# Search OBIS for occurrence records matching the taxon IDs collected in the
# previous step. The download takes a while: there were over 130k records
# when this script was written in Sept 2024.
mangrove_occ <- robis::occurrence(taxonid = mangroveIdentifiers)
# Number of occurrence records that came back from OBIS (the row count).
dim(mangrove_occ)[1L]

Retrieved 5000 records of approximately 132901 (3%)
Retrieved 10000 records of approximately 132901 (7%)
Retrieved 15000 records of approximately 132901 (11%)
Retrieved 20000 records of approximately 132901 (15%)
Retrieved 25000 records of approximately 132901 (18%)
Retrieved 30000 records of approximately 132901 (22%)
Retrieved 35000 records of approximately 132901 (26%)
Retrieved 40000 records of approximately 132901 (30%)
Retrieved 45000 records of approximately 132901 (33%)
Retrieved 50000 records of approximately 132901 (37%)
Retrieved 55000 records of approximately 132901 (41%)
Retrieved 60000 records of approximately 132901 (45%)
Retrieved 65000 records of approximately 132901 (48%)
Retrieved 70000 records of approximately 132901 (52%)
Retrieved 75000 records of approximately 132901 (56%)
Retrieved 80000 records of approximately 132901 (60%)
Retrieved 85000 records of approximately 132901 (63%)
Retrieved 90000 records of approximately 132901 (67%)
Retrieved 95000 records of appr

In [None]:
# Use the leaflet capability built into robis to map the occurrences, with
# each record's scientificName shown in its popup.
# The call previously ended with a trailing comma, which passes an empty
# extra argument to map_leaflet(); it has been removed.
map_leaflet(
  mangrove_occ,
  provider_tiles = "Esri.WorldGrayCanvas",
  popup = function(x) {
    x["scientificName"]
  }
)

[1] "235048, 235033, 234450, 234495, 235086, 235089, 235091, 235106, 235056, 235060, 235045, 235116, 235063, 235072, 235075, 235077, 235068, 234488, 235103"


Retrieved 5000 records of approximately 132901 (3%)
Retrieved 10000 records of approximately 132901 (7%)
Retrieved 15000 records of approximately 132901 (11%)
Retrieved 20000 records of approximately 132901 (15%)
Retrieved 25000 records of approximately 132901 (18%)
Retrieved 30000 records of approximately 132901 (22%)
Retrieved 35000 records of approximately 132901 (26%)
Retrieved 40000 records of approximately 132901 (30%)
Retrieved 45000 records of approximately 132901 (33%)
Retrieved 50000 records of approximately 132901 (37%)
Retrieved 55000 records of approximately 132901 (41%)
Retrieved 60000 records of approximately 132901 (45%)
Retrieved 65000 records of approximately 132901 (48%)
Retrieved 70000 records of approximately 132901 (52%)
Retrieved 75000 records of approximately 132901 (56%)
Retrieved 80000 records of approximately 132901 (60%)
Retrieved 85000 records of approximately 132901 (63%)
Retrieved 90000 records of approximately 132901 (67%)
Retrieved 95000 records of appr