This notebook explores how this data contribution expands the OBIS holdings by:

- picking a species from SCCOOS, then pulling all the data for that species from OBIS
- plotting it all together with SCCOOS on a timeslider.

## Load library and data for SCCOOS dataset

In [None]:
# Packages: robis talks to the OBIS API, leaflet draws the maps used later on.
library(robis)
library(leaflet)

# OBIS dataset identifier of the SCCOOS contribution.
sccoos_dataset_id <- "c9aaa0e9-8f6c-4553-a014-a857baba0680"

# Species that the rest of the notebook focuses on.
species <- "Lingulodinium polyedra"

# Download every occurrence of the SCCOOS dataset, asking OBIS to include the
# measurement-or-fact (mof) records alongside each occurrence.
sccoos_occur <- occurrence(
  datasetid = sccoos_dataset_id,
  mof = TRUE
)

## Take a look at response data

In [None]:
# Raise the notebook's column display limit so the wide table can be reviewed.
options(repr.matrix.max.cols = 100)

# Show the first occurrence record only.
head(sccoos_occur, n = 1)

## Note measurement or fact data are in `mof` column.

In [None]:
# Additional observations (measurements or facts) attached to the first
# occurrence; the `mof` column holds one entry per occurrence.
# sccoos_mof <- sccoos_occur$mof # list
head(sccoos_occur[["mof"]], n = 1)

## Create a map of all SCCOOS observations for the dataset

In [None]:
# Create a map of all SCCOOS occurrences.
# htmlwidgets provides saveWidget(); IRdisplay provides display_html().
library(htmlwidgets)
library(IRdisplay)

# The mapping steps below are currently disabled. When enabled they build the
# map, write it to "sccoos_data.html" and embed that file in an iframe.
# m = map_leaflet(sccoos_occur)
# m
# saveWidget(m, "sccoos_data.html", selfcontained = FALSE)
# display_html("<iframe src=\"sccoos_data.html\"></iframe>")

## Query OBIS for specific species and map out all observations

In [None]:
# NOTE(review): everything in this cell is commented out, so `lingpoly_occur`
# is never created here, yet the later "cluster pins" cell passes
# `lingpoly_occur` to map_leaflet(). Re-enable one of the queries below (or
# define `lingpoly_occur` elsewhere) before running that cell.

# get all Lingulodinium polyedra occurrences from OBIS
# lingpoly_occur <- occurrence(
#     scientificname = species
# )

# or by taxon id from AphiaID: urn:lsid:algaebase.org:taxname:233592
# could use rtaxize 'get_wormsid' here or obistools 'match_taxa' to search on taxonid from species name? 
# lingpoly_occur <- occurrence(
#   taxonid = '233592'
# )

# remove sccoos entries from full occurrence response
# lingpoly_occur <- lingpoly_occur[!lingpoly_occur$dataset_id==sccoos_dataset_id,]

# create a map of all lingulodinium polyedra, excluding sccoos contributions
# m = map_leaflet(lingpoly_occur)
# m
# saveWidget(m, "lingpoly_data.html", selfcontained = FALSE)
# display_html("<iframe src=\"lingpoly_data.html\"></iframe>")

## Subset SCCOOS response for species and move mof data into occurrence data table

In [None]:
# Subset the SCCOOS occurrences to the species of interest, then spread the
# nested measurement-or-fact (mof) records into one numeric column per
# measurement type so they can be summarised and plotted directly.
sccoos_lingpoly_occur <- sccoos_occur[which(sccoos_occur$scientificName == species), ]

# Build a character vector of all the unique measurement-or-fact types.
# Entries that are not data frames (e.g. NULL for "no mof") contribute nothing.
mof_types <- unique(unlist(lapply(
  sccoos_lingpoly_occur$mof,
  function(mof) {
    if (is.data.frame(mof)) as.character(mof$measurementType) else character(0)
  }
)))
mof_types <- mof_types[!is.na(mof_types)]

# Collect the values in a character matrix (one row per occurrence, one column
# per mof type). Cells start as NA and are only written when the occurrence
# really carries that measurement, so a later mof row can never wipe out a
# value stored by an earlier one.
mof_values <- matrix(
  NA_character_,
  nrow = nrow(sccoos_lingpoly_occur),
  ncol = length(mof_types),
  dimnames = list(NULL, mof_types)
)
for (i in seq_along(sccoos_lingpoly_occur$mof)) {
  mof <- sccoos_lingpoly_occur$mof[[i]]
  if (!is.data.frame(mof) || nrow(mof) == 0) {
    next
  }
  # Iterate over the mof *rows* (not the data frame's columns).
  types <- as.character(mof$measurementType)
  keep <- !is.na(types)
  mof_values[i, types[keep]] <- as.character(mof$measurementValue[keep])
}

# Add one column per mof type to the occurrence table.
for (type in mof_types) {
  sccoos_lingpoly_occur[[type]] <- mof_values[, type]
}

# Convert the mof columns, plus a few occurrence-level fields, to doubles.
# Columns are converted one at a time: apply() on a data frame would go through
# as.matrix() and re-format already-numeric columns, losing precision.
measurementTypes <- unique(c("organismQuantity", "sss", "sst", "bathymetry", mof_types))
for (col in intersect(measurementTypes, colnames(sccoos_lingpoly_occur))) {
  values <- sccoos_lingpoly_occur[[col]]
  if (is.factor(values)) {
    values <- as.character(values)  # convert labels, not integer codes
  }
  sccoos_lingpoly_occur[[col]] <- as.double(values)
}

# Parse the ISO-8601 event timestamp. as.Date() keeps the calendar day only
# (time of day is dropped); the original `tz = utc` argument referred to an
# undefined object and was never used by as.Date(), so it is removed.
sccoos_lingpoly_occur$dateTime <- as.Date(
  sccoos_lingpoly_occur$eventDate,
  format = "%Y-%m-%dT%H:%M:%SZ"
)

# Drop the nested 'mof' column now that its content lives in regular columns.
cols <- setdiff(colnames(sccoos_lingpoly_occur), "mof")
sccoos_lingpoly_occur <- sccoos_lingpoly_occur[cols]
head(sccoos_lingpoly_occur)

## Count the number of observations per location and include some stats

In [None]:
library(tidyverse)

# Group the species occurrences by sampling location; the grouped table is
# reused by later cells.
by_location <- group_by(sccoos_lingpoly_occur, locationID)
# by_location %>% tally

# Per location: number of observations, mean temperature, mean salinity and
# the largest organism quantity (missing values ignored).
summarise(
  by_location,
  n = n(),
  Temp = mean(Temp, na.rm = TRUE),
  Salinity = mean(Salinity, na.rm = TRUE),
  organismQuantity = max(organismQuantity, na.rm = TRUE)
)



## Create some time-series plots

In [None]:
library(ggplot2)

# Long-format alternative, kept for reference:
# sccoos_lingpoly_occur.m <- melt(sccoos_lingpoly_occur, measure.vars=c("Temp","organismQuantity"))

# Temperature over time.
p1 <- ggplot(
  data = sccoos_lingpoly_occur,
  mapping = aes(x = dateTime, y = Temp)
) +
  geom_line()

# Organism quantity over time.
p2 <- ggplot(
  data = sccoos_lingpoly_occur,
  mapping = aes(x = dateTime, y = organismQuantity)
) +
  geom_line()

p1
p2

In [None]:
# Compact overview of every column (name, type, first values).
dplyr::glimpse(sccoos_lingpoly_occur)

In [None]:
# Temperature time series, one coloured line per sampling location.
ggplot(
  data = by_location,
  mapping = aes(x = dateTime, y = Temp, colour = locationID)
) +
  geom_line()

## Make a map of all occurrences with SCCOOS obs as cluster pins

In [None]:
# Map every non-SCCOOS OBIS occurrence of the species, and overlay the SCCOOS
# occurrences as clustered pins whose popups list that occurrence's
# measurements.
#
# TODO
# Might need to have the popup with a time slider since the SCCOOS occurrences
# are at only 7 locations for 1007 occurrences.

# The OBIS-wide query is commented out in the earlier cell, so fetch it here
# when it has not been created yet, excluding the SCCOOS contribution itself.
if (!exists("lingpoly_occur")) {
  lingpoly_occur <- occurrence(scientificname = species)
  lingpoly_occur <- lingpoly_occur[
    !(lingpoly_occur$dataset_id %in% sccoos_dataset_id),
  ]
}

# Build one HTML table per SCCOOS occurrence from the measurement columns.
# The nested `mof` column has already been dropped from
# `sccoos_lingpoly_occur`, so the values are read from the regular columns.
popup_cols <- intersect(
  unique(measurementTypes),
  colnames(sccoos_lingpoly_occur)
)
popup_rows <- do.call(
  paste0,
  c(
    list(rep("", nrow(sccoos_lingpoly_occur))),
    lapply(popup_cols, function(col) {
      paste0(
        "<tr><td>", col, "</td><td>",
        sccoos_lingpoly_occur[[col]], "</td></tr>"
      )
    })
  )
)
popups <- paste0(
  "<table>",
  "<thead><tr><td>measurementType</td><td>measurementValue</td></tr></thead>",
  "<tbody>", popup_rows, "</tbody>",
  "</table>"
)

m <- map_leaflet(lingpoly_occur) %>%
  addTiles() %>%
  addMarkers(
    lng = sccoos_lingpoly_occur$decimalLongitude,
    lat = sccoos_lingpoly_occur$decimalLatitude,
    clusterOptions = markerClusterOptions(),
    popup = popups
  )
m
saveWidget(m, "lingpoly_data_sub.html", selfcontained = FALSE)
display_html("<iframe src=\"lingpoly_data_sub.html\"></iframe>")

## Update map to plot cluster pins with time-series observational data

In [None]:

# For each SCCOOS site: plot its temperature time series, save the plot as a
# PNG, and build a leaflet map whose marker popups embed that image. Each map
# is written to "<site>_sub.html" and shown inline through an iframe.
sites <- unique(sccoos_lingpoly_occur$locationID)
for (site in sites[!is.na(sites)]) {
  site_filt <- filter(sccoos_lingpoly_occur, locationID == site)

  # Plot this site's own data (the original referenced an undefined
  # `cal_poly` object here).
  site_plot <- ggplot(site_filt, aes(x = dateTime, y = Temp)) +
    geom_line() +
    ggtitle(site)

  # Save the ggplot as an image to embed in the popup; pass the plot
  # explicitly rather than relying on ggsave()'s implicit last plot.
  image_name <- paste0(site, "_ts.png")
  ggsave(image_name, plot = site_plot)

  # NOTE(review): the number of pins is capped at the number of measurement
  # types, as in the original; confirm this is the intended limit.
  n <- length(measurementTypes)
  pop <- paste0("<img src=\"", image_name, "\" />")
  m <- map_leaflet(site_filt) %>%
    addTiles() %>%
    addMarkers(
      head(site_filt$decimalLongitude, n = n),
      head(site_filt$decimalLatitude, n = n),
      clusterOptions = markerClusterOptions(),
      popup = pop
    )

  filename <- paste0(site, "_sub.html")
  saveWidget(m, filename, selfcontained = FALSE)
  html_file <- paste0("<iframe src=\"", filename, "\"></iframe>")
  display_html(html_file)
}

In [None]:
# NOTE(review): `p` is not assigned in any cell visible here (the plots above
# are named `p1` and `p2`), so this looks like it fails with "object 'p' not
# found" unless `p` is defined elsewhere - confirm or remove.
p