# Marine EOV Broker



In [1]:
from marine_eov_broker import MarineRiBroker
import logging
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd

# Let INFO-level records through the root logger so that progress messages
# emitted while the notebook runs are displayed. Use "DEBUG" for more detail.
logger = logging.getLogger()
logger.setLevel("INFO")

# Uncomment to inspect the module-level format and EOV lists:
# print(MarineRiBroker.ERDDAP_OUTPUT_FORMATS)
# print(MarineRiBroker.EOV_LIST)

## Start the broker

Starting the broker takes some time (its performance still needs improvement), because the broker will:
* load vocabularies upon startup
* load ERDDAP dataset metadata from all ERDDAP servers


**Question:**
Do we want to work with all datasets on the ERDDAP servers, or do we want to build a fixed list of them?

In [2]:
%%time
# Start the broker on a single dataset. The mapping pairs an ERDDAP server URL
# with a list of names (presumably the dataset ids to use on that server).
broker = MarineRiBroker.MarineBroker(
    {
        "https://www.ifremer.fr/erddap": [
            "ArgoFloats-synthetic-BGC",
        ],
    }
)

# Wider selection on the same server, kept for reference:
# broker = MarineRiBroker.MarineBroker({
#     "https://www.ifremer.fr/erddap": [
#         "ArgoFloats", "ArgoFloats-synthetic-BGC",
#         "SDC_BAL_CLIM_TS_V2_m", "SDC_BAL_CLIM_TS_V2_s",
#         "SDC_GLO_AGG_V2",
#         "SDC_GLO_CLIM_TS_V2_1", "SDC_GLO_CLIM_TS_V2_2",
#         "SDC_BLS_CLIM_TS_V2_m", "SDC_BLS_CLIM_TS_V2_s",
#         "SDC_MED_CLIM_TS_V2_m_pre_post_emt",
#         "SDC_MED_CLIM_TS_V2_m_whole_period",
#         "SDC_MED_CLIM_TS_V2_s_decades",
#         "SDC_MED_CLIM_TS_V2_s_pre_post_emt",
#         "SDC_MED_CLIM_TS_V2_s_whole_period",
#         "SDC_NAT_CLIM_TS_V2_050_m", "SDC_NAT_CLIM_TS_V2_050_s",
#     ]
# })

INFO:root:Querying vocabulary server for EOV : EV_OXY
INFO:root:Querying vocabulary server for EOV : EV_SEATEMP
INFO:root:Querying vocabulary server for EOV : EV_SALIN


CPU times: user 77.6 ms, sys: 26.3 ms, total: 104 ms
Wall time: 6.81 s


## Create a request to the broker
The user must provide the EOVs and the minimum/maximum dates, latitudes and longitudes.
In the case of Argo, the user must provide a SPARQL query returning the platforms' WMO numbers.

In [3]:
# EOV codes to request. A multi-EOV request would look like
# ["EV_SALIN", "EV_OXY", "EV_SEATEMP"].
eovs_request = ["EV_OXY"]

# Time window of the request, as date strings.
start_date, end_date = "2022-01-16", "2022-01-17"

# Bounding box over the north-east Atlantic Ocean.
min_lon, max_lon = -40, 2
min_lat, max_lat = 35, 62

# logger.setLevel(logging.DEBUG)  # uncomment for more verbose logging

## Query the SPARQL endpoint
Once you have defined the EOV variables you want, you can query the SPARQL endpoint of your choice to perform a selection over the metadata of your choice.
In this example, we use `platform_number` as the link between the SPARQL endpoint and ERDDAP (the variable name in the SPARQL query must be the same as the ERDDAP variable name). You then specify the ERDDAP dataset from which the data will be fetched (here `ArgoFloats-synthetic-BGC`).

In [4]:
%%time
# Hand-written SPARQL query: selects platform_number, lat and lon for datasets
# hosting a sensor that observes DOXY, with a cycle inside the latitude,
# longitude and date bounds.
# NOTE: the FILTER literals repeat the values of min_lat/max_lat,
# min_lon/max_lon, start_date and end_date passed below; keep them in sync.
response = broker.submit_sparql_query(
    """
prefix geo: <https://www.w3.org/2003/01/geo/wgs84_pos#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
prefix ssn: <http://www.w3.org/ns/ssn/> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#> 
prefix argo: <https://co.ifremer.fr/co/argo-linked-data/doc/argo-floats.ttl#> 
prefix foaf: <http://xmlns.com/foaf/0.1/> 
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix sosa: <http://www.w3.org/ns/sosa/>
prefix nerc: <http://vocab.nerc.ac.uk/collection/>
prefix dct: <http://purl.org/dc/terms/>
prefix prov: <https://www.w3.org/TR/prov-o/>
prefix dcat: <http://www.w3.org/ns/dcat#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?platform_number ?lat ?lon WHERE { 
?float a dcat:Dataset;
       dct:identifier ?platform_number;
       argo:cycle ?cycle;
       sosa:hosts ?sensor.
?sensor sosa:observes <http://vocab.nerc.ac.uk/collection/R03/current/DOXY>.
?cycle argo:startDate ?date;
       geo:latitude ?lat;
       geo:longitude ?lon.

FILTER(?lat >= 35)
FILTER(?lat <= 62)
FILTER(?lon >= -40)
FILTER(?lon <= 2)
FILTER(?date >= "2022-01-16"^^xsd:date)
FILTER(?date <= "2022-01-17"^^xsd:date)
}
""",
    broker.DEFAULT_SPARQL_ENDPOINTS["Argo"],
    eovs_request,
    start_date, end_date,
    min_lon, min_lat, max_lon, max_lat,
    "nc",  # presumably the output format (NetCDF?) - TODO confirm
    "platform_number",  # SPARQL variable used as the link to ERDDAP
    "ArgoFloats-synthetic-BGC",  # ERDDAP dataset the data is fetched from
)

CPU times: user 2.58 s, sys: 777 ms, total: 3.35 s
Wall time: 10.7 s


In [6]:
%%time

# Dynamic way of building the same query,
# using pykg2tbl SPARQL templates.

from pykg2tbl.j2.jinja_sparql_builder import J2SparqlBuilder
from pysubyt.j2.functions import Filters, Functions

# Folder holding the SPARQL templates (a relative path, presumably resolved
# against the working directory - confirm).
template_folder = "src/marine_eov_broker/j2_templates"

j2sqb = J2SparqlBuilder(
    template_folder,
    j2_filters=Filters,
    j2_functions=Functions,
)

# Template file name and the values handed to it as keyword arguments.
query_filename = "platform_finder.sparql"
variables = dict(
    start_date=start_date,
    end_date=end_date,
    min_lon=min_lon,
    min_lat=min_lat,
    max_lon=max_lon,
    max_lat=max_lat,
)

# Build the query and print it so that it can be inspected.
qry = j2sqb.build_sparql_query(query_filename, **variables)
print(qry)

# Submit the templated query with the same request parameters as before.
response = broker.submit_sparql_query(
    qry,
    broker.DEFAULT_SPARQL_ENDPOINTS["Argo"],
    eovs_request,
    start_date, end_date,
    min_lon, min_lat, max_lon, max_lat,
    "nc",
    "platform_number",
    "ArgoFloats-synthetic-BGC",
)






prefix geo: <https://www.w3.org/2003/01/geo/wgs84_pos#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
prefix ssn: <http://www.w3.org/ns/ssn/> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#> 
prefix argo: <https://co.ifremer.fr/co/argo-linked-data/doc/argo-floats.ttl#> 
prefix foaf: <http://xmlns.com/foaf/0.1/> 
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix sosa: <http://www.w3.org/ns/sosa/>
prefix nerc: <http://vocab.nerc.ac.uk/collection/>
prefix dct: <http://purl.org/dc/terms/>
prefix prov: <https://www.w3.org/TR/prov-o/>
prefix dcat: <http://www.w3.org/ns/dcat#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?platform_number ?lat ?lon WHERE { 
?float a dcat:Dataset;
       dct:identifier ?platform_number;
       argo:cycle ?cycle;
       sosa:hosts ?sensor.
?sensor sosa:observes <http://vocab.nerc.ac.uk/collection/R03/current/DOXY>.
?cycle argo:

## Results

Once we have the SPARQL results containing the list of values to pass to ERDDAP, we can send the requests and compile the responses into a single pandas DataFrame.

In [7]:
# Compile the results of the request; as the last expression of the cell,
# the returned table is displayed below.
response.compile_results()

Unnamed: 0_level_0,time,latitude,longitude,doxy,mlpl_doxy,pres
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2022-01-16 07:02:00,44.389088,-20.07749,230.547607,,0.700000
1,2022-01-16 07:02:00,44.389088,-20.07749,,,1.000000
2,2022-01-16 07:02:00,44.389088,-20.07749,230.644394,,1.600000
3,2022-01-16 07:02:00,44.389088,-20.07749,,,1.900000
4,2022-01-16 07:02:00,44.389088,-20.07749,,,2.500000
...,...,...,...,...,...,...
505,2022-01-16 04:15:45,42.600000,-16.77500,246.669815,,1749.500000
506,2022-01-16 04:15:45,42.600000,-16.77500,246.785339,,1799.099976
507,2022-01-16 04:15:45,42.600000,-16.77500,246.422516,,1848.400024
508,2022-01-16 04:15:45,42.600000,-16.77500,246.775803,,1898.500000


![SPARQL combined with ERDDAP](images/SPARQL_broker.JPG)