In [None]:
import json
import os
import sys
import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import numpy as np

## Setup SPARQL / Python Interface

### Define SPARQL Request Function

In [None]:
#Credit - Doug Fils

def get_sparql_dataframe(service, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Parameters
    ----------
    service : str
        URL of the SPARQL endpoint to send the query to.
    query : str
        Text of the SPARQL query.

    Returns
    -------
    pandas.DataFrame
        One column per variable listed under ``head.vars`` in the JSON
        response and one row per entry in ``results.bindings``. A variable
        that is absent from a binding is stored as ``None``.
    """
    client = SPARQLWrapper(service)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Parse the JSON document straight from the raw HTTP response.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    # Keep only the 'value' of each bound term; unbound variables become None.
    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=columns)

### Set SPARQL Endpoint and base URIs

In [None]:
# BCO-DMO SPARQL endpoint queried by this notebook
BCODMO_SERVE = "https://lod.bco-dmo.org/sparql"

# BCO-DMO URI base
BCODMO_PREF = "http://lod.bco-dmo.org/id/"

## Basic SPARQL Queries
SPARQL queries are written as Python strings. Variables can be defined within the query, and the query result is returned as a pandas DataFrame whose columns can then be used like any other Python data. Results can also be requested in other formats (such as raw JSON or lists).

**NOTE:** The BCO-DMO SPARQL endpoint (like many public SPARQL endpoints) limits results to 10,000.

Helpful SPARQL Cheat Sheet: http://www.iro.umontreal.ca/~lapalme/ift6281/sparql-1_1-cheat-sheet.pdf

### Properties ("predicates" in Subject-Predicate-Object model) in BCO-DMO

Also see https://ocean-data.org/schema/ for ontology

#### List all the properties in the BCO-DMO Knowledge Graph

In [None]:
# Match every triple (blank-node wildcards for subject and object) and keep
# only the distinct predicates, sorted by predicate.
propertyQuery = '''
SELECT DISTINCT ?property
WHERE { [] ?property [] }
ORDER BY ?property
'''

In [None]:
propertySearch = get_sparql_dataframe(BCODMO_SERVE, propertyQuery)

In [None]:
propertySearch.style.set_properties(**{'width': '600px'})

#### Get more information about the properties

In [None]:
# Pair each ?dataset_parameter with the ?masterParam it is linked to through
# odo:isInstanceOf. Note: this reassigns propertyQuery from the earlier cell.
propertyQuery = '''
SELECT DISTINCT ?dataset_parameter ?masterParam
WHERE { ?dataset_parameter odo:isInstanceOf ?masterParam  .
}
'''

In [None]:
propertySearch = get_sparql_dataframe(BCODMO_SERVE, propertyQuery)

In [None]:
propertySearch.style.set_properties(**{'width': '600px'})

#### Find out more about "Monitored Properties"

In [None]:
# List each ?masterParam together with its odo:hasParameterShortDescription,
# restricted to parameters whose owl:deprecated value is 0, sorted by parameter.
monitoredPropertiesQuery = '''
SELECT DISTINCT ?masterParam ?shortDesc
WHERE { 
?masterParam odo:hasParameterShortDescription ?shortDesc .
?masterParam owl:deprecated 0 . #Binay - only show those that are not deprecated
}
ORDER BY ?masterParam'''

In [None]:
monitoredPropertiesSearch = get_sparql_dataframe(BCODMO_SERVE, monitoredPropertiesQuery)

In [None]:
monitoredPropertiesSearch.style.set_properties(**{'width': '600px'})

#### DESCRIBE a monitored property

In [None]:
# DESCRIBE asks the endpoint for an RDF graph describing the given resource
# (parameter 901); unlike SELECT, the result is a set of triples rather than
# a table of variable bindings.
# Note: this reassigns monitoredPropertiesQuery from the earlier cell.
monitoredPropertiesQuery = '''
DESCRIBE <http://lod.bco-dmo.org/id/parameter/901>
'''

In [None]:
# DESCRIBE returns an RDF graph, not the SELECT-style JSON bindings that
# get_sparql_dataframe() parses, so that helper raises on this query and would
# leave the previous monitoredPropertiesSearch in place. Query the endpoint
# directly instead: with SPARQLWrapper's default return format the DESCRIBE
# response is requested as RDF/XML and convert() parses it into an rdflib graph.
describeRequest = SPARQLWrapper(BCODMO_SERVE)
describeRequest.setQuery(monitoredPropertiesQuery)
describeGraph = describeRequest.query().convert()

# Flatten the graph into one row per triple; sorted so the display is stable
# across runs (graph iteration order is not guaranteed).
monitoredPropertiesSearch = pd.DataFrame(
    sorted(
        (str(s), str(p), str(o))
        for s, p, o in describeGraph.triples((None, None, None))
    ),
    columns=['subject', 'predicate', 'object'],
)

In [None]:
monitoredPropertiesSearch.style.set_properties(**{'width': '600px'})

### Fernando Question:
How can we track the Salinity (water bottle) results from the Salinometer all the way up to the BCO-DMO Parameters repository? Not just the results, but also the reports, sensors, and persons involved… can we include DOIs? What would be the steps?

First, you need to find the parameters you are interested in: look up the Ocean Data Ontology Schema which can be found here: https://ocean-data.org/schema/ . Explore the browse feature on the BCO-DMO SPARQL endpoint to get familiar with the structure of the data: https://lod.bco-dmo.org/browse/

You can also look up the master parameters you are interested in. If you are looking for salinity from a water bottle (as opposed to the CTD), you can find it listed here: https://www.bco-dmo.org/search/parameter/salinity?size=20&sort=relevance. Note that there appear to be multiple "master parameter" entries associated with water-bottle salinity; we are aware of these duplications and are actively working on resolving them. For now, the master parameters of interest appear to be "sal_bottle", "sal_bot", and "sal_niskin". We'll start with those three.

If you click on "sal_bottle", you'll be redirected to https://www.bco-dmo.org/parameter/956, where you can see that this is parameter #956. A way to get this information via SPARQL is to use the property skos:prefLabel, which for this parameter is "sal_bottle"@en-us.

In [None]:
#Write a SPARQL query to look up the master parameters whose skos:prefLabel is one of three labels
#(the datasets that use them are added by the queries in the following cells).
#NOTE(review): the text above names "sal_niskin", but the VALUES list below uses "sal_nis" -- confirm
#which label actually exists in the knowledge graph.
propertyQuery = '''
SELECT DISTINCT ?masterParam ?shortDesc
WHERE { 
VALUES (?shortDesc) {("sal_bot"@en-us) ("sal_nis"@en-us) ("sal_bottle"@en-us)} .
?masterParam skos:prefLabel ?shortDesc .
?masterParam rdf:type odo:MonitoredProperty . #This states that we are only looking for the master parameters
}
'''

In [None]:
propertySearch = get_sparql_dataframe(BCODMO_SERVE, propertyQuery)

In [None]:
propertySearch.style.set_properties(**{'width': '600px'})

In [None]:
#Extend the query: the added odo:isInstanceOf pattern returns every ?datasetParameter
#that is an instance of one of these master parameters.
propertyQuery2 = '''
SELECT DISTINCT ?datasetParameter ?masterParam ?shortDesc
WHERE { 
VALUES (?shortDesc) {("sal_bot"@en-us) ("sal_nis"@en-us) ("sal_bottle"@en-us)} .
?masterParam skos:prefLabel ?shortDesc .
?masterParam rdf:type odo:MonitoredProperty .
?datasetParameter odo:isInstanceOf ?masterParam  .
}
'''

In [None]:
propertySearch2 = get_sparql_dataframe(BCODMO_SERVE, propertyQuery2)

In [None]:
propertySearch2.style.set_properties(**{'width': '600px'})

In [None]:
#From the dataset parameters, we can pull the specific datasets (via odo:storesValuesFor)
#and the value of each dataset's odo:osprey_page, bound here as ?datasetURL.
#Both patterns are required, so a dataset without an odo:osprey_page is not returned.
propertyQuery3 = '''
SELECT DISTINCT ?dataset ?datasetURL ?datasetParameter ?masterParam ?shortDesc
WHERE { 
VALUES (?shortDesc) {("sal_bot"@en-us) ("sal_nis"@en-us) ("sal_bottle"@en-us)} .
?masterParam skos:prefLabel ?shortDesc .
?masterParam rdf:type odo:MonitoredProperty .
?datasetParameter odo:isInstanceOf ?masterParam  .
?dataset odo:storesValuesFor ?datasetParameter .
?dataset odo:osprey_page ?datasetURL .
}
'''

In [None]:
propertySearch3 = get_sparql_dataframe(BCODMO_SERVE, propertyQuery3)

In [None]:
propertySearch3.style.set_properties(**{'width': '600px'})

In [None]:
#Now you can also start to pull information about the dataset as a whole. Wrap each optional feature
#in OPTIONAL: without it, datasets that lack the feature are dropped from the results entirely.
#Each feature gets its OWN OPTIONAL block -- if the title and the agent/role patterns shared one block,
#a dataset missing either one would come back with both ?datasetTitle and ?role unbound.
propertyQuery4 = '''
SELECT DISTINCT ?dataset ?datasetURL ?datasetTitle ?role ?datasetParameter ?masterParam ?shortDesc
WHERE { 
VALUES (?shortDesc) {("sal_bot"@en-us) ("sal_nis"@en-us) ("sal_bottle"@en-us)} .
?masterParam skos:prefLabel ?shortDesc .
?masterParam rdf:type odo:MonitoredProperty .
?datasetParameter odo:isInstanceOf ?masterParam  .
?dataset odo:storesValuesFor ?datasetParameter .
?dataset odo:osprey_page ?datasetURL . 
OPTIONAL {
?dataset odo:datasetTitle ?datasetTitle .
}
OPTIONAL {
?dataset odo:hasAgentWithRole ?person .
?person dcterms:description ?role .
}
}
'''

In [None]:
propertySearch4 = get_sparql_dataframe(BCODMO_SERVE, propertyQuery4)

In [None]:
propertySearch4.style.set_properties(**{'width': '600px'})