In [1]:
import os
from typing import Dict, List
import requests

# Base URL of the NERC Vocabulary Server (NVS).
# Can be overridden through the VOCAB_HOST environment variable; falls back to
# the public NVS instance when the variable is not set.
VOCAB_HOST = os.getenv("VOCAB_HOST", "http://vocab.nerc.ac.uk")

In [2]:
def get_sparql_query(vocab_host: str, vocab_id: str) -> str:
    """Generates a SPARQL query for retrieving vocab data.

    The query selects, for every member of the collection
    ``<vocab_host>/collection/<vocab_id>/current/``, the member URI (as
    ``?uri``) and its ``skos:altLabel`` (as ``?altLabel``). Only members whose
    ``owl:deprecated`` value equals ``"false"`` are kept, and rows are ordered
    by alt label. Because both triple patterns are mandatory, members lacking
    an alt label or an ``owl:deprecated`` statement are not returned.

    Args:
        vocab_host (str): e.g. http://vocab.nerc.ac.uk (a trailing slash is
            tolerated and ignored).
        vocab_id (str): e.g. P01, L22, etc.

    Returns:
        str: SPARQL query created based on input params.
    """
    # A trailing slash on the host would otherwise produce
    # "<http://host//collection/...>", an IRI that matches no collection and
    # silently yields an empty result set.
    host = vocab_host.rstrip("/")

    # Doubled braces are literal braces for str.format; {0}/{1} are the host
    # and vocab id placeholders.
    sparql_query = """
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT DISTINCT (?c as ?uri) (?pl as ?altLabel)
    WHERE {{
        <{0}/collection/{1}/current/> skos:member ?c .
        ?c skos:altLabel ?pl .
        ?c owl:deprecated ?isDeprecated .
        FILTER (?isDeprecated = "false") .
    }}
    ORDER BY ?pl
    """
    return sparql_query.format(host, vocab_id)

In [3]:
def get_vocab_data(vocab_host: str, vocab_id: str) -> List[Dict]:
    """Retrieves data for a given vocab.

    Posts the query built by ``get_sparql_query`` to
    ``<vocab_host>/sparql/sparql`` and flattens each result binding into a
    small dict.

    Args:
        vocab_host (str): Where to fetch the data from, e.g. http://vocab.nerc.ac.uk
            (a trailing slash is tolerated and ignored).
        vocab_id (str): Which vocab to fetch, e.g. L22

    Returns:
        List[Dict]: One ``{"uri": ..., "alt_label": ...}`` dict per binding
        returned by the endpoint, in the order the endpoint returned them.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or when the
            60-second timeout expires.
    """
    # Normalise once so neither the endpoint URL nor the collection IRI inside
    # the query ends up with a double slash.
    host = vocab_host.rstrip("/")
    query_url = f"{host}/sparql/sparql"
    query = get_sparql_query(host, vocab_id)

    resp = requests.post(
        query_url,
        data=query,
        headers={"Content-Type": "application/sparql-query"},
        timeout=60,
    )
    # Fail with a clear HTTP error instead of an opaque JSON-decode/KeyError
    # when the server rejects the request or is unavailable.
    resp.raise_for_status()

    bindings = resp.json()["results"]["bindings"]
    return [
        {"uri": binding["uri"]["value"], "alt_label": binding["altLabel"]["value"]}
        for binding in bindings
    ]

In [4]:
# Fetch the terms of the R28 vocab from the configured host and show the raw records.
results = get_vocab_data(vocab_host=VOCAB_HOST, vocab_id="R28")
print(results)

[{'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0008/', 'alt_label': 'APMT'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0003/', 'alt_label': 'GG32'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0004/', 'alt_label': 'HC12'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0005/', 'alt_label': 'I535'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0001/', 'alt_label': 'N1'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0002/', 'alt_label': 'N2'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0006/', 'alt_label': 'OSEAN'}, {'uri': 'http://vocab.nerc.ac.uk/collection/R28/current/CB0007/', 'alt_label': 'USEA'}]


In [6]:
# Collect the URI of every fetched term whose alt label equals the one of interest.
alt_label = "N2"
termURIs = [entry["uri"] for entry in results if entry["alt_label"] == alt_label]
print(termURIs)


['http://vocab.nerc.ac.uk/collection/R28/current/CB0002/']
