In [1]:
import pathlib, logging
from SPARQLWrapper import SPARQLWrapper, JSON

In [2]:
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [3]:
def get_dataset_periods(dataset_url: str = None, endpoint_url: str = None):
    logging.info('Dataset url is {}'.format(dataset_url))
    logging.info('SPARQL endpoint is {}'.format(endpoint_url))

    query = f'PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX dim: <http://purl.org/linked-data/sdmx/2009/dimension#> SELECT DISTINCT ?period WHERE {{ ?object a qb:DataSet . ?obs qb:dataSet ?obj ; ?p ?period . ?obs dim:refPeriod ?period . FILTER (?obj = <{dataset_url}>) }}'
    logging.info(f'Query is {query}')

    sparql = SPARQLWrapper(endpoint_url)
    query = sparql.setQuery(query)

    sparql.setReturnFormat(JSON)
    result = sparql.query().convert()

    return [x['period']['value'] for x in result['results']['bindings']]

In [20]:
result = get_dataset_periods(dataset_url='http://gss-data.org.uk/data/gss_data/covid-19/phs-weekly-covid-19-statistical-data-in-scotland#dataset',
                    endpoint_url='https://staging.gss-data.org.uk/sparql')
    

INFO:root:Dataset url is http://gss-data.org.uk/data/gss_data/covid-19/phs-weekly-covid-19-statistical-data-in-scotland#dataset
INFO:root:SPARQL endpoint is https://staging.gss-data.org.uk/sparql
INFO:root:Query is PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX dim: <http://purl.org/linked-data/sdmx/2009/dimension#> SELECT DISTINCT ?period WHERE { ?object a qb:DataSet . ?obs qb:dataSet ?obj ; ?p ?period . ?obs dim:refPeriod ?period . FILTER (?obj = <http://gss-data.org.uk/data/gss_data/covid-19/phs-weekly-covid-19-statistical-data-in-scotland#dataset>) }


In [21]:
len(result), result[:5]

(192,
 ['http://reference.data.gov.uk/id/day/2020-02-28',
  'http://reference.data.gov.uk/id/day/2020-02-29',
  'http://reference.data.gov.uk/id/day/2020-03-01',
  'http://reference.data.gov.uk/id/day/2020-03-02',
  'http://reference.data.gov.uk/id/day/2020-03-03'])

In [16]:
# Test dataset
import urllib
url = 'http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fgss_data%2Ftrade%2Fons-quarterly-national-accounts%2Fquarterly-national-accounts-gdp-data-tables-national-accounts-aggregates-gdp-and-gva-in-ps-million'
dataset_url = urllib.parse.unquote(url)
dataset_url

'http://gss-data.org.uk/data/gss_data/trade/ons-quarterly-national-accounts/quarterly-national-accounts-gdp-data-tables-national-accounts-aggregates-gdp-and-gva-in-ps-million'

In [17]:
result = get_dataset_periods(endpoint_url='http://gss-data.org.uk/sparql', dataset_url=dataset_url)

INFO:root:Dataset url is http://gss-data.org.uk/data/gss_data/trade/ons-quarterly-national-accounts/quarterly-national-accounts-gdp-data-tables-national-accounts-aggregates-gdp-and-gva-in-ps-million
INFO:root:SPARQL endpoint is http://gss-data.org.uk/sparql
INFO:root:Query is PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX dim: <http://purl.org/linked-data/sdmx/2009/dimension#> SELECT DISTINCT ?period WHERE { ?object a qb:DataSet . ?obs qb:dataSet ?obj ; ?p ?period . ?obs dim:refPeriod ?period . FILTER (?obj = <http://gss-data.org.uk/data/gss_data/trade/ons-quarterly-national-accounts/quarterly-national-accounts-gdp-data-tables-national-accounts-aggregates-gdp-and-gva-in-ps-million>) }


In [15]:
len(result), result[:5]

(0, [])