# Introduction
In this notebook we illustrate how to assess the quality of a dataset. A configuration file is provided with all the documentation concerning the dataset.

### First, we import the libraries

In [3]:
import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON, XML
import time
import json

### We are going to use the BnF example

In [4]:
# Turtle file with the description of the BnF dataset used in this example.
file = "data/bnf-data-quality.ttl"

# Create an empty graph and fill it from the file. parse() is left as the
# final expression so the notebook echoes the populated Graph object.
graph = rdflib.Graph()
graph.parse(source=file)

<Graph identifier=Nee94bba832cc4189a6ed2fbfd93a71b7 (<class 'rdflib.graph.Graph'>)>

### Let's retrieve the SPARQL endpoint

In [10]:
# Find the SPARQL endpoint declared in the dataset description.
# The VoID prefix is declared in the query itself, like the prefixes in the
# other cells, instead of relying on a binding inherited from the loaded graph.
query = """
        PREFIX void: <http://rdfs.org/ns/void#>
        SELECT DISTINCT ?sparql
        WHERE {
            ?s void:sparqlEndpoint ?sparql
        }"""

qres = graph.query(query)

# If several endpoints are returned the last one wins; `endpoint` stays an
# empty string when the description declares none.
endpoint = ''
for row in qres:
    endpoint = str(row.sparql)
print(endpoint)

https://data.bnf.fr/sparql


### Let's retrieve the title

In [11]:
# Collect every distinct dcterms:title found in the description, then print
# them one per line.
query = """
        PREFIX dcterms: <http://purl.org/dc/terms/> 
        SELECT DISTINCT ?title
        WHERE {
            ?s dcterms:title ?title
        }"""

qres = graph.query(query)

titles = [str(row["title"]) for row in qres]
for title in titles:
    print(title)

National Library of France


### Let's now get the criteria

In [15]:
# List the label of every criterion that is measured for the endpoint and that
# also carries an executable query (schema:query).
# The endpoint IRI is supplied through initBindings rather than spliced into
# the query text, and the VoID prefix is declared explicitly instead of
# relying on a binding inherited from the loaded graph.
query = """
        PREFIX void: <http://rdfs.org/ns/void#>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX schema: <https://schema.org/>
        PREFIX dqv: <http://www.w3.org/ns/dqv#>

        SELECT DISTINCT ?criterionLabel
        WHERE {
            ?s void:sparqlEndpoint ?endpoint .
            ?s dqv:hasQualityMeasurement ?qualityMeasurement .
            ?qualityMeasurement dqv:isMeasurementOf ?metric .
            ?metric skos:prefLabel ?criterionLabel .
            ?metric schema:query ?query
        }"""

qres = graph.query(query, initBindings={"endpoint": rdflib.URIRef(endpoint)})

for row in qres:
    print(str(row.criterionLabel))
    

Availability
Licensing
Interlinking-Person
Interlinking-Work
Performance
Conciseness
Trustworthiness-Unknown
Understandability-Labels
Understandability-Vocabularies
Understandability-URI-Patterns
Understandability-Examples
Timeliness
Representational-Conciseness-URIs-Length
Representational-Conciseness-Containers
Interoperability
Interpretability-VIAF
Interpretability-Isni
Interpretability-Labels
Versatility-Serialisation
Versatility-Multilingual
Relevancy
Consistency
Completeness-Person
Completeness-Work
Completeness-Place
Completeness-Population


### Let's now retrieve the assessment results

In [18]:
# Retrieve, for every quality measurement attached to the endpoint, the
# dimension label, the criterion label and the recorded value.
# The endpoint IRI is supplied through initBindings rather than spliced into
# the query text, and the VoID prefix is declared explicitly instead of
# relying on a binding inherited from the loaded graph.
query = """
        PREFIX void: <http://rdfs.org/ns/void#>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX schema: <https://schema.org/>
        PREFIX dqv: <http://www.w3.org/ns/dqv#>

        SELECT DISTINCT ?dimensionLabel ?criterionLabel ?value
        WHERE {
            ?s void:sparqlEndpoint ?endpoint .
            ?s dqv:hasQualityMeasurement ?qualityMeasurement .
            ?qualityMeasurement dqv:isMeasurementOf ?metric .
            ?qualityMeasurement dqv:value ?value .
            ?metric skos:prefLabel ?criterionLabel .
            ?metric dqv:inDimension ?dimension .
            ?dimension skos:prefLabel ?dimensionLabel
        }"""

qres = graph.query(query, initBindings={"endpoint": rdflib.URIRef(endpoint)})

# Three lines are printed per measurement: dimension, criterion, value.
for row in qres:
    value = str(row.value)
    print("dimension:" + str(row.dimensionLabel))
    print("criterion:" + str(row.criterionLabel))
    print("value:" + value)

dimension:Availability
criterion:Availability
value:1
dimension:Licensing
criterion:Licensing
value:0.5
dimension:Interlinking
criterion:Interlinking-Person
value:0.44
dimension:Interlinking
criterion:Interlinking-Work
value:0.02
dimension:Interlinking
criterion:Interlinking-Wikidata
value:1
dimension:Security
criterion:Security
value:1
dimension:Performance
criterion:Performance
value:1
dimension:Syntactic validity
criterion:Syntactic Validity
value:1
dimension:Conciseness
criterion:Conciseness
value:0
dimension:Conciseness
criterion:Conciseness-Wikidata
value:0.99
dimension:Trustworthiness
criterion:Trustworthiness-Dataset
value:1
dimension:Trustworthiness
criterion:Trustworthiness-Unknown
value:0
dimension:Understandability
criterion:Understandability-Labels
value:0
dimension:Understandability
criterion:Understandability-Vocabularies
value:0
dimension:Understandability
criterion:Understandability-URI-Patterns
value:0
dimension:Understandability
criterion:Understandability-Examples
v

### Finally, we can run a criterion and see the result

By using this SPARQL query, we retrieve all the information concerning the Interlinking-Person data quality criterion.

In [47]:
# Label of the criterion whose assessment query we want to run.
criterion = "Interlinking-Person"

# Fetch the description and the stored assessment query of that criterion.
# Both the endpoint IRI and the criterion label are supplied through
# initBindings instead of being formatted into the query text, so a label
# containing quotes or braces cannot break the query. The VoID prefix is
# declared explicitly instead of relying on a binding inherited from the
# loaded graph.
query = """
        PREFIX void: <http://rdfs.org/ns/void#>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX schema: <https://schema.org/>
        PREFIX dqv: <http://www.w3.org/ns/dqv#>

        SELECT DISTINCT ?query ?description
        WHERE {
            ?s void:sparqlEndpoint ?endpoint .
            ?s dqv:hasQualityMeasurement ?qualityMeasurement .
            ?qualityMeasurement dqv:isMeasurementOf ?metric .
            ?metric schema:description ?description .
            ?metric schema:query ?query .
            ?metric skos:prefLabel ?criterionLabel
        }"""

qres = graph.query(
    query,
    initBindings={
        "endpoint": rdflib.URIRef(endpoint),
        "criterionLabel": rdflib.Literal(criterion),
    },
)

### Now we can see the result

In [48]:
# Positional argument handed to str.format() on each stored query.
# NOTE(review): the query printed for Interlinking-Person shows no substituted
# value, so this argument is presumably only consumed by other criteria
# (e.g. as a limit) - confirm against the configuration file.
QUERY_TEMPLATE_ARGUMENT = 10

# Client for the remote endpoint; results are requested as JSON so that
# queryAndConvert() can be indexed like a dictionary below.
sparqlEndpoint = SPARQLWrapper(endpoint)
sparqlEndpoint.setReturnFormat(JSON)

# `qres` holds the rows produced by the criterion query in the previous cell.
for row in qres:
    # The stored query is treated as a format template; format() is always
    # applied, which also turns any doubled braces into single ones.
    assessmentQuery = str(row.query).format(QUERY_TEMPLATE_ARGUMENT)
    print(assessmentQuery)  # We can see the query
    sparqlEndpoint.setQuery(assessmentQuery)

    ret2 = sparqlEndpoint.queryAndConvert()

    # One dictionary per result binding returned by the endpoint.
    for r in ret2["results"]["bindings"]:
        print(r)

PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> SELECT (xsd:float(xsd:float(?totalLink)/xsd:float(?total)) AS ?result) WHERE { { SELECT (count(DISTINCT ?s) as ?total) WHERE { ?s rdf:type foaf:Person  }  } { SELECT (count(DISTINCT ?s) as ?totalLink) WHERE { ?s rdf:type foaf:Person . ?s owl:sameAs ?o . FILTER (!strstarts(str(?o), 'http://data.bnf')) }  } }
{'result': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#float', 'value': '0.43754723668098449707'}}
