# Example DCAT catalog



### Preparation

Import the libraries required to query the RDF dataset

In [1]:
from rdflib import Graph

### Loading the dataset descriptions

In [3]:
# Build one Graph per description file under datasets/.
def _parse_file(path):
    """Return a new Graph populated by parsing the file at ``path``."""
    return Graph().parse(path)


g_bnf = _parse_file("datasets/bnf.ttl")
g_bnl = _parse_file("datasets/bnl.ttl")
g_nls = _parse_file("datasets/data-foundry-nls.ttl")
g_eu = _parse_file("datasets/europeana.ttl")
g_harvard = _parse_file("datasets/harvard.ttl")
g_kb = _parse_file("datasets/kb.ttl")
g_lc = _parse_file("datasets/lc.ttl")
g_moma = _parse_file("datasets/moma.ttl")
g_rijks = _parse_file("datasets/rijksmuseum.ttl")
g_zeri = _parse_file("datasets/zeri.ttl")

### Join all the graphs

In [5]:
# Combine the per-file graphs into a single graph. `sum` starts from g_bnf and
# applies `+` to each remaining graph in order, i.e. the same left-to-right
# chain as g_bnf + g_bnl + ... + g_zeri.
graph = sum(
    (
        g_bnl,
        g_nls,
        g_eu,
        g_harvard,
        g_kb,
        g_lc,
        g_moma,
        g_rijks,
        g_zeri,
    ),
    g_bnf,
)

### Let's explore the content of the dataset using SPARQL

The following examples show several ways to query the dataset with SPARQL.

#### Let's retrieve the number of classes in the RDF dataset

In [6]:
# SPARQL: count the distinct objects of rdf:type (`a`) statements.
q = """
    SELECT (count(distinct ?class) as ?classes)
    WHERE {
        ?s a ?class .
    }
"""

print('##### Number of classes:')

# Run the query against the merged graph; the aggregate is bound to ?classes.
for row in graph.query(q):
    print(row["classes"])

##### Number of classes:
13


#### What classes are used in the dataset?

In [7]:
# SPARQL: every distinct object of an rdf:type (`a`) statement.
q = """
    SELECT distinct ?class
    WHERE {
        ?s a ?class .
    }
"""

print('##### Classes:')

# Run the query against the merged graph and print one class IRI per row.
for row in graph.query(q):
    print(row["class"])

##### Classes:
http://www.w3.org/ns/prov#Plan
http://www.w3.org/ns/dcat#Distribution
http://www.w3.org/ns/prov#Person
http://www.w3.org/ns/prov#Agent
http://www.w3.org/ns/dcat#Dataset
http://xmlns.com/foaf/0.1/Organization
http://purl.org/dc/terms/PeriodOfTime
http://www.w3.org/ns/prov#Organization
http://www.w3.org/2004/02/skos/core#Concept
http://www.w3.org/ns/prov#Activity
http://www.w3.org/ns/dcat#DataService
http://www.w3.org/ns/prov#Entity
http://www.w3.org/ns/prov#Association


#### How many properties are used in the dataset?

In [8]:
# SPARQL: count the distinct predicates appearing in any triple.
q = """
    SELECT (count(distinct ?prop) as ?properties)
    WHERE {
        ?s ?prop ?o .
    }
"""

print('##### Number of properties:')

# Run the query against the merged graph; the aggregate is bound to ?properties.
for row in graph.query(q):
    print(row["properties"])

##### Number of properties:
52


#### What are the properties used in the dataset?

In [9]:
# SPARQL: every distinct predicate appearing in any triple.
q = """
    SELECT distinct ?prop
    WHERE {
        ?s ?prop ?o .
    }
"""

print('##### Properties:')

# Run the query against the merged graph and print one predicate IRI per row.
for row in graph.query(q):
    print(row["prop"])

##### Properties:
http://www.w3.org/ns/dcat#landingPage
http://purl.org/dc/terms/bibliographicCitation
http://www.w3.org/ns/dcat#mediaType
http://www.w3.org/ns/prov#startedAtTime
http://www.w3.org/ns/dcat#compressFormat
http://www.w3.org/ns/dcat#distribution
http://www.w3.org/ns/prov#wasGeneratedBy
http://www.w3.org/ns/dcat#byteSize
http://www.w3.org/ns/prov#used
http://www.w3.org/ns/dcat#downloadURL
http://purl.org/dc/terms/license
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://purl.org/dc/terms/type
http://www.w3.org/2000/01/rdf-schema#label
http://www.wikidata.org/prop/direct/P7014
http://www.w3.org/ns/dcat#keyword
http://xmlns.com/foaf/0.1/givenName
http://www.w3.org/ns/dcat#endDate
http://www.w3.org/2002/07/owl#sameAs
http://purl.org/dc/terms/hasPart
http://purl.org/dc/terms/conformsTo
http://www.w3.org/ns/dcat#startDate
http://www.w3.org/ns/dcat#contactPoint
http://xmlns.com/foaf/0.1/mbox
http://purl.org/dc/terms/title
http://purl.org/dc/terms/isReferencedBy
http://purl.o

#### How many external links are included in the dataset?

In [10]:
print('##### Number of owl:sameAs properties:')

# SPARQL: count the distinct objects of owl:sameAs statements.
# The owl: prefix is declared inside the query so that it resolves on its own,
# instead of depending on whichever prefixes happen to be bound on `graph`.
q = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT (count(distinct ?sameAs) as ?total)
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Run the query against the merged graph; the aggregate is bound to ?total.
for row in graph.query(q):
    print(row["total"])

##### Number of owl:sameAs properties:
21


#### Let's explore the external links (owl:sameAs)

In [11]:
print('##### owl:sameAs properties:')

# SPARQL: every distinct object of an owl:sameAs statement.
# The owl: prefix is declared inside the query so that it resolves on its own,
# instead of depending on whichever prefixes happen to be bound on `graph`.
q = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT distinct ?sameAs
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Run the query against the merged graph and print one linked IRI per row.
for row in graph.query(q):
    print(row["sameAs"])

##### owl:sameAs properties:
https://www.wikidata.org/wiki/Q234110
https://www.wikidata.org/wiki/Q1670994
https://www.wikidata.org/wiki/Q1526131
https://www.wikidata.org/wiki/Q3783572
https://www.wikidata.org/wiki/Q131454
http://www.wikidata.org/entity/Q188740
https://viaf.org/viaf/155463906/
https://viaf.org/viaf/144524678/
https://www.wikidata.org/wiki/Q856651
https://orcid.org/0000-0002-8715-0129
https://www.wikidata.org/wiki/Q190804
https://viaf.org/viaf/102145970121932251120/
https://www.wikidata.org/wiki/Q193563
https://viaf.org/viaf/132075128/
https://viaf.org/viaf/151962300/
https://viaf.org/viaf/128460224/
https://viaf.org/viaf/3061147967353484200005/
https://viaf.org/viaf/137156173/
http://www.wikidata.org/entity/Q50841422
https://viaf.org/viaf/159624082/
https://www.wikidata.org/wiki/Q108481932


#### Let's compute the number of triples

In [12]:
# SPARQL: count every (subject, predicate, object) match in the graph.
q = """
    SELECT (COUNT(*) as ?triples) 
    WHERE { ?s ?p ?o } 
"""

print('##### Number of triples:')

# Run the query against the merged graph; the aggregate is bound to ?triples.
for row in graph.query(q):
    print(row["triples"])

##### Number of triples:
620


#### What is the number of organizations?

In [13]:
# SPARQL: count the distinct subjects typed as foaf:Organization.
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT (COUNT(distinct ?s) as ?orgs) 
    WHERE {?s a foaf:Organization} 
"""

print('##### Number of organizations:')

# Run the query against the merged graph; the aggregate is bound to ?orgs.
for row in graph.query(q):
    print(row["orgs"])

##### Number of organizations:
10
