# Example DCAT catalog



### Preparation

Import the library (rdflib) required to query the RDF dataset.

In [1]:
from rdflib import Graph

### Loading the RDF dataset

In [7]:
# Build an empty rdflib graph, then populate it from the Turtle file that
# sits next to this notebook. Every later cell queries this `g`.
g = Graph()
g.parse("data-foundry-nls.ttl")

### Let's explore the content of the dataset using SPARQL

The following examples show a variety of ways in which the dataset can be queried.

#### Let's retrieve the number of classes in the RDF dataset

In [8]:
# SPARQL: each rdf:type assertion (`a`) contributes its object as a class;
# count(distinct ...) collapses repeated classes into one total.
class_count_query = """
    SELECT (count(distinct ?class) as ?classes)
    WHERE {
        ?s a ?class .
    }
"""

print('##### Number of classes:')

# Run the query against the graph and print the ?classes binding of each row.
for row in g.query(class_count_query):
    print(row["classes"])

##### Number of classes:
6


#### What classes are used in the dataset?

In [9]:
# SPARQL: list every distinct object of an rdf:type (`a`) statement.
class_list_query = """
    SELECT distinct ?class
    WHERE {
        ?s a ?class .
    }
"""

print('##### Classes:')

# One row per class; subscript access is used because `class` is a Python keyword.
for row in g.query(class_list_query):
    print(row["class"])

##### Classes:
http://www.w3.org/ns/dcat#Dataset
http://www.w3.org/ns/prov#Activity
http://xmlns.com/foaf/0.1/Organization
http://www.w3.org/ns/prov#Agent
http://www.w3.org/ns/prov#Organization
http://www.w3.org/ns/dcat#Distribution


#### How many properties are used in the dataset?

In [10]:
# SPARQL: match every triple and count the distinct predicates that occur.
property_count_query = """
    SELECT (count(distinct ?prop) as ?properties)
    WHERE {
        ?s ?prop ?o .
    }
"""

print('##### Number of properties:')

# Run the query against the graph and print the ?properties binding of each row.
for row in g.query(property_count_query):
    print(row["properties"])

##### Number of properties:
29


#### What are the properties used in the dataset?

In [11]:
# SPARQL: match every triple and keep each predicate once.
property_list_query = """
    SELECT distinct ?prop
    WHERE {
        ?s ?prop ?o .
    }
"""

print('##### Properties:')

# One row per distinct predicate IRI.
for row in g.query(property_list_query):
    print(row["prop"])

##### Properties:
http://www.w3.org/2002/07/owl#sameAs
http://purl.org/dc/terms/title
http://www.w3.org/2000/01/rdf-schema#seeAlso
http://purl.org/dc/terms/description
http://www.w3.org/ns/dcat#distribution
http://www.w3.org/ns/dcat#mediaType
http://www.w3.org/ns/dcat#compressFormat
http://purl.org/dc/terms/bibliographicCitation
http://www.w3.org/ns/prov#used
http://xmlns.com/foaf/0.1/mbox
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/ns/dcat#downloadURL
http://www.w3.org/ns/dcat#byteSize
http://www.w3.org/ns/dcat#keyword
http://www.w3.org/ns/dcat#contactPoint
http://www.w3.org/ns/prov#startedAtTime
http://www.w3.org/ns/prov#wasGeneratedBy
http://purl.org/dc/terms/language
http://purl.org/dc/terms/conformsTo
http://www.w3.org/ns/prov#generated
http://www.w3.org/ns/dcat#landingPage
http://purl.org/dc/terms/isReferencedBy
http://xmlns.com/foaf/0.1/homePage
http://www.w3.org/2000/01/rdf-schema#label
http://purl.org/dc/terms/publisher
http://xmlns.com/foaf/0.1/givenName

#### How many external links are included in the dataset?

In [12]:
print('##### Number of owl:sameAs properties:')

# Count the distinct targets of owl:sameAs statements (the external links).
# The owl: prefix is declared inside the query so that it does not depend on
# whichever prefix bindings the graph happens to carry after parsing.
q = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT (count(distinct ?sameAs) as ?total)
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Apply the query to the graph and print the ?total binding of each row
for r in g.query(q):
    print(r["total"])

##### Number of owl:sameAs properties:
3


#### Let's explore the external links (owl:sameAs)

In [13]:
print('##### owl:sameAs properties:')

# List the distinct targets of owl:sameAs statements (the external links).
# The owl: prefix is declared inside the query so that it does not depend on
# whichever prefix bindings the graph happens to carry after parsing.
q = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT distinct ?sameAs
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Apply the query to the graph and print one linked IRI per row
for r in g.query(q):
    print(r["sameAs"])

##### owl:sameAs properties:
http://www.wikidata.org/entity/Q50841422
https://www.wikidata.org/wiki/Q1670994
https://viaf.org/viaf/132075128/


#### Let's compute the number of triples

In [14]:
# SPARQL: the pattern ?s ?p ?o matches every triple, so COUNT(*) is the
# number of triples in the graph.
triple_count_query = """
    SELECT (COUNT(*) as ?triples) 
    WHERE { ?s ?p ?o } 
"""

print('##### Number of triples:')

# Run the query against the graph and print the ?triples binding of each row.
for row in g.query(triple_count_query):
    print(row["triples"])

##### Number of triples:
56


#### What is the number of organizations?

In [19]:
# SPARQL: count the distinct subjects typed as foaf:Organization.
organization_count_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT (COUNT(distinct ?s) as ?orgs) 
    WHERE {?s a foaf:Organization} 
"""

print('##### Number of organizations:')

# Run the query against the graph and print the ?orgs binding of each row.
for row in g.query(organization_count_query):
    print(row["orgs"])

##### Number of organizations:
1


In [19]:
# SPARQL: group subjects by their schema:author value, keep only authors
# attached to more than 20 distinct subjects, and order by that count,
# largest first.
# NOTE(review): the `schema:` prefix is not declared in the query, so it
# presumably resolves through a prefix binding on the graph. Confirm the
# intended namespace IRI and declare it explicitly.
author_group_query = """
    SELECT ?author (COUNT(distinct ?s) as ?count) 
    WHERE { ?s schema:author ?author} 
    GROUP BY ?author
    HAVING (count(distinct ?s) > 20)
    ORDER BY DESC(?count)
"""

print('##### group by authors:')

# One line per author: the author term, a space, then the subject count.
for row in g.query(author_group_query):
    print(row["author"], row["count"])

##### group by authors:
https://example.org/author/scottishscreencollection 699
https://example.org/author/grampiantelevision 499
https://example.org/organization/templarfilms 318
https://example.org/author/edinburghcineandvideosocietyecvs 293
https://example.org/author/scottishballet 171
https://example.org/organization/filmsofscotlandcommittee 166
https://example.org/author/scottishindependencereferendumcollection2014 157
https://example.org/author/scottisheducationalfilmassociationsefa 148
https://example.org/organization/campbellharperproductions 130
https://example.org/organization/scottishamateurfilmfestivalsaff 117
https://example.org/author/group5 85
https://example.org/author/russellstanleylivingstone 63
https://example.org/author/cocozzaenrico 62
https://example.org/author/marshallfrankm 59
https://example.org/author/elderjohnc 58
https://example.org/author/mcconnelledwardeddie 54
https://example.org/author/colonelarthureirvineandrichardirvinecollection 50
https://example.org

#### How many places are in the dataset?

In [20]:
# SPARQL: count the distinct objects of dcterms:spatial statements.
place_count_query = """
    PREFIX dcterms: <http://purl.org/dc/terms/>
    SELECT (COUNT(distinct ?place) as ?places) 
    WHERE { ?s dcterms:spatial ?place} 
"""

print('##### Number of places:')

# Run the query against the graph and print the ?places binding of each row.
for row in g.query(place_count_query):
    print(row["places"])

##### Number of places:
44


#### What are the most relevant places?

In [21]:
# SPARQL: group subjects by their dcterms:spatial value, keep only places
# attached to more than 20 distinct subjects, and order by that count,
# largest first.
place_group_query = """
    PREFIX dcterms: <http://purl.org/dc/terms/> 
    SELECT ?place (COUNT(distinct ?s) as ?count) 
    WHERE { ?s dcterms:spatial ?place} 
    GROUP BY ?place
    HAVING (count(distinct ?s) > 20)
    ORDER BY DESC(?count)
"""

print('##### group by place:')

# One line per place: the place term, a space, then the subject count.
for row in g.query(place_group_query):
    print(row["place"], row["count"])

##### group by place:
https://example.org/location/glasgow 976
https://example.org/location/edinburgh 686
https://example.org/location/renfrewshire 288
https://example.org/location/ayrshire 288
https://example.org/location/lanarkshire 271
https://example.org/location/fife 254
https://example.org/location/aberdeen 248
https://example.org/location/argyllshire 230
https://example.org/location/aberdeenshire 216
https://example.org/location/perth 201
https://example.org/location/dunbartonshire 194
https://example.org/location/dundee 142
https://example.org/location/highlandsthe 140
https://example.org/location/invernesshire 134
https://example.org/location/westlothian 128
https://example.org/location/innerhebrides 124
https://example.org/location/outerhebrides 119
https://example.org/location/dumfriesshire 118
https://example.org/location/orkneyislands 117
https://example.org/location/borders 108
https://example.org/location/stirlingshire 98
https://example.org/location/shetlandislands 86
h