In [25]:
from rdflib import Graph

# Create a Graph
#g = Graph().parse("../rdf/movingImageArchiveEnriched.rdf")
g = Graph().parse("../rdf/datasetEnriched.rdf")

### Number of classes

In [26]:
print('##### Number of classes:')

# Query the data in g using SPARQL
q = """
    SELECT (count(distinct ?class) as ?classes)
    WHERE {
        ?s a ?class .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["classes"])

##### Number of classes:
7


### Classes

In [27]:
print('##### Classes:')

# Query the data in g using SPARQL
q = """
    SELECT distinct ?class
    WHERE {
        ?s a ?class .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["class"])

##### Classes:
https://schema.org/VideoObject
http://www.europeana.eu/schemas/edm/Place
https://schema.org/Place
http://xmlns.com/foaf/0.1/Person
https://schema.org/Person
http://xmlns.com/foaf/0.1/Organization
https://schema.org/Organization


### Number of properties

In [28]:
print('##### Number of properties:')

# Query the data in g using SPARQL
q = """
    SELECT (count(distinct ?prop) as ?properties)
    WHERE {
        ?s ?prop ?o .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["properties"])

##### Number of properties:
23


### Properties

In [29]:
print('##### Properties:')

# Query the data in g using SPARQL
q = """
    SELECT distinct ?prop
    WHERE {
        ?s ?prop ?o .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["prop"])

##### Properties:
https://schema.org/name
https://schema.org/duration
http://purl.org/dc/terms/spatial
https://schema.org/abstract
https://schema.org/videoQuality
https://schema.org/datePublished
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
https://schema.org/identifier
http://purl.org/dc/elements/1.1/identifier
http://purl.org/dc/elements/1.1/subject
http://purl.org/dc/elements/1.1/date
https://schema.org/sourceOrganization
http://purl.org/dc/elements/1.1/title
https://schema.org/author
https://schema.org/creditText
https://schema.org/thumbnail
http://www.w3.org/2004/02/skos/core#prefLabel
http://xmlns.com/foaf/0.1/name
http://www.w3.org/2002/07/owl#sameAs
http://www.w3.org/2003/01/geo/wgs84_pos#lat
http://www.w3.org/2003/01/geo/wgs84_pos#long
https://schema.org/url
https://schema.org/logo


### Number of external links

In [30]:
print('##### Number of owl:sameAs properties:')
    
# Query the data in g using SPARQL
q = """
    SELECT (count(distinct ?sameAs) as ?total)
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["total"])

##### Number of owl:sameAs properties:
75


### External links

In [31]:
print('##### owl:sameAs properties:')
    
# Query the data in g using SPARQL
q = """
    SELECT distinct ?sameAs
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["sameAs"])

##### owl:sameAs properties:
https://www.wikidata.org/wiki/Q81052
https://www.geonames.org/2646177
https://www.wikidata.org/wiki/Q201149
https://www.geonames.org/2649469
https://www.wikidata.org/wiki/Q123709
https://www.geonames.org/2650752
https://www.wikidata.org/wiki/Q1147435
https://www.geonames.org/2654168
https://www.wikidata.org/wiki/Q189912
https://www.geonames.org/2657830
https://www.wikidata.org/wiki/Q106652
https://www.geonames.org/2646943
https://www.wikidata.org/wiki/Q23436
https://www.geonames.org/2650225
https://www.wikidata.org/wiki/Q47134
https://www.geonames.org/2638010
https://www.wikidata.org/wiki/Q664892
https://www.geonames.org/2639033
https://www.wikidata.org/wiki/Q207268
https://www.geonames.org/2652975
https://www.wikidata.org/wiki/Q2421
https://www.geonames.org/2649177
https://www.wikidata.org/wiki/Q211091
https://www.geonames.org/2639494
https://www.wikidata.org/wiki/Q1247384
https://www.geonames.org/2650795
https://www.wikidata.org/wiki/Q980084
https://www.g

### Number of triples

In [32]:
print('##### Number of triples:')
    
# Query the data in g using SPARQL
q = """
    SELECT (COUNT(*) as ?triples) 
    WHERE { ?s ?p ?o } 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["triples"])

##### Number of triples:
261061


### Number of videos

In [33]:
print('##### Number of videos:')
    
# Query the data in g using SPARQL
q = """
    SELECT (COUNT(distinct ?s) as ?videos) 
    WHERE { ?s a schema:VideoObject} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["videos"])

##### Number of videos:
20608


### Number of authors

In [34]:
print('##### Number of authors:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT (COUNT(*) as ?authors) 
    WHERE { ?s ?p ?o . ?s a foaf:Person} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["authors"])

##### Number of authors:
140


### Authors

In [35]:
print('##### List of authors:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?s ?name 
    WHERE {?s a foaf:Person . ?s foaf:name ?name} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["s"] + " " + r["name"])

https://example.org/author/mickeldouglas  does not look like a valid URI, trying to serialize this will break.
https://example.org/author/mickeldouglas MICKEL, Douglas does not look like a valid URI, trying to serialize this will break.
https://example.org/author/ogamfilms  does not look like a valid URI, trying to serialize this will break.
https://example.org/author/ogamfilms Ogam Films / Oscar Marzaroli does not look like a valid URI, trying to serialize this will break.
https://example.org/author/nairnjamess  does not look like a valid URI, trying to serialize this will break.
https://example.org/author/nairnjamess NAIRN, James S. does not look like a valid URI, trying to serialize this will break.
https://example.org/author/macleanwilliamjmac  does not look like a valid URI, trying to serialize this will break.
https://example.org/author/macleanwilliamjmac MacLEAN, William J. ‘Mac’ does not look like a valid URI, trying to serialize this will break.
https://example.org/author/grig

##### List of authors:
https://example.org/author/mickeldouglas MICKEL, Douglas
https://example.org/author/ogamfilms Ogam Films / Oscar Marzaroli
https://example.org/author/nairnjamess NAIRN, James S.
https://example.org/author/macleanwilliamjmac MacLEAN, William J. ‘Mac’
https://example.org/author/grigormurray Grigor, Murray
https://example.org/author/russellstanleylivingstone RUSSELL, Stanley Livingstone
https://example.org/author/jayronaldl JAY, Ronald L.
https://example.org/author/colonelarthureirvineandrichardirvinecollection Colonel Arthur E Irvine and Richard Irvine Collection
https://example.org/author/cooperhenry COOPER, Henry
https://example.org/author/taitmargaret TAIT, Margaret
https://example.org/author/walkerwilliam WALKER, William
https://example.org/author/gillespiejamesstirling GILLESPIE, James Stirling
https://example.org/author/harperalanjames HARPER, Alan James
https://example.org/author/riddell-blackrobert RIDDELL-BLACK, Robert
https://example.org/author/taggartnan

### Organizations

In [36]:
print('##### Number of organizations:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT (COUNT(*) as ?orgs) 
    WHERE { ?s ?p ?o . ?s a foaf:Organization} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["orgs"])

##### Number of organizations:
50


### How a resource typed as Organization is defined?

In [37]:
print('##### Check the properties used to define an organization:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT *
    WHERE {<http://example.org/organisation/ifascotland> ?p ?o} 
    limit 40
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["p"] + " " + r["o"])

##### Check the properties used to define an organization:


In [38]:
print('##### List of organizations:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?s ?name 
    WHERE {?s a foaf:Organization . ?s foaf:name ?name} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["s"] + " " + r["name"])

https://example.org/organization/greensfilmservice  does not look like a valid URI, trying to serialize this will break.
https://example.org/organization/greensfilmservice Green’s Film Service / Green’s Topical Productions does not look like a valid URI, trying to serialize this will break.
https://example.org/organization/campbellharperproductions  does not look like a valid URI, trying to serialize this will break.
https://example.org/organization/campbellharperproductions Campbell Harper Productions does not look like a valid URI, trying to serialize this will break.
https://example.org/organization/filmsofscotlandcommittee  does not look like a valid URI, trying to serialize this will break.
https://example.org/organization/filmsofscotlandcommittee Films of Scotland Committee does not look like a valid URI, trying to serialize this will break.
https://example.org/organization/ifascotland  does not look like a valid URI, trying to serialize this will break.
https://example.org/organ

##### List of organizations:
https://example.org/organization/greensfilmservice Green’s Film Service / Green’s Topical Productions
https://example.org/organization/campbellharperproductions Campbell Harper Productions
https://example.org/organization/filmsofscotlandcommittee Films of Scotland Committee
https://example.org/organization/ifascotland IFA Scotland
https://example.org/organization/scottishamateurfilmfestivalsaff Scottish Amateur Film Festival (SAFF)
https://example.org/organization/treefilms Tree Films
https://example.org/organization/peliculafilms Pelicula Films
https://example.org/organization/templarfilms Templar Films
https://example.org/organization/scottishfilms Scottish Films / Thames and Clyde Productions
https://example.org/organization/elderdalrympleproductions Elder Dalrymple Productions / Elder Films


### Subjects

In [39]:
print('##### Number of subjects:')
    
# Query the data in g using SPARQL
q = """
    SELECT (COUNT(distinct ?subject) as ?subjects) 
    WHERE { ?s dc:subject ?subject} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["subjects"])

##### Number of subjects:
91


### List of subjects

In [40]:
print('##### List of subjects:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT distinct ?subject 
    WHERE { ?s dc:subject ?subject} 
    limit 10
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["subject"])  #+ " " + r["name"])

##### List of subjects:
Transport
Celts and Celtic Culture
Leisure and Recreation
Music
Sporting Activities
Gaelic
Carriages
Employment, Industry and Industrial Relations
Fish Gutting
Fish Markets


### Group by subject

In [41]:
print('##### group by subjects:')
    
# Query the data in g using SPARQL
q = """
    SELECT ?subject (COUNT(distinct ?s) as ?count) 
    WHERE { ?s dc:subject ?subject} 
    GROUP BY ?subject
    HAVING (count(distinct ?s) > 50)
    ORDER BY DESC(?count)
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(str(r["subject"]) + " " + str(r["count"]))

##### group by subjects:
Gaelic 1943
Leisure and Recreation 1145
Transport 732
Employment, Industry and Industrial Relations 730
Sporting Activities 625
Ships and Shipping 567
Education 525
Celebrations, Traditions and Customs 497
Tourism and Travel 459
Media, Communication and the Creative Industries 441
Children and Infants 426
Ceremonies 420
Arts and Crafts 386
Agriculture 379
Construction and Engineering 363
Fish and Fishing 311
Emotions, Attitudes and Behaviour 299
Food and Drink 299
Art and Artists, general 289
Landscapes and Seascapes 284
Environment 274
Water and Waterways 255
Home Life 239
Military, the 236
Animals 234
Politics 228
Science and Technology 223
Power Resources 223
Religion 221
Healthcare 212
War 210
Architecture and Buildings 204
Birds 164
Royalty 157
Housing and Living Conditions 145
Music 135
Fishing Boats 131
Buses and Coaches, general 119
Carriages 106
Ferries 95
Local Government 88
Forth River 85
Crime, Punishment and Law Enforcement 69
Institutional Care 63

### Group by author

In [42]:
print('##### group by authors:')
    
# Query the data in g using SPARQL
q = """
    SELECT ?author (COUNT(distinct ?s) as ?count) 
    WHERE { ?s schema:author ?author} 
    GROUP BY ?author
    HAVING (count(distinct ?s) > 20)
    ORDER BY DESC(?count)
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(str(r["author"]) + " " + str(r["count"]))

##### group by authors:
https://example.org/organization/templarfilms 318
https://example.org/organization/filmsofscotlandcommittee 166
https://example.org/organization/campbellharperproductions 130
https://example.org/organization/scottishamateurfilmfestivalsaff 117
https://example.org/author/russellstanleylivingstone 63
https://example.org/author/cocozzaenrico 62
https://example.org/author/elderjohnc 58
https://example.org/author/colonelarthureirvineandrichardirvinecollection 50
https://example.org/author/nairnjamess 49
https://example.org/author/taitmargaret 48
https://example.org/author/glasgowcorporation 46
https://example.org/organization/scottishfilms 45
https://example.org/author/annandlouise 39
https://example.org/author/harperalanjames 39
https://example.org/author/taggartnan 39
https://example.org/author/cooperhenry 38
https://example.org/organization/ifascotland 37
https://example.org/author/jayronaldl 37
https://example.org/organization/greensfilmservice 37
https://example

### Places

In [43]:
print('##### Number of places:')
    
# Query the data in g using SPARQL
q = """
    PREFIX dcterms: <http://purl.org/dc/terms/>
    SELECT (COUNT(distinct ?place) as ?places) 
    WHERE { ?s dcterms:spatial ?place} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["places"])

##### Number of places:
44


### Group by Place

In [44]:
print('##### group by place:')
    
# Query the data in g using SPARQL
q = """
    PREFIX dcterms: <http://purl.org/dc/terms/> 
    SELECT ?place (COUNT(distinct ?s) as ?count) 
    WHERE { ?s dcterms:spatial ?place} 
    GROUP BY ?place
    HAVING (count(distinct ?s) > 20)
    ORDER BY DESC(?count)
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(str(r["place"]) + " " + str(r["count"]))    

##### group by place:
https://example.org/location/glasgow 976
https://example.org/location/edinburgh 686
https://example.org/location/ayrshire 288
https://example.org/location/renfrewshire 288
https://example.org/location/lanarkshire 271
https://example.org/location/fife 254
https://example.org/location/aberdeen 248
https://example.org/location/argyllshire 230
https://example.org/location/aberdeenshire 216
https://example.org/location/perth 201
https://example.org/location/dunbartonshire 194
https://example.org/location/dundee 142
https://example.org/location/highlandsthe 140
https://example.org/location/invernesshire 134
https://example.org/location/westlothian 128
https://example.org/location/innerhebrides 124
https://example.org/location/outerhebrides 119
https://example.org/location/dumfriesshire 118
https://example.org/location/orkneyislands 117
https://example.org/location/borders 108
https://example.org/location/stirlingshire 98
https://example.org/location/shetlandislands 86
h