In [1]:
from rdflib import Graph

# Create a Graph
g = Graph().parse("../rdf/movingImageArchiveEnriched.rdf")

### Number of classes

In [3]:
print('##### Number of classes:')

# Query the data in g using SPARQL
q = """
    SELECT (count(distinct ?class) as ?classes)
    WHERE {
        ?s a ?class .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["classes"])

##### Number of classes:
7


### Classes

In [4]:
print('##### Classes:')

# Query the data in g using SPARQL
q = """
    SELECT distinct ?class
    WHERE {
        ?s a ?class .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["class"])

##### Classes:
http://schema.org/VideoObject
http://www.europeana.eu/schemas/edm/Place
http://schema.org/Place
http://schema.org/Organization
http://xmlns.com/foaf/0.1/Organization
http://schema.org/Person
http://xmlns.com/foaf/0.1/Person


### Number of properties

In [5]:
print('##### Number of properties:')

# Query the data in g using SPARQL
q = """
    SELECT (count(distinct ?prop) as ?properties)
    WHERE {
        ?s ?prop ?o .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["properties"])

##### Number of properties:
21


### Properties

In [6]:
print('##### Properties:')

# Query the data in g using SPARQL
q = """
    SELECT distinct ?prop
    WHERE {
        ?s ?prop ?o .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["prop"])

##### Properties:
http://schema.org/abstract
http://schema.org/duration
http://schema.org/datePublished
http://schema.org/identifier
http://schema.org/name
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://schema.org/sourceOrganization
http://schema.org/videoQuality
http://purl.org/dc/elements/1.1/subject
http://schema.org/author
http://schema.org/creditText
http://purl.org/dc/terms/spatial
http://schema.org/creator
http://www.w3.org/2003/01/geo/wgs84_pos#lat
http://schema.org/givenName
http://www.w3.org/2003/01/geo/wgs84_pos#long
http://www.w3.org/2002/07/owl#sameAs
http://www.w3.org/2004/02/skos/core#prefLabel
http://xmlns.com/foaf/0.1/name
http://schema.org/url
http://schema.org/logo


### Number of external links

In [7]:
print('##### Number of owl:sameAs properties:')
    
# Query the data in g using SPARQL
q = """
    SELECT (count(distinct ?sameAs) as ?total)
    WHERE {
        ?p owl:sameAs ?sameAs .
    }
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["total"])

##### Number of owl:sameAs properties:
75


### Number of triples

In [8]:
print('##### Number of triples:')
    
# Query the data in g using SPARQL
q = """
    SELECT (COUNT(*) as ?triples) 
    WHERE { ?s ?p ?o } 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["triples"])

##### Number of triples:
183065


### Number of authors

In [9]:
print('##### Number of authors:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT (COUNT(*) as ?authors) 
    WHERE { ?s ?p ?o . ?s a foaf:Person} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["authors"])

##### Number of authors:
112


### Authors

In [9]:
print('##### List of authors:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?s ?name 
    WHERE {?s a foaf:Person . ?s foaf:name ?name} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["s"] + " " + r["name"])

http://example.org/author/walkerwilliam  does not look like a valid URI, trying to serialize this will break.
http://example.org/author/walkerwilliam WALKER, William does not look like a valid URI, trying to serialize this will break.
http://example.org/author/cooperhenry  does not look like a valid URI, trying to serialize this will break.
http://example.org/author/cooperhenry COOPER, Henry does not look like a valid URI, trying to serialize this will break.
http://example.org/author/irvinemalcolm  does not look like a valid URI, trying to serialize this will break.
http://example.org/author/irvinemalcolm IRVINE, Malcolm does not look like a valid URI, trying to serialize this will break.
http://example.org/author/elderjohnc  does not look like a valid URI, trying to serialize this will break.
http://example.org/author/elderjohnc ELDER, John C does not look like a valid URI, trying to serialize this will break.
http://example.org/author/taggartnan  does not look like a valid URI, tryi

##### List of authors:
http://example.org/author/walkerwilliam WALKER, William
http://example.org/author/cooperhenry COOPER, Henry
http://example.org/author/irvinemalcolm IRVINE, Malcolm
http://example.org/author/elderjohnc ELDER, John C
http://example.org/author/taggartnan Taggart, Nan
http://example.org/author/loorijjanpaulusjack LOORIJ, Jan Paulus (Jack)
http://example.org/author/birrellharry Birrell, Harry
http://example.org/author/grayalexanderjoe GRAY, Alexander ‘Joe’
http://example.org/author/cocozzaenrico COCOZZA, Enrico
http://example.org/author/harperalanjames HARPER, Alan James
http://example.org/author/dicksonlouis DICKSON, Louis
http://example.org/author/grigormurray Grigor, Murray
http://example.org/author/jayronaldl JAY, Ronald L.
http://example.org/author/hutchisonisobelwylie HUTCHISON, Isobel Wylie
http://example.org/author/gilbertsonjennyisabelneebrown GILBERTSON, Jenny Isabel (nee Brown)
http://example.org/author/nairnjamess NAIRN, James S.
http://example.org/author/

### Organizations

In [10]:
print('##### Number of organizations:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT (COUNT(*) as ?orgs) 
    WHERE { ?s ?p ?o . ?s a foaf:Organization} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["orgs"])

##### Number of organizations:
40


### List of Organizations

In [11]:
print('##### List of organizations:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?s ?name 
    WHERE {?s a foaf:Organization . ?s foaf:name ?name} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["s"] + " " + r["name"])

http://example.org/organisation/ifascotland  does not look like a valid URI, trying to serialize this will break.
http://example.org/organisation/ifascotland IFA Scotland does not look like a valid URI, trying to serialize this will break.
http://example.org/organisation/scottishfilms/thamesandclydeproductions  does not look like a valid URI, trying to serialize this will break.
http://example.org/organisation/scottishfilms/thamesandclydeproductions Scottish Films / Thames and Clyde Productions does not look like a valid URI, trying to serialize this will break.
http://example.org/organisation/scottishamateurfilmfestivalsaff  does not look like a valid URI, trying to serialize this will break.
http://example.org/organisation/scottishamateurfilmfestivalsaff Scottish Amateur Film Festival (SAFF) does not look like a valid URI, trying to serialize this will break.
http://example.org/organisation/templarfilms  does not look like a valid URI, trying to serialize this will break.
http://exam

##### List of organizations:
http://example.org/organisation/ifascotland IFA Scotland
http://example.org/organisation/scottishfilms/thamesandclydeproductions Scottish Films / Thames and Clyde Productions
http://example.org/organisation/scottishamateurfilmfestivalsaff Scottish Amateur Film Festival (SAFF)
http://example.org/organisation/templarfilms Templar Films
http://example.org/organisation/peliculafilms Pelicula Films
http://example.org/organisation/elderdalrympleproductions/elderfilms Elder Dalrymple Productions / Elder Films
http://example.org/organisation/treefilms Tree Films
http://example.org/organisation/greensfilmservice/greenstopicalproductions Green’s Film Service / Green’s Topical Productions
http://example.org/organisation/filmsofscotlandcommittee Films of Scotland Committee
http://example.org/organisation/campbellharperproductions Campbell Harper Productions


### Subjects

In [13]:
print('##### Number of subjects:')
    
# Query the data in g using SPARQL
q = """
    SELECT (COUNT(distinct ?subject) as ?subjects) 
    WHERE { ?s dc:subject ?subject} 
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["subjects"])

##### Number of subjects:
2232


### List of subjects

In [12]:
print('##### List of subjects:')
    
# Query the data in g using SPARQL
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?subject 
    WHERE { ?s dc:subject ?subject} 
    limit 10
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(r["subject"])  #+ " " + r["name"])

##### List of subjects:
Leisure and Recreation -- Tourism and Travel -- Water and Waterways
Leisure and Recreation -- Tourism and Travel -- Water and Waterways
Leisure and Recreation -- Tourism and Travel -- Water and Waterways
Leisure and Recreation -- Tourism and Travel -- Water and Waterways
Leisure and Recreation -- Tourism and Travel -- Water and Waterways
Agriculture -- Media, Communication and the Creative Industries
Agriculture -- Media, Communication and the Creative Industries
Agriculture -- Media, Communication and the Creative Industries
Arts and Crafts
Arts and Crafts


### Group by subject

In [27]:
print('##### group by subjects:')
    
# Query the data in g using SPARQL
q = """
    SELECT ?subject (COUNT(distinct ?s) as ?count) 
    WHERE { ?s dc:subject ?subject} 
    GROUP BY ?subject
    HAVING (count(distinct ?s) > 50)
    ORDER BY DESC(?count)
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(str(r["subject"]) + " " + str(r["count"]))

##### group by subjects:
Gaelic 1935
Leisure and Recreation 220
Sporting Activities 204
Ships and Shipping 160
Transport 124
Employment, Industry and Industrial Relations 98
Celebrations, Traditions and Customs 89
Tourism and Travel 88
Media, Communication and the Creative Industries 70
Education 69
Religion 64
Arts and Crafts 63
Leisure and Recreation -- Sporting Activities 59
Agriculture 59
Construction and Engineering 56
Politics 56
Food and Drink -- Media, Communication and the Creative Industries 51


### Group by author

In [36]:
print('##### group by authors:')
    
# Query the data in g using SPARQL
q = """
    SELECT ?author (COUNT(distinct ?s) as ?count) 
    WHERE { ?s schema:author ?author} 
    GROUP BY ?author
    HAVING (count(distinct ?s) > 20)
    ORDER BY DESC(?count)
"""

# Apply the query to the graph and iterate through results
for r in g.query(q):
    print(str(r["author"]) + " " + str(r["count"]))

##### group by authors:
http://example.org/organisation/templarfilms 304
http://example.org/organisation/filmsofscotlandcommittee 146
http://example.org/organisation/campbellharperproductions 130
http://example.org/organisation/scottishamateurfilmfestivalsaff 116
http://example.org/organisation/scottishfilms/thamesandclydeproductions 41
http://example.org/organisation/greensfilmservice/greenstopicalproductions 37
http://example.org/organisation/elderdalrympleproductions/elderfilms 36
