**If you are here only for the queries, scroll down until the next text block!**

In [1]:
import json #json is a built in package of python, more info here: 
# https://docs.python.org/3/library/json.html

def cleandict(s):
  """Turn one free-text relationship string into a dict of name sets.

  Example: "partner: Jeff- offspring: rosa, Vera, Sky" becomes
  {'partners': {'Jeff'}, 'offspring': {'Rosa', 'Sky', 'Vera'}}.

  Parsing rules:
    * "-" is treated like ";" (both separate relationships);
    * " and " is treated like ", " (both separate names);
    * every relationship name except "offspring" is stored in its plural
      form, so "partner" and "partners" end up under the same key;
    * names are stripped of surrounding whitespace and title-cased.

  Args:
    s: the raw "text" value of one person in the JSON file.

  Returns:
    dict mapping each relationship name to the set of names it lists.
    An empty or blank string gives an empty dict.

  Raises:
    IndexError: if a non-empty piece has no ": " between the relationship
      name and its values, or starts with ": ".
  """
  newdict = dict()
  news = s.replace("-", ";") # "-" and ";" are used in the same way
  news = news.replace(" and ", ", ") # same thing for " and " and ", "
  for el in news.split("; "): # one piece per relationship
    el = el.strip()
    if not el: # empty input or trailing separator: nothing to parse
      continue
    halfed = el.split(": ") # relationship name on the left, values on the right
    key = halfed[0].strip()
    values = halfed[1]
    if key != "offspring" and key[-1] != "s": # plural form for all but offspring
      key = key + "s"
    # setdefault merges repeated relationships (e.g. "dog" and "dogs")
    # instead of letting the later one wipe out the earlier one
    names = newdict.setdefault(key, set())
    for mult in values.split(", "):
      mult = mult.strip() # "Jeff " and "Jeff" must be the same individual
      if mult:
        names.add(mult.title())
  return newdict


# "people.json" must be in the working directory: upload it to Google Colab
# before running this cell.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform default (which would mangle non-ASCII names on some systems).
with open("people.json", "r", encoding="utf-8") as jsonfile:
  data = json.load(jsonfile) # the parsed JSON is a dictionary keyed by person

# The file is already closed here; only the in-memory dictionary is needed.
cleaned = dict()
for el in data: # we can iterate over the parsed JSON just like any dictionary
  # The "text" value of each person holds the uncleaned string that defines
  # relationships and values. After cleaning, every (title-cased) person name
  # maps to a dictionary of relationships.
  cleaned[el.title()] = cleandict(data[el]['text'])



In [None]:
# Let's check what it looks like now: each person name maps to a dictionary
# whose keys are relationship names and whose values are sets of names.
cleaned

In [None]:
pip install rdflib

In [None]:
from rdflib import URIRef, BNode, Literal, Graph
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import Namespace

# Prefix under which every individual, class and property of this ABox is created.
per = Namespace("http://example.org/element/")

# The graph starts empty; binding a prefix only controls how IRIs are
# abbreviated when the graph is serialized.
g = Graph()
g.bind("per", per)
g.bind("rdf", RDF)

# Maps each relationship name found in the cleaned JSON to the property used
# for the triple ("name") and to the property used for the reverse triple ("inverse").
nameofpropertiesandinverse = {
  "offspring": dict(name="hasOffspring", inverse="hasParent"),
  "siblings": dict(name="hasSibling", inverse="hasSibling"),
  "partners": dict(name="hasPartner", inverse="hasPartner"),
  "parents": dict(name="hasParent", inverse="hasOffspring"),
  # every kind of animal is linked through the same pair of properties
  "dogs": dict(name="hasPet", inverse="hasOwner"),
  "snakes": dict(name="hasPet", inverse="hasOwner"),
  "cats": dict(name="hasPet", inverse="hasOwner"),
}
# Class given to the *object* of each relationship: per:Person for family
# ties, the matching animal class for pets.
classofvalue = {"offspring": per.Person, "siblings": per.Person,
                "partners": per.Person, "parents": per.Person,
                "dogs": per.Dog, "snakes": per.Snake, "cats": per.Cat}
siblingdone = set() # ordered (a, b) pairs already linked by hasSibling; a set keeps the lookup cheap
for person in cleaned: # the json keys are also the names of the persons
  # per[x] builds the same IRI as "http://example.org/element/" + x
  personuri = per[person]
  # A triple is added as a 3-element tuple (subject, predicate, object):
  # here the person's IRI, rdf:type and the Person class of the ontology.
  g.add((personuri, RDF.type, per.Person))
  # Data property per:name; Literal(...) marks the object as a literal instead of an IRI.
  g.add((personuri, per.name, Literal(person)))
  for relationship in cleaned[person]:
    if relationship == "offspring":
      # Children of the same person are siblings of each other.
      offspring = cleaned[person]["offspring"]
      for sibling1 in offspring:
        for sibling2 in offspring:
          if sibling1 != sibling2 and (sibling1, sibling2) not in siblingdone:
            # if A hasSibling B then B hasSibling A, so both directions are added
            g.add((per[sibling1], per.hasSibling, per[sibling2]))
            g.add((per[sibling2], per.hasSibling, per[sibling1]))
            siblingdone.add((sibling1, sibling2))
            siblingdone.add((sibling2, sibling1))
    for value in cleaned[person][relationship]:
      name = nameofpropertiesandinverse[relationship]["name"] # property for person -> value
      inverse = nameofpropertiesandinverse[relationship]["inverse"] # property for value -> person
      valueuri = per[value]
      g.add((personuri, per[name], valueuri))
      g.add((valueuri, per[inverse], personuri))
      g.add((valueuri, per.name, Literal(value)))
      # The object of the relationship gets its class as well
      # (siblings included: they are people too).
      if relationship in classofvalue:
        g.add((valueuri, RDF.type, classofvalue[relationship]))

# Without a destination, serialize() returns the Turtle text: bytes in
# rdflib < 6, str from rdflib 6 on. Decode only when bytes come back so the
# cell runs on both.
turtle = g.serialize(format="turtle")
if isinstance(turtle, bytes):
  turtle = turtle.decode("utf-8")
print(turtle) # this prints the serialization in turtle in the console
# we can save the final turtle like this
g.serialize(destination="abox.ttl", format="turtle") # this creates the final turtle file

# Queries time!
If you have already created the Turtle file by running the code above, you don't need to upload it to Google Colab. Otherwise, if you wrote it manually and have validated it at http://ttl.summerofcode.be/, you have to upload it to Google Colab (the cells below read it as `abox.ttl`).

In [None]:
# if you have already done the cleaning and automatic generation you don't have to install rdflib again, otherwise you 
# have to run this line

pip install rdflib

In [6]:
import rdflib
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import Namespace

In [None]:
# Start from an empty graph and read the Turtle file into it.
newg = rdflib.Graph()
newg.parse("abox.ttl", format="ttl")

# Write your own query here. This sample returns the subject of up to ten triples.
querytext = """
PREFIX per: <http://example.org/element/> 
SELECT ?s WHERE {?s ?p ?o} LIMIT 10
"""

# query() gives back a result object that can be iterated row by row.
qres = newg.query(querytext)
for row in qres:
    print(row) # one retrieved element per line

Let's try to translate the competency questions into SPARQL queries using our ontology:

CQ1) Which people have siblings who have offspring?
CQ2) What is the name of the dogs whose owner has parents?
CQ3) Which partners don’t have offspring?
CQ4) Which offspring have parents who have parents as well?


CQ1



```
PREFIX per: <http://example.org/element/> 
SELECT distinct ?person WHERE { ?person per:hasSibling ?person2 .
?person2 per:hasOffspring ?offspring }

```

CQ1 expected result



```
Cloud
```
CQ2



```
PREFIX per:  <http://example.org/element/> 
SELECT distinct ?name WHERE { ?dog a per:Dog;
per:hasOwner ?person ;
per:name ?name .
?parent per:hasOffspring ?person . }
```

CQ2 expected result

```
“Lulu”
“Fire”
“Grass”
“Water”
```

CQ3

```
PREFIX per:  <http://example.org/element/> 
SELECT distinct ?partner WHERE { ?partner per:hasPartner ?partner2 .
MINUS {?partner per:hasOffspring ?offspring}
MINUS {?partner2 per:hasOffspring ?offspring} }
```

CQ3 expected result

```
Cloud
Sky
```

CQ4

```
PREFIX per:  <http://example.org/element/> 
SELECT distinct ?offspring WHERE { 
  ?offspring per:hasParent ?parent .
  ?parent per:hasParent ?otherparent .
 }
```

CQ4 expected result

```
Diamond
```

Below you will find one code block per competency question, each printing the retrieved answers.


In [None]:
# CQ1: which people have siblings who have offspring?
# "distinct" keeps each person once even when several sibling/offspring
# combinations match, in line with the other competency queries.
cq1 = '''
PREFIX per: <http://example.org/element/> 
SELECT distinct ?person WHERE { ?person per:hasSibling ?person2 .
?person2 per:hasOffspring ?offspring }'''

cq1res = newg.query(cq1)

for row in cq1res:
    print(row)


In [None]:
# CQ2: what is the name of the dogs whose owner has parents?
cq2 = """
PREFIX per:  <http://example.org/element/> 
SELECT distinct ?name WHERE { ?dog a per:Dog;
per:hasOwner ?person ;
per:name ?name .
?parent per:hasOffspring ?person . }
"""

# Run the query on the loaded graph and print every retrieved row.
cq2res = newg.query(cq2)
for row in cq2res:
  print(row)

In [None]:
# CQ3: which partners don't have offspring?
# Both MINUS clauses apply: a match is dropped when either side of the
# hasPartner link has a hasOffspring triple.
cq3 = """
PREFIX per:  <http://example.org/element/> 
SELECT distinct ?partner WHERE { ?partner per:hasPartner ?partner2 .
MINUS {?partner per:hasOffspring ?offspring}
MINUS {?partner2 per:hasOffspring ?offspring} }
"""

# Run the query on the loaded graph and print every retrieved row.
cq3res = newg.query(cq3)
for row in cq3res:
  print(row)

In [None]:
# CQ4: which offspring have parents who have parents as well?
cq4 = """
PREFIX per:  <http://example.org/element/> 
SELECT distinct ?offspring WHERE { 
  ?offspring per:hasParent ?parent .
  ?parent per:hasParent ?otherparent .
 }
"""

# Run the query on the loaded graph and print every retrieved row.
cq4res = newg.query(cq4)
for row in cq4res:
  print(row)

We can now say that our ontology is able to answer the competency questions!