In [1]:
from rdflib import Graph, Namespace, RDF, URIRef, RDFS
from rdflib.namespace import OWL, SKOS

In [2]:
def create_namespace(graph, namespace, prefix):
    """
    Binds a namespace to a given graph.
    Args:
        - graph: graph object
        - namespace: uri of the namespace to be bound to the graph
        - prefix: the prefix of the given namespace
    Output: namespace instance bound to a specified graph
    """
    ns = Namespace(namespace)
    graph.namespace_manager.bind(prefix, namespace)

    return ns

In [3]:
# load graphs to be merged
base_graph_file = "./graphs/aff_bft_open_graph.ttl"
cn_graph_file = "../data/ConceptNet/parse/cn_graph.ttl"
base_g = Graph()
cn_g = Graph()
base_g.parse(base_graph_file, format= 'turtle')
cn_g.parse(cn_graph_file, format= 'turtle')

<Graph identifier=N5acd101b2e80465fbee28685498ecc56 (<class 'rdflib.graph.Graph'>)>

In [4]:
# combine graphs
merged_g = Graph()
merged_g = base_g + cn_g
len(merged_g)

7011

## Linking CN Relations with aff ontology

In [5]:
cnr_ns = create_namespace(merged_g, "http://api.conceptnet.io/r/", 'cnr')
cnc_ns = create_namespace(merged_g, "http://api.conceptnet.io/c/", 'cnc')
aff_ns = create_namespace(merged_g, "http://test.org/affordance.owl#", 'aff')
kchn_ns = create_namespace(merged_g, "http://test.org/kitchen.owl#", 'kchn')
skos_ns = create_namespace(merged_g, "http://www.w3.org/2004/02/skos/core#", 'skos')

In [6]:
# linking CN relations to aff:quality
# merged_g.add( (cnr_ns[URIRef('AtLocation')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('HasProperty')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('MadeOf')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('PartOf')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('HasA')], RDFS.subPropertyOf, aff_ns['quality']) )

In [7]:
len(merged_g)

7015

In [8]:
# changing ReceivesAction to kchn:CanBe
for s,p,o in merged_g.triples( (None, cnr_ns.ReceivesAction, None) ):
    merged_g.remove( (cnc_ns[s[27:]], cnr_ns.ReceivesAction, cnc_ns[o[27:]]) )
    merged_g.add( (cnc_ns[s[27:]], kchn_ns.CanBe, cnc_ns[o[27:]]) )
#     print(cnc_ns[s[27:]], kchn_ns.CanBe, cnc_ns[o[27:]])
len(merged_g)

7015

In [9]:
# add subclasses of Quality
Substance = URIRef('http://test.org/kitchen.owl#Substance')
Property = URIRef('http://test.org/kitchen.owl#Property')
PhysicalProperty = URIRef('http://test.org/kitchen.owl#PhysicalProperty')

merged_g.add( (Substance, RDFS.subClassOf, aff_ns.Quality) )
merged_g.add( (Property, RDFS.subClassOf, aff_ns.Quality) )
merged_g.add( (PhysicalProperty, RDFS.subClassOf, aff_ns.Quality) )

In [10]:
# add subclasses of Affordance
Substance_aff = URIRef('http://test.org/kitchen.owl#SubstanceAffordance')
Property_aff = URIRef('http://test.org/kitchen.owl#PropertyAffordance')
PhysicalProperty_aff = URIRef('http://test.org/kitchen.owl#PhysicalPropertyAffordance')

merged_g.add( (Substance_aff, RDFS.subClassOf, aff_ns.Affordance) )
merged_g.add( (Property_aff, RDFS.subClassOf, aff_ns.Affordance) )
merged_g.add( (PhysicalProperty_aff, RDFS.subClassOf, aff_ns.Affordance) )

In [11]:
# add relations between Quality and Affordance subclasses
merged_g.add( (Substance, aff_ns.implies, Substance_aff) )
merged_g.add( (Property, aff_ns.implies, Property_aff) )
merged_g.add( (PhysicalProperty, aff_ns.implies, PhysicalProperty_aff) )

In [12]:
# make 'B' in cnr:HasProperty an instance of kchn:Property
for s,p,o in merged_g.triples( (None, cnr_ns.HasProperty, None) ):
    merged_g.add( (cnc_ns[o[27:]], RDF.type, kchn_ns.Property) )
    print(cnc_ns[o[27:]], RDF.type, kchn_ns.Property) 
len(merged_g)

http://api.conceptnet.io/c/en/good http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/paired_with_chairs http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/soft http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/empty http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/dangerous http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/clean http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/fragile http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/refreshing_to_drink http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
h

7053

In [13]:
# make 'B' in cnr:HasA an instance of kchn:Property
for s,p,o in merged_g.triples( (None, cnr_ns.HasProperty, None) ):
    merged_g.add( (cnc_ns[o[27:]], RDF.type, kchn_ns.Property) )
    print(cnc_ns[o[27:]], RDF.type, kchn_ns.Property) 
len(merged_g)

http://api.conceptnet.io/c/en/good http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/paired_with_chairs http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/soft http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/empty http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/dangerous http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/clean http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/fragile http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
http://api.conceptnet.io/c/en/refreshing_to_drink http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Property
h

7053

In [14]:
# make 'A' in cnr:PartOf an instance of kchn:PhysicalProperty
for s,p,o in merged_g.triples( (None, cnr_ns.PartOf, None) ):
#     print(s,p,o)
    merged_g.add( (cnc_ns[s[27:]], RDF.type, kchn_ns.PhysicalProperty) )
    print(cnc_ns[s[27:]], RDF.type, kchn_ns.PhysicalProperty)
len(merged_g)

http://api.conceptnet.io/c/en/fork http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io/c/en/point/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io/c/en/blade http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io/c/en/prong/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io/c/en/point/n/wn/shape http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io/c/en/knife_blade/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io/c/en/glass/n/wn/substance http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#PhysicalProperty
http://api.conceptnet.io

7065

In [15]:
# make 'A' in cnr:MadeOf an instance of kchn:Substance
for s,p,o in merged_g.triples( (None, cnr_ns.PartOf, None) ):
#     print(s,p,o)
    merged_g.add( (cnc_ns[o[27:]], RDF.type, kchn_ns.Substance) )
    print(cnc_ns[o[27:]], RDF.type, kchn_ns.Substance)
len(merged_g)

http://api.conceptnet.io/c/en/dining_set http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/knife/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/knife http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/fork/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/knife/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/knife/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/glass/n/wn/artifact http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://test.org/kitchen.owl#Substance
http://api.conceptnet.io/c/en/knife/n/wn/artifact http://www.w3.org/1999

7073

In [16]:
# # remove MadeOf triples whose 'A' is not an instance in BFT onto
# for s,p,o in merged_g.triples( (None, cnr_ns.MadeOf, None) ):
#     if s[30:] not in instances:
# #         print(s[30:])
#         merged_g.remove( (cnc_ns[s[27:]], cnr_ns.MadeOf, cnc_ns[o[27:]]) )
# #         print(cnc_ns[s[27:]], cnr_ns.MadeOf, cnc_ns[o[27:]])

In [17]:
# # make 'A' in cnr:HasPrerequisite a type of aff:Action
# for s,p,o in merged_g.triples( (None, cnr_ns.HasPrerequisite, None) ):
#     merged_g.add( (cnc_ns[s[27:]], RDF.type, aff_ns.Action) )
#     print(s[27:])
# len(merged_g)

In [18]:
# allocate 'A' in cnr:HasPrerequisite to a subclass of aff:Action
action_classes = set()
actions = []
for s,p,o in merged_g.triples( (None, cnr_ns.HasPrerequisite, None) ):
#     merged_g.add( (cnc_ns[s[27:]], RDF.type, aff_ns.Action) )
#     actions.append(s[30:])
    actions.append(s)
#     print(s)

# generate action subclasses
for a in actions:
    action = a[30:].split('_')
#     print(action)
    action_classes.add(action[0].capitalize())

# make each action a subclass of Action class in Affordance Ontology
for c in action_classes:
    action_class = URIRef(f'http://test.org/kitchen.owl#{c}')
    merged_g.add( (action_class, RDFS.subClassOf, aff_ns.Action) )
    for a in actions:
        action = c.lower()
        if action in a:
            merged_g.add( (URIRef(a), RDF.type, action_class) )
#             print( (URIRef(a), RDF.type, action_class) )
        
len(merged_g)

7086

## Linking CN Concepts with BFT ontology

In [19]:
distinct_s = cn_g.query(
"""
prefix cn: <http://api.conceptnet.io/> 
prefix cnc: <http://api.conceptnet.io/c/> 
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix wi: <http://purl.org/ontology/wi/core#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT distinct ?s WHERE { 
  ?s a rdfs:Resource. 
  ?assertion rdf:subject ?s;
             rdf:predicate ?p.
  FILTER(! (?p = cnr:ExternalURL) )
}
""")
len(distinct_s)

280

In [20]:
distinct_o = cn_g.query(
"""
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix wi: <http://purl.org/ontology/wi/core#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT distinct ?o WHERE { 
  ?o a rdfs:Resource. 
  ?assertion rdf:object ?o;
             rdf:predicate ?p.
  FILTER(! (?p = cnr:ExternalURL) )
}
""")
len(distinct_o)

510

### Levenshtein Distance to compare strings

In [21]:
# Levenshtein
# import the enchant module 
import enchant 

In [22]:
instances = ['croissant', 'garnish', 'hardboiled_egg', 'scrambled_egg', 'fried_egg', 'orange_juice', 'apple_juice', 'butter', 'salt_shaker', 'pepper_shaker', 'bread_basket', 'egg_cup', 'milk_pitcher', 'fork', 'knife', 'spoon', 'butter_knife', 'glass', 'teacup', 'sauce_dish', 'butter_dish', 'bread_plate', 'teacup_plate', 'dining_table', 'dining_chair', 'cupboard']

In [23]:
# subject case
c=0
with open('test_s.txt', 'w') as file:
    for i in instances:
        str1 = i
        for r in distinct_s:
            str2 = r.s[30:]
#             print(str1, str2)
            dist = enchant.utils.levenshtein(str1, str2)
            ratio = ((len(str1)+len(str2)) - dist) / (len(str1)+len(str2))
            if ratio > 0.83:
                c +=1
                file.write(f'{str(ratio)} -- {i} -- {r.s[30:]}\n')
#                 print(cnc_ns[r.s[32:]])
                merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), OWL.sameAs, cnc_ns[r.s[27:]]))
#                 print(cnc_ns[r.s[27:]])
print(c)

30


In [24]:
# object case
c1=0
with open('test_o.txt', 'w') as file:
    for i in instances:
        str1 = i
        for r in distinct_o:
#             print(str1, str2)
            str2 = r.o[30:]
            dist = enchant.utils.levenshtein(str1, str2)
            ratio = ((len(str1)+len(str2)) - dist) / (len(str1)+len(str2))
            if ratio > 0.83:
                c1 += 1
                file.write(f'{str(ratio)} -- {i} -- {r.o[30:]}\n')
                merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), OWL.sameAs, cnc_ns[r.o[27:]]))
#                 print(cnc_ns[r.o[27:]])
print(c1)

12


In [25]:
# exact match of string matching (comparing how this works against the Levenshtein method)
cnt = 0
for i in instances:
    for r in distinct_s:
#         print(i, r.s[30:])
        if i == str(r.s[30:]):
            cnt +=1
    for r in distinct_o:
#         print(i, r.o)
        if i == str(r.o[30:]):
            cnt +=1
print(cnt)

21


Observe that more links are created through the Levenshtein algorithm compared to the exact string match.

In [26]:
len(merged_g)

7117

Observe that 67 triples are added to the graph after linking CN graph to base graph. 

### Create closeMatch links

In [27]:
# links instances following that the instance string is in the response string
# eg. if instance is 'croissant' and the repsonse is 'butter_croissant', it would be a match

same_as_objects = []
for s,p,o in merged_g.triples( (None, OWL.sameAs, None) ):
#     print(s,p,o)
    same_as_objects.append(o[30:])
#     print(o[30:])

with open('test_contains.txt', 'w') as file:  # Use file to refer to the file object
    for i in instances:
        for r in distinct_s:
            s_concept = str(r.s[32:])
            if i in s_concept:
                if s_concept in same_as_objects:
                    continue
                else:
                    file.write(f'{i} ------- {str(r.s)}\n')
                    merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), skos_ns.closeMatch, cnc_ns[r.s[32:]]) ) 
        for r in distinct_o:
            o_concept = str(r.o[32:])
            if i in o_concept:
                if o_concept in same_as_objects:
                    continue
                else:
                    file.write(f'{i} ------- {str(r.o)}\n')
                    merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), skos_ns.closeMatch, cnc_ns[r.o[32:]]) )
    
# if the triple exists with that subject, then continue because there is an owlsameas triple of the instnace, else make the triple with skos. 

### Serialize and save new merged graph with CN entities

In [28]:
merged_g.serialize('aff_bft_cn_TEST.ttl', format='turtle')

In [29]:
len(merged_g)

7197