In [1]:
from rdflib import Graph, Namespace, RDF, URIRef, RDFS
from rdflib.namespace import OWL, SKOS

In [2]:
def create_namespace(graph, namespace, prefix):
    """
    Binds a namespace to a given graph.
    Args:
        - graph: graph object
        - namespace: uri of the namespace to be bound to the graph
        - prefix: the prefix of the given namespace
    Output: namespace instance bound to a specified graph
    """
    ns = Namespace(namespace)
    graph.namespace_manager.bind(prefix, namespace)

    return ns

In [3]:
# load graphs to be merged
base_graph_file = "./graphs/aff_bft_open_graph.ttl"
cn_graph_file = "../data/ConceptNet/parse/cn_graph.ttl"
base_g = Graph()
cn_g = Graph()
base_g.parse(base_graph_file, format= 'turtle')
cn_g.parse(cn_graph_file, format= 'turtle')

<Graph identifier=N16967dc04c9b4142abde30e84eb0f354 (<class 'rdflib.graph.Graph'>)>

In [4]:
# combine graphs
merged_g = Graph()
merged_g = base_g + cn_g
len(merged_g)

7010

### Linking CN Relations with aff ontology

In [5]:
cnr_ns = create_namespace(merged_g, "http://api.conceptnet.io/r/", 'cnr')
cnc_ns = create_namespace(merged_g, "http://api.conceptnet.io/c/", 'cnc')
aff_ns = create_namespace(merged_g, "http://test.org/affordance.owl#", 'aff')
kchn_ns = create_namespace(merged_g, "http://test.org/kitchen.owl#", 'kchn')
skos_ns = create_namespace(merged_g, "http://www.w3.org/2004/02/skos/core#", 'skos')

In [6]:
# linking CN relations to aff:quality
merged_g.add( (cnr_ns[URIRef('AtLocation')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('HasProperty')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('MadeOf')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('PartOf')], RDFS.subPropertyOf, aff_ns['quality']) )
merged_g.add( (cnr_ns[URIRef('HasA')], RDFS.subPropertyOf, aff_ns['quality']) )

In [7]:
len(merged_g)

7015

In [8]:
# changing ReceivesAction to kchn:CanBe
for s,p,o in merged_g.triples( (None, cnr_ns.ReceivesAction, None) ):
    merged_g.remove( (cnc_ns[s], cnr_ns.ReceivesAction, cnc_ns[o]) )
    merged_g.add( (cnc_ns[s], kchn_ns.CanBe, cnc_ns[o]) )
len(merged_g)

7049

In [9]:
# make 'B' in cnr:HasProperty an instance of aff:Quality
for s,p,o in merged_g.triples( (None, cnr_ns.HasProperty, None) ):
    merged_g.add( (cnc_ns[o], RDF.type, aff_ns.Quality) )
len(merged_g)

7078

In [10]:
instances = ['croissant', 'garnish', 'hardboiled_egg', 'scrambled_egg', 'fried_egg', 'orange_juice', 'apple_juice', 'butter', 'salt_shaker', 'pepper_shaker', 'bread_basket', 'egg_cup', 'milk_pitcher', 'fork', 'knife', 'spoon', 'butter_knife', 'glass', 'teacup', 'sauce_dish', 'butter_dish', 'bread_plate', 'teacup_plate', 'dining_table', 'dining_chair', 'cupboard']

In [11]:
# remove MadeOf triples whose 'A' is not an instance in BFT onto
for s,p,o in merged_g.triples( (None, cnr_ns.MadeOf, None) ):
    if s[32:] not in instances:
#         print(s[32:])
        merged_g.remove( (cnc_ns[s], cnr_ns.MadeOf, cnc_ns[o]) )

In [12]:
# make 'A' in cnr:PartOf an instance of aff:Quality
for s,p,o in merged_g.triples( (None, cnr_ns.PartOf, None) ):
    merged_g.add( (cnc_ns[s], RDF.type, aff_ns.Quality) )
len(merged_g)

7090

In [13]:
# make 'A' in cnr:HasPrerequisite a type of aff:Action
for s,p,o in merged_g.triples( (None, cnr_ns.HasPrerequisite, None) ):
    merged_g.add( (cnc_ns[s], RDF.type, aff_ns.Action) )
len(merged_g)

7098

### Linking CN Concepts with BFT ontology

In [14]:
# response = cn_g.query(
#     """
# prefix cn: <http://api.conceptnet.io/> 
# prefix cnc: <http://api.conceptnet.io/c/> 
# prefix cnr: <http://api.conceptnet.io/r/> 
# prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
# prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
# prefix wi: <http://purl.org/ontology/wi/core#> 
# prefix xml: <http://www.w3.org/XML/1998/namespace> 
# prefix xsd: <http://www.w3.org/2001/XMLSchema#>

# SELECT distinct ?s ?o WHERE { 
#   ?s ?relation ?o.
#     VALUES ?relation {cnr:AtLocation cnr:CapableOf cnr:CapableOf cnr:ReceivesAction cnr:HasProperty cnr:MadeOf cnr:PartOf cnr:HasA cnr:HasPrerequisite cnr:SimilarTo}.
# }
#     """
# )
# len(response)

In [15]:
# test_s = cn_g.query(
#     """
# prefix cn: <http://api.conceptnet.io/> 
# prefix cnc: <http://api.conceptnet.io/c/> 
# prefix cnr: <http://api.conceptnet.io/r/> 
# prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
# prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
# prefix wi: <http://purl.org/ontology/wi/core#> 
# prefix xml: <http://www.w3.org/XML/1998/namespace> 
# prefix xsd: <http://www.w3.org/2001/XMLSchema#>

# SELECT distinct ?s WHERE { 
#   ?s ?relation ?o.
#     VALUES ?relation {cnr:AtLocation cnr:CapableOf cnr:CapableOf cnr:ReceivesAction cnr:HasProperty cnr:MadeOf cnr:PartOf cnr:HasA cnr:HasPrerequisite cnr:SimilarTo}.
# }
#     """
# )
# len(test_s)

In [16]:
# test_o = cn_g.query(
#     """
# prefix cn: <http://api.conceptnet.io/> 
# prefix cnc: <http://api.conceptnet.io/c/> 
# prefix cnr: <http://api.conceptnet.io/r/> 
# prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
# prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
# prefix wi: <http://purl.org/ontology/wi/core#> 
# prefix xml: <http://www.w3.org/XML/1998/namespace> 
# prefix xsd: <http://www.w3.org/2001/XMLSchema#>

# SELECT distinct ?o WHERE { 
#   ?s ?relation ?o.
#     VALUES ?relation {cnr:AtLocation cnr:CapableOf cnr:CapableOf cnr:ReceivesAction cnr:HasProperty cnr:MadeOf cnr:PartOf cnr:HasA cnr:HasPrerequisite cnr:SimilarTo}.
# }
#     """
# )
# len(test_o)

In [17]:
distinct_s = cn_g.query(
"""
prefix cn: <http://api.conceptnet.io/> 
prefix cnc: <http://api.conceptnet.io/c/> 
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix wi: <http://purl.org/ontology/wi/core#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT distinct ?s WHERE { 
  ?s a rdfs:Resource. 
  ?assertion rdf:subject ?s;
             rdf:predicate ?p.
  FILTER(! (?p = cnr:ExternalURL) )
}
""")
len(distinct_s)

280

In [18]:
distinct_o = cn_g.query(
"""
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix wi: <http://purl.org/ontology/wi/core#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT distinct ?o WHERE { 
  ?o a rdfs:Resource. 
  ?assertion rdf:object ?o;
             rdf:predicate ?p.
  FILTER(! (?p = cnr:ExternalURL) )
}
""")
len(distinct_o)

510

### Levenshtein Distance to compare strings

In [19]:
# Levenshtein
# import the enchant module 
import enchant 

In [20]:
# subject case
c=0
with open('test_s.txt', 'w') as file:
    for i in instances:
        str1 = i
        for r in distinct_s:
            str2 = r.s[30:]
#             print(str1, str2)
            dist = enchant.utils.levenshtein(str1, str2)
            ratio = ((len(str1)+len(str2)) - dist) / (len(str1)+len(str2))
            if ratio > 0.83:
                c +=1
                file.write(f'{str(ratio)} -- {i} -- {r.s[30:]}\n')
#                 print(cnc_ns[r.s[32:]])
                merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), OWL.sameAs, cnc_ns[r.s[27:]]))
#                 print(cnc_ns[r.s[27:]])
print(c)

30


In [21]:
# object case
c1=0
with open('test_o.txt', 'w') as file:
    for i in instances:
        str1 = i
        for r in distinct_o:
#             print(str1, str2)
            str2 = r.o[30:]
            dist = enchant.utils.levenshtein(str1, str2)
            ratio = ((len(str1)+len(str2)) - dist) / (len(str1)+len(str2))
            if ratio > 0.83:
                c1 += 1
                file.write(f'{str(ratio)} -- {i} -- {r.o[30:]}\n')
                merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), OWL.sameAs, cnc_ns[r.o[27:]]))
#                 print(cnc_ns[r.o[27:]])
print(c1)

12


In [22]:
# exact match of string matching
cnt = 0
for i in instances:
    for r in distinct_s:
#         print(i, r.s[30:])
        if i == str(r.s[30:]):
            cnt +=1
    for r in distinct_o:
#         print(i, r.o)
        if i == str(r.o[30:]):
            cnt +=1
print(cnt)

croissant knife
croissant high_table/n/wn/artifact
croissant orange_juice/n/wn/food
croissant crown_glass/n/wn/substance
croissant liquor_glass
croissant cupboard/n/wn/artifact
croissant spiders
croissant surgical_knife/n/wn/artifact
croissant eating_ice_cream
croissant canned_food
croissant ground_glass/n/wn/substance
croissant dinner_plate
croissant fruit
croissant glass/n/wn/substance
croissant canned_vegtables
croissant liqueur_glass/n/wn/artifact
croissant slicer/n/wn/artifact
croissant dinnerware
croissant knife/n
croissant another_dimension
croissant brown_butter/n/wn/food
croissant orange_juice/n
croissant dining_table/n
croissant board_game
croissant fried_egg/n/wn/food
croissant table_knife/n
croissant salt_shaker
croissant koolaid
croissant salad_fork/n/wn/artifact
croissant coffee_mug
croissant fried_egg
croissant spoon/n/wn/artifact
croissant opal_glass/n/wn/substance
croissant dustbin
croissant pl_ate
croissant machete
croissant steak_knife/n
croissant fork/n
croissant to

Observe that more links are created through the Levenshtein algorithm compared to the exact string match.

In [23]:
len(merged_g)

7129

Observe that 67 triples are added to the graph after linking CN graph to base graph. 

### Create closeMatch links

In [24]:
# links instances following that the instance string is in the response string
# eg. if instance is 'croissant' and the repsonse is 'butter_croissant', it would be a match

same_as_objects = []
for s,p,o in merged_g.triples( (None, OWL.sameAs, None) ):
    print(s,p,o)
    same_as_objects.append(o[30:])
    print(o[30:])

with open('test_contains.txt', 'w') as file:  # Use file to refer to the file object
    for i in instances:
        for r in distinct_s:
            if i in str(r.s[32:]):
                if str(r.s[32:]) in same_as_objects:
                    continue
                else:
                    file.write(f'{i} ------- {str(r.s)}\n')
                    merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), skos_ns.closeMatch, cnc_ns[r.s[32:]]) ) 
        for r in distinct_o:
            if i in str(r.o[32:]):
                if str(r.o[32:]) in same_as_objects:
                    continue
                else:
                    file.write(f'{i} ------- {str(r.o)}\n')
                    merged_g.add( (URIRef(f'http://test.org/bft.owl#{i}'), skos_ns.closeMatch, cnc_ns[r.o[32:]]) )
    
# if the triple exists with that subject, then continue because there is an owlsameas triple of the instnace, else make the triple with skos. 

http://test.org/bft.owl#knife http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/knife/n
knife/n
http://test.org/bft.owl#glass http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/glasses
glasses
http://test.org/bft.owl#garnish http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/garnish
garnish
http://test.org/bft.owl#fried_egg http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/fried_egg
fried_egg
http://test.org/bft.owl#salt_shaker http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/saltshaker
saltshaker
http://test.org/bft.owl#cupboard http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/cupboard
cupboard
http://test.org/bft.owl#dining_table http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/dining_table/n
dining_table/n
http://test.org/bft.owl#teacup http://www.w3.org/2002/07/owl#sameAs http://api.conceptnet.io/c/en/tea_cup
tea_cup
http://test.org/bft.owl#butter_knife http://www

### Serialize and save new merged graph with CN entities

In [25]:
merged_g.serialize('aff_bft_cn.ttl', format='turtle')

In [None]:
len(merged_g)