This notebook merges the previously created ConceptNet graph into the KitchenConcepts+Affordance graph.

In [1]:
from rdflib import Graph, Namespace, RDF, URIRef, RDFS
from rdflib.namespace import OWL, SKOS
import spacy
nlp = spacy.load('en_core_web_sm')
import pandas as pd

In [2]:
def create_namespace(graph, namespace, prefix):
    """
    Register a prefix for a namespace on a graph and hand back the namespace object.
    Args:
        - graph: graph object whose namespace manager receives the binding
        - namespace: uri of the namespace to be bound to the graph
        - prefix: the prefix under which the namespace is bound
    Output: Namespace instance built from the given uri
    """
    # the binding is done with the raw uri, exactly as passed in
    graph.namespace_manager.bind(prefix, namespace)
    return Namespace(namespace)

In [3]:
# locations of the two Turtle files to be merged
base_graph_file = "./graphs/aff_bft_open_graph.ttl"
cn_graph_file = "../data/ConceptNet/parse/cn_graph.ttl"

# one empty graph per file, then parse each file into its graph
base_g, cn_g = Graph(), Graph()
base_g.parse(source=base_graph_file, format='turtle')
cn_g.parse(source=cn_graph_file, format='turtle')

<Graph identifier=Ne3ef20b46bc94f73997947d0937746e2 (<class 'rdflib.graph.Graph'>)>

In [4]:
# merge graphs
# `base_g + cn_g` already yields a new graph, so no empty Graph() needs to be
# created beforehand (the previous placeholder was overwritten immediately).
merged_g = base_g + cn_g
len(merged_g)

2910

In [5]:
# remove unnecessary owl ontology triple
# The subject has to be given as a URIRef: rdflib terms never compare equal to a
# plain `str`, so a check such as `s == 'http://test.org/cn.owl'` is always False
# and would leave the triple in the graph. Removing the exact triple directly also
# avoids modifying the graph while iterating over it; it is a no-op if absent.
merged_g.remove((URIRef('http://test.org/cn.owl'), RDF.type, OWL.Ontology))

## Linking CN Relations with aff ontology

In [6]:
# bind every prefix used below to the merged graph and keep the namespace objects
cnr_ns, cnc_ns, aff_ns, kchn_ns, skos_ns = (
    create_namespace(merged_g, uri, prefix)
    for prefix, uri in (
        ('cnr', "http://api.conceptnet.io/r/"),
        ('cnc', "http://api.conceptnet.io/c/"),
        ('aff', "http://test.org/affordance.owl#"),
        ('kchn', "http://test.org/kitchen.owl#"),
        ('skos', "http://www.w3.org/2004/02/skos/core#"),
    )
)

#### Relation: _cnr:UsedFor_ --> action instances and affordance subclasses

In [7]:
# create Actions from cnr:UsedFor using SpaCy

# Number of leading characters to strip from a ConceptNet concept URI:
#   - the concept namespace only           -> keeps 'en/<concept>'
#   - the namespace plus the 'en/' segment -> keeps '<concept>'
# Derived from the bound namespace instead of hard-coding 27 and 30.
cnc_prefix_len = len(cnc_ns)
cnc_en_prefix_len = len(cnc_ns['en/'])

# lists to store df column values
entities = []
usages = []
usage_phrase_list = []

# iterate through graph triples with UsedFor relation
# Object values are the usages that will be actions and their VERB will be subclasses of affordances
for s, p, o in merged_g.triples((None, cnr_ns.UsedFor, None)):
    usage = o[cnc_en_prefix_len:]
    entities.append(s[cnc_prefix_len:])
    usages.append(usage)
    # 'stir_soup' -> 'stir soup', so that spaCy sees a normal phrase
    usage_phrase_list.append(usage.replace('_', ' '))

# build the df in a single call; 'aff_class' has no data yet and is filled in later
aff_df = pd.DataFrame(
    {
        'entities': entities,
        'usage_o': usages,
        'usage_phrase_list': usage_phrase_list,
    },
    columns=['entities', 'usage_o', 'usage_phrase_list', 'aff_class'],
)
aff_df.head(50)

Unnamed: 0,entities,usage_o,usage_phrase_list,aff_class
0,en/apple,eating,eating,
1,en/spoon,stir_soup,stir soup,
2,en/knife,taking_off_ends,taking off ends,
3,en/knife,forceful_moves,forceful moves,
4,en/knife,prying_things_out,prying things out,
5,en/cupboard,dishes,dishes,
6,en/knife,carving_features,carving features,
7,en/apple,make_pie,make pie,
8,en/refrigerator,chilling_food,chilling food,
9,en/cupboard,storing_junk,storing junk,


In [8]:
def get_aff_subclass(text):
    """
    Derives the affordance name from the object value of a cnr:UsedFor triple.

    The first token that spaCy tags as VERB is capitalized; if it does not already
    end in 'ing', the suffix 'ing' is appended as-is (so 'make' becomes 'Makeing').
    Args:
        text: value in dataframe cell
    Returns:
        action (str): action value that will be a subclass of Affordance,
            or None when the text contains no VERB token
    """
    first_verb = next((tok.text for tok in nlp(text) if tok.pos_ == "VERB"), None)
    if first_verb is None:
        return None

    capitalized = first_verb.capitalize()
    return capitalized if first_verb.endswith('ing') else capitalized + 'ing'

In [9]:
# derive the affordance class of every usage phrase, one cell at a time
aff_df['aff_class'] = aff_df['usage_phrase_list'].map(get_aff_subclass)

In [10]:
# collect the distinct affordance names to check them for misspellings
affs = set(aff_df['aff_class'])
affs

{'Arranging',
 'Attaching',
 'Attacking',
 'Breaking',
 'Carving',
 'Causing',
 'Chilling',
 'Chopping',
 'Complementing',
 'Completing',
 'Conserving',
 'Consuming',
 'Cooling',
 'Counting',
 'Cracking',
 'Cuting',
 'Cutting',
 'Decorating',
 'Digging',
 'Dishing',
 'Dividing',
 'Doing',
 'Drinking',
 'Eating',
 'Engraving',
 'Enjoying',
 'Entertaining',
 'Extending',
 'Fighting',
 'Flinging',
 'Freezing',
 'Gashing',
 'Getting',
 'Giving',
 'Gouging',
 'Hanging',
 'Having',
 'Holding',
 'Illustrating',
 'Keeping',
 'Killing',
 'Laying',
 'Leaveing',
 'Leaving',
 'Letting',
 'Lifting',
 'Living',
 'Makeing',
 'Making',
 'Measuring',
 'Mixing',
 'Moving',
 None,
 'Pareing',
 'Paring',
 'Peeling',
 'Picking',
 'Piercing',
 'Placing',
 'Playing',
 'Posting',
 'Preserving',
 'Preventing',
 'Pricking',
 'Providing',
 'Prying',
 'Putting',
 'Refrigerating',
 'Removing',
 'Representing',
 'Ripping',
 'Saving',
 'Scooping',
 'Scoring',
 'Scratching',
 'Seeing',
 'Separating',
 'Serving',
 'Se

In [11]:
def filter_col(text):
    """
    Replaces a known misspelled affordance name with its correct spelling.
    Args:
        text: value in dataframe cell
    Returns:
        action (str): the corrected spelling when `text` is one of the known typos,
            otherwise `text` unchanged (non-string values such as None included)
    """
    # One lookup table instead of two parallel lists: a typo and its correction
    # cannot drift apart, and the duplicated entries are gone.
    corrections = {
        'Storeing': 'Storing',
        'Stiring': 'Stirring',
        'Runing': 'Running',
        'Pareing': 'Paring',
        'Serveding': 'Serving',
        'Leaveing': 'Leaving',
        'Makeing': 'Making',
        'Cuting': 'Cutting',
    }

    # non-string cells pass through untouched
    if not isinstance(text, str):
        return text
    return corrections.get(text, text)
        
# correct the known typos in the affordance column
aff_df['aff_class'] = aff_df['aff_class'].map(filter_col)

In [12]:
# create the desired triples for the graph

# < affordance subclass, rdfs:subClassOf, aff:Affordance >
action_affs = aff_df.aff_class.to_list()
# Filter out missing values instead of calling `.remove(None)`, which raises a
# KeyError whenever every usage phrase happened to contain a verb.
action_affs_set = {aff for aff in action_affs if not pd.isna(aff)}
for aff in action_affs_set:
    merged_g.add((aff_ns[aff], RDFS.subClassOf, aff_ns.Affordance))

for row in aff_df.itertuples(index=False):
    entity = cnc_ns[row.entities]            # 'entities' still carries the 'en/' segment
    action = cnc_ns['en/' + row.usage_o]     # 'usage_o' had it stripped, so add it back

    # < UsedFor object, a, aff:Action >
    merged_g.add((action, RDF.type, aff_ns.Action))
    # < entity, aff:potential_action, UsedFor object >
    merged_g.add((entity, aff_ns.potential_action, action))

    # rows without a detected verb have no affordance class to link to
    if pd.isna(row.aff_class):
        continue

    affordance = aff_ns[row.aff_class]
    # < UsedFor object, aff:action_affordance, Affordance Subclass >
    merged_g.add((action, aff_ns.action_affordance, affordance))
    # < entity, aff:affords, Affordance Subclass >
    merged_g.add((entity, aff_ns.affords, affordance))
    

#### Relation: _cnr:ReceivesAction_ --> removing these triples because other relations give similar information

In [13]:
# Drop every cnr:ReceivesAction triple in one call; `None` acts as a wildcard.
# This avoids removing from the graph while iterating over it, and avoids
# rebuilding each triple from URI slices (which only matched when both subject
# and object were in the ConceptNet concept namespace).
merged_g.remove((None, cnr_ns.ReceivesAction, None))

#### Relation: _cnr:MadeOf_ --> only keep triples in which the subject of the triple is an instance of the BFT ontology

In [14]:
# names of the BFT ontology instances, grouped by kind for readability
instances = [
    # food and drink
    'milk', 'apple', 'lemon', 'refrigerator', 'coffee', 'croissant', 'garnish',
    'hardboiled_egg', 'scrambled_egg', 'fried_egg',
    'orange_juice', 'apple_juice', 'butter',
    # containers and dispensers
    'salt_shaker', 'pepper_shaker', 'bread_basket', 'egg_cup', 'milk_pitcher',
    # cutlery
    'fork', 'knife', 'spoon', 'butter_knife',
    # drinkware and dishes
    'glass', 'teacup',
    'sauce_dish', 'butter_dish', 'bread_plate', 'teacup_plate',
    # furniture
    'dining_table', 'dining_chair', 'cupboard',
]

In [16]:
# Snapshot the matches first so the graph is not modified while it is iterated.
for s, p, o in list(merged_g.triples((None, cnr_ns.MadeOf, None))):
    # s[30:] is the concept name once the 'http://api.conceptnet.io/c/en/' prefix is stripped
    if s[30:] not in instances:
        # remove the triple exactly as stored rather than a copy rebuilt from slices
        merged_g.remove((s, p, o))

#### Relation: _cnr:PartOf_ --> remove triples about glass because they are not very relevant to the kitchen domain

In [17]:
# Snapshot the matches first so the graph is not modified while it is iterated.
for s, p, o in list(merged_g.triples((None, cnr_ns.PartOf, None))):
    # substring test on the concept name, so e.g. any name containing 'glass' matches
    if "glass" in s[30:]:
        # remove the triple exactly as stored rather than a copy rebuilt from slices
        merged_g.remove((s, p, o))

#### Relation: _cnr:HasPrerequisite_ --> allocate 'A' in cnr:HasPrerequisite to an instance of aff:Action

In [18]:
# Snapshot the matches first: triples are added to the graph inside the loop.
for s, p, o in list(merged_g.triples((None, cnr_ns.HasPrerequisite, None))):
    action = cnc_ns[s[27:]]
    # first word of the concept name, capitalized, names the affordance class
    # (e.g. 'eat_apple' -> 'Eat')
    affordance_class = aff_ns[s[30:].split('_')[0].capitalize()]

    # < subject, a, aff:Action >
    merged_g.add((action, RDF.type, aff_ns.Action))
    # < affordance class, rdfs:subClassOf, aff:Affordance >
    merged_g.add((affordance_class, RDFS.subClassOf, aff_ns.Affordance))
    # < subject, a, affordance class >
    merged_g.add((action, RDF.type, affordance_class))
    # < prerequisite object, aff:potential_action, subject >
    merged_g.add((cnc_ns[o[27:]], aff_ns.potential_action, action))

## Linking CN Concepts with BFT ontology

In [20]:
# Query the ConceptNet graph (cn_g, not the merged graph) for every distinct subject,
# ignoring triples whose predicate is cnr:ExternalURL.
# The final expression shows how many distinct subjects were found.
distinct_s = cn_g.query(
"""
prefix cn: <http://api.conceptnet.io/> 
prefix cnc: <http://api.conceptnet.io/c/> 
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix wi: <http://purl.org/ontology/wi/core#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT distinct ?s WHERE { 
  ?s ?p ?o.
  FILTER(! (?p = cnr:ExternalURL) )
}
""")
len(distinct_s)

1306

In [21]:
# Query the ConceptNet graph (cn_g, not the merged graph) for every distinct object,
# ignoring triples whose predicate is cnr:ExternalURL.
# The final expression shows how many distinct objects were found.
distinct_o = cn_g.query(
"""
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix wi: <http://purl.org/ontology/wi/core#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT distinct ?o WHERE { 
  ?s ?p ?o.
  FILTER(! (?p = cnr:ExternalURL) )
}
""")
len(distinct_o)

749

### Levenshtein Distance to compare strings

In [22]:
import enchant 

In [23]:
# entity linking of subjects
# Each BFT instance name is compared with each ConceptNet subject name; pairs whose
# similarity ratio is above 0.83 are linked with owl:sameAs.
c = 0
with open('test_s.txt', 'w') as file:  # using file to test out ratio threshold
    for i in instances:
        for r in distinct_s:
            candidate = r.s[30:]
            combined_len = len(i) + len(candidate)
            # similarity ratio: 1.0 for identical strings, lower as the edit distance grows
            ratio = (combined_len - enchant.utils.levenshtein(i, candidate)) / combined_len
            if ratio <= 0.83:
                continue
            c += 1
            file.write(f'{ratio} -- {i} -- {candidate}\n')
            merged_g.add((
                URIRef(f'http://test.org/bft.owl#{i}'),
                OWL.sameAs,
                cnc_ns[r.s[27:]],
            ))
print(c)

55


In [24]:
# entity linking of objects
# Each BFT instance name is compared with each ConceptNet object name; pairs whose
# similarity ratio is above 0.83 are linked with owl:sameAs.
c1 = 0
with open('test_o.txt', 'w') as file:  # using file to test out ratio threshold
    for i in instances:
        for r in distinct_o:
            candidate = r.o[30:]
            combined_len = len(i) + len(candidate)
            # similarity ratio: 1.0 for identical strings, lower as the edit distance grows
            ratio = (combined_len - enchant.utils.levenshtein(i, candidate)) / combined_len
            if ratio <= 0.83:
                continue
            c1 += 1
            file.write(f'{ratio} -- {i} -- {candidate}\n')
            merged_g.add((
                URIRef(f'http://test.org/bft.owl#{i}'),
                OWL.sameAs,
                cnc_ns[r.o[27:]],
            ))
print(c1)

24


In [25]:
# exact string matching, as a baseline to compare against the Levenshtein method:
# count every (instance, subject) and (instance, object) pair with identical names
cnt = 0
for i in instances:
    cnt += sum(1 for r in distinct_s if str(r.s[30:]) == i)
    cnt += sum(1 for r in distinct_o if str(r.o[30:]) == i)
print(cnt)

35


Observe that more links are created through the Levenshtein algorithm compared to the exact string match.

### Create closeMatch links

In [26]:
# links instances following that the instance string is in the response string
# eg. if instance is 'croissant' and the response is 'butter_croissant', it would be a match

# concept names already linked through owl:sameAs; a set makes the membership test cheap
same_as_objects = {o[30:] for s, p, o in merged_g.triples((None, OWL.sameAs, None))}

# every candidate ConceptNet node: all subjects first, then all objects
# (same order in which the log file was written before)
cn_nodes = [r.s for r in distinct_s] + [r.o for r in distinct_o]

with open('test_contains.txt', 'w') as file:
    for i in instances:
        for node in cn_nodes:
            concept = str(node[30:])
            # skip non-matches, and concepts that are already linked with sameAs
            if i not in concept or concept in same_as_objects:
                continue
            file.write(f'{i} ------- {str(node)}\n')
            # Slice at 27, not 30, so the 'en/' segment is kept and the link points at
            # the real concept URI (.../c/en/<concept>), as the sameAs links do.
            merged_g.add((
                URIRef(f'http://test.org/bft.owl#{i}'),
                skos_ns.closeMatch,
                cnc_ns[node[27:]],
            ))

In [27]:
# gets data to create affords triples:
# for every ?ke that has an aff:potential_action, return ?ke together with each
# rdf:type (?aff) of that action that is declared a subclass of aff:Affordance
affords_q = merged_g.query(
"""
prefix aff: <http://test.org/affordance.owl#>
prefix cn: <http://api.conceptnet.io/> 
prefix cnc: <http://api.conceptnet.io/c/> 
prefix cnr: <http://api.conceptnet.io/r/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix xml: <http://www.w3.org/XML/1998/namespace> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

select ?ke ?aff where
{
  ?ke aff:potential_action ?action.
  ?action a ?aff.
  ?aff rdfs:subClassOf aff:Affordance.}

""")

In [28]:
# Materialise the result rows first: triples are added to the queried graph in the loop.
for row in list(affords_q):
    # Use the bound terms as returned by the query. Slicing them at fixed offsets and
    # re-prefixing them would garble any term outside the cnc / aff namespaces.
    merged_g.add((row.ke, aff_ns.affords, row.aff))

### Serialize and save new merged graph with CN entities

In [29]:
# first remove noisy triples: 'fork' was linked to similarly spelled but unrelated concepts
for noisy_concept in ('pork', 'work'):
    merged_g.remove((
        URIRef('http://test.org/bft.owl#fork'),
        OWL.sameAs,
        URIRef('http://api.conceptnet.io/c/en/' + noisy_concept),
    ))

In [32]:
# write the merged graph to disk as Turtle
merged_g.serialize(destination='./graphs/aff_bft_cn.ttl', format='turtle')

In [33]:
# size of the merged graph after all links were added and noisy triples removed
len(merged_g)

4167