In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Width of the embedding vectors; reused as output_dim / units by the Keras layers.
NUM_DIMS = 10
# Archive with the triples (X), the entity/relation vocabularies and their embeddings.
data = np.load('./data/human_data.npz')

In [3]:
# Arrays that are used as-is: the triples and the two embedding tables.
X, relation_embeddings, entity_embeddings = (
    data[key] for key in ('X', 'relation_embeddings', 'entity_embeddings')
)
# Vocabularies are converted to plain lists so positions can be found with .index().
relations, entities = (data[key].tolist() for key in ('relations', 'entities'))
X

array([['Eve', 'type', 'Person'],
       ['Eve', 'name', 'Eve'],
       ['John', 'shoesize', '14'],
       ['Alice', 'hasFriend', 'John'],
       ['Mark', 'name', 'Mark'],
       ['John', 'type', 'Person'],
       ['Eve', 'hasFriend', 'Alice'],
       ['John', 'age', '37'],
       ['Jack', 'name', 'Jack'],
       ['Jennifer', 'type', 'Woman'],
       ['David', 'type', 'Researcher'],
       ['Flora', 'name', 'Flora'],
       ['Mark', 'hasFather', 'John'],
       ['Mark', 'age', '14'],
       ['Eve', 'type', 'Lecturer'],
       ['Eve', 'hasSpouse', 'David'],
       ['Alice', 'name', 'Alice'],
       ['Harry', 'hasSpouse', 'Sophie'],
       ['Flora', 'age', '95'],
       ['Harry', 'age', '25'],
       ['Alice', 'type', 'Woman'],
       ['Laura', 'hasFriend', 'Alice'],
       ['David', 'name', 'David'],
       ['Mark', 'shoesize', '8'],
       ['Laura', 'type', 'Person'],
       ['Mark', 'trouserssize', '36'],
       ['Flora', 'type', 'Woman'],
       ['Harry', 'phoneNumber', '32 08 98'],


In [4]:
# Score every known relation as a candidate link between 'Mark' and '14'.
head_idx = entities.index('Mark')
tail_idx = entities.index('14')
head_vec = entity_embeddings[head_idx]
tail_vec = entity_embeddings[tail_idx]

# Score = sum over dimensions of the element-wise product head * relation * tail.
# Alternative scoring that was tried (negative translation distance):
#   -np.linalg.norm(head_vec + rel_vec - tail_vec)
scores = [
    (rel_name, np.sum(head_vec * relation_embeddings[rel_pos] * tail_vec))
    for rel_pos, rel_name in enumerate(relations)
]

In [5]:
# Rank the (relation, score) pairs from the highest score to the lowest.
sorted(scores, key=lambda rel_score: rel_score[1], reverse=True)

[('age', 0.18151794),
 ('hasFriend', 0.08699002),
 ('hasFather', 0.040083323),
 ('name', 0.029876247),
 ('type', 0.026286818),
 ('hasSpouse', 0.0060162283),
 ('hasChild', 0.003967628),
 ('trouserssize', 0.0022677071),
 ('shoesize', -0.052538656),
 ('shirtsize', -0.06385116),
 ('phoneNumber', -0.068703905)]

In [6]:
# Show every triple in which 'Mark' is either the head or the tail.
for triple in X:
    head, _, tail = triple
    if 'Mark' in (head, tail):
        print(*triple)

Mark name Mark
Mark hasFather John
Mark age 14
Mark shoesize 8
Mark trouserssize 36
Mark shirtsize 9
Mark type Person


In [7]:
# Replace every (head, relation, tail) triple of X by the embedding vectors of
# its three members; the result is stacked as (num_triples, 3, embedding_dim).
#
# The name -> row lookups are built once as dicts instead of calling
# list.index() (a linear scan) for every triple.  setdefault keeps the FIRST
# position of a repeated name, which is what list.index() returned.
# An unknown name now raises KeyError (list.index raised ValueError).
entity_pos = {}
for pos, name in enumerate(entities):
    entity_pos.setdefault(name, pos)

relation_pos = {}
for pos, name in enumerate(relations):
    relation_pos.setdefault(name, pos)

# Explicit integer dtype so that an empty X still yields valid index arrays.
head_ids = np.array([entity_pos[h] for h, _, _ in X], dtype=np.intp)
rel_ids = np.array([relation_pos[r] for _, r, _ in X], dtype=np.intp)
tail_ids = np.array([entity_pos[t] for _, _, t in X], dtype=np.intp)

# Fancy indexing gathers all rows at once; axis=1 puts head/relation/tail
# side by side for each triple, matching the per-triple tuples built before.
idx_data = np.stack(
    [
        entity_embeddings[head_ids],
        relation_embeddings[rel_ids],
        entity_embeddings[tail_ids],
    ],
    axis=1,
)

In [8]:
# TODO: compare a denoising autoencoder against a baseline autoencoder.
# Open question: use gradients?

In [99]:
NUM_ENTITIES = len(entities)
NUM_RELATIONS = len(relations)

# Fail early, with a readable message, if the pre-trained tables do not match
# the vocabulary sizes or the configured embedding width.
assert entity_embeddings.shape == (NUM_ENTITIES, NUM_DIMS), (
    f"entity_embeddings has shape {entity_embeddings.shape}, "
    f"expected {(NUM_ENTITIES, NUM_DIMS)}")
assert relation_embeddings.shape == (NUM_RELATIONS, NUM_DIMS), (
    f"relation_embeddings has shape {relation_embeddings.shape}, "
    f"expected {(NUM_RELATIONS, NUM_DIMS)}")

# One scalar id per example for each slot of the triple.
head_input = tf.keras.layers.Input(shape=(1,), name='head_input')
relation_input = tf.keras.layers.Input(shape=(1,), name='relation_input')
tail_input = tf.keras.layers.Input(shape=(1,), name='tail_input')

# Frozen lookup tables initialised from the pre-trained embeddings.
# A Constant initializer is used instead of the legacy `weights=[...]`
# constructor argument, which newer Keras releases reject.
entity_embed = tf.keras.layers.Embedding(
    input_dim=NUM_ENTITIES,
    output_dim=NUM_DIMS,
    embeddings_initializer=tf.keras.initializers.Constant(entity_embeddings),
    trainable=False,
    name='entity_embed')

relation_embed = tf.keras.layers.Embedding(
    input_dim=NUM_RELATIONS,
    output_dim=NUM_DIMS,
    embeddings_initializer=tf.keras.initializers.Constant(relation_embeddings),
    trainable=False,
    name='relation_embed')

In [85]:
# Head and tail share the entity table; the relation has its own table.
head_entity, tail_entity = (
    entity_embed(entity_in) for entity_in in (head_input, tail_input)
)
relation_entity = relation_embed(relation_input)

In [107]:
# Element-wise product of the three embedded members of the triple.
triple_factors = [head_entity, relation_entity, tail_entity]
vector = tf.keras.layers.Multiply()(triple_factors)

In [108]:
# Hidden ReLU layer with NUM_DIMS units, then flatten its output.
hidden_layer = tf.keras.layers.Dense(units=NUM_DIMS, activation='relu', name='dense_1')
dense = hidden_layer(vector)
flatten = tf.keras.layers.Flatten()(dense)

In [109]:
# One read-out layer per slot of the triple.
# Head and tail previously went through the SAME Dense layer applied to the
# SAME tensor, which made head_output and tail_output the identical
# prediction.  The tail now has its own layer so the two can differ.
# `entity_output` keeps its variable and layer name and serves the head.
entity_output = tf.keras.layers.Dense(NUM_ENTITIES, name='entity_output')
tail_entity_output = tf.keras.layers.Dense(NUM_ENTITIES, name='tail_entity_output')
relation_output = tf.keras.layers.Dense(NUM_RELATIONS, name='relation_output')

head_output = entity_output(flatten)
tail_output = tail_entity_output(flatten)
rel_output = relation_output(flatten)

In [None]:
# Example trace entry (triple followed by the triples recorded for it):
# ('Eve', 'type', 'Person') [('Eve', 'type', 'Lecturer'), ('Lecturer', 'subClassOf', 'Person')]

In [10]:
import rdflib

In [11]:
# Read the whole trace file into memory, one list element per line (line
# endings are kept).  The encoding is given explicitly so the result does not
# depend on the platform's default locale.
# NOTE(review): UTF-8 is assumed for the .ttl file - confirm against the tool that wrote it.
with open('../traces/entailment.ttl', 'r', encoding='utf-8') as f:
    lines = f.readlines()

In [12]:
def get_tup(line_str):
    """Turn one whitespace-separated statement line into a tuple of local names.

    The last whitespace-separated token of the line is discarded, and for
    every remaining token only the text after its final ':' is kept (a token
    without ':' is kept unchanged).

    Args:
        line_str: a single line of text.

    Returns:
        A tuple with one string per remaining token; empty if the line has
        fewer than two tokens.
    """
    tokens = line_str.split()[:-1]
    return tuple(token.split(':')[-1] for token in tokens)

In [28]:
from collections import defaultdict

# Map every triple found under a "graph us:construct" header to the list of
# triples listed in the "graph us:where" block that follows it.
traces = defaultdict(list)

# Triple of the most recent construct block.  Reset here so that a re-run of
# this cell can never pick up a stale value from an earlier execution.
source_tup = None

for idx, line in enumerate(lines):

    if "graph us:construct" in line:

        # The constructed triple is expected on the line right after the header.
        if idx + 1 >= len(lines):
            raise ValueError(f"'graph us:construct' on line {idx + 1} is the last line of the file")

        source_tup = get_tup(lines[idx + 1])
        assert len(source_tup) == 3, f"malformed construct triple after line {idx + 1}: {source_tup!r}"

        # A triple that is constructed again starts over with an empty list.
        traces[source_tup] = []

    if 'graph us:where' in line:

        if source_tup is None:
            raise ValueError(f"'graph us:where' on line {idx + 1} appears before any 'graph us:construct'")

        # Walk forward by index (no copy of the remaining lines) until the
        # line that closes the block.
        for sub_idx in range(idx + 1, len(lines)):

            sub_line = lines[sub_idx]
            if sub_line.strip() == '}':
                break

            exp_tup = get_tup(sub_line)
            # Validate before storing so a malformed line never ends up in traces.
            assert len(exp_tup) == 3, f"malformed where triple on line {sub_idx + 1}: {exp_tup!r}"
            traces[source_tup].append(exp_tup)

In [81]:
# Print each triple of X that has a recorded trace, together with that trace.
for triple in map(tuple, X):
    if triple in traces:
        print(triple, traces[triple])

('Eve', 'type', 'Person') [('Eve', 'type', 'Lecturer'), ('Lecturer', 'subClassOf', 'Person')]
('David', 'type', 'Person') [('David', 'type', 'Researcher'), ('Researcher', 'subClassOf', 'Person')]
