In [1]:
import numpy as np
import utils
import os
import tensorflow as tf

In [55]:
RULE = 'uncle'
TRACE_LENGTH = 2

data = np.load(os.path.join('..','data','royalty.npz'))

triples,traces,nopred,entities,relations = utils.get_data(data,RULE)

NUM_ENTITIES = len(entities)
NUM_RELATIONS = len(relations)

ent2idx = dict(zip(entities, range(NUM_ENTITIES)))
rel2idx = dict(zip(relations, range(NUM_RELATIONS)))

idx2ent = dict(zip(range(NUM_ENTITIES),entities))
idx2rel = dict(zip(range(NUM_RELATIONS),relations))

In [3]:
explaine_data = np.load(os.path.join('..','data','preds','explaine_'+RULE+'_preds.npz'),allow_pickle=True)

In [4]:
true_triples = triples[explaine_data['test_idx']]
true_traces = traces[explaine_data['test_idx']][:,0:TRACE_LENGTH,:]

In [5]:
pred_traces = utils.idx2array(explaine_data['preds'],idx2ent,idx2rel)

In [6]:
adj_data = np.concatenate([triples,traces[:,0:TRACE_LENGTH,:].reshape(-1,3)],axis=0)

adj_data_sparse = utils.array2idx(adj_data,ent2idx,rel2idx)

In [7]:
adj_mats = utils.get_adj_mats(
    data=adj_data_sparse,
    num_entities=NUM_ENTITIES,
    num_relations=NUM_RELATIONS
)

In [None]:
#np.argwhere(tf.sparse.to_dense(adj_mats[4])[0,ent2idx["Al-Ma'mun>"],:].numpy())

In [114]:
import joblib

In [117]:
def get_count(i,true_triples,pred_traces,ent2idx,adj_mats,num_relations):
    
    current_count = 0
    
    head,_,tail = true_triples[i]
    pred_i = pred_traces[i]
    
    head_idx = ent2idx[head]
    tail_idx = ent2idx[tail]
    
    neighbor_indices = []
    
    for rel_idx in range(num_relations):
        
        dense_mat = tf.sparse.to_dense(adj_mats[rel_idx]).numpy()[0]
        
        head_neighbors = np.argwhere(dense_mat[head_idx,:]).flatten()
        tail_neighbors = np.argwhere(dense_mat[:,tail_idx]).flatten()
        
        neighbor_indices += head_neighbors.tolist()
        neighbor_indices += tail_neighbors.tolist()
    
    neighbors = [idx2ent[idx] for idx in neighbor_indices]
    
    pred_entities = np.unique(np.concatenate((pred_i[:,0],pred_i[:,2]),axis=0)).tolist()
    
    for p in pred_entities:
        if p in neighbors:
            current_count += 1
            break
    
    if current_count >= 1:
        return 1
    else:
        return 0

In [120]:
total_count = joblib.Parallel(n_jobs=-2, verbose=20)(
            joblib.delayed(get_count)(i,true_triples,pred_traces,ent2idx,adj_mats,num_relations=NUM_RELATIONS)
                for i in range(len(true_triples))
            )

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=-2)]: Done   1 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-2)]: Done   2 out of   5 | elapsed:    4.5s remaining:    6.7s
[Parallel(n_jobs=-2)]: Done   3 out of   5 | elapsed:    4.6s remaining:    3.1s
[Parallel(n_jobs=-2)]: Done   5 out of   5 | elapsed:    5.4s remaining:    0.0s
[Parallel(n_jobs=-2)]: Done   5 out of   5 | elapsed:    5.4s finished


In [125]:
#sum(total_count) / true_triples.shape[0]

In [116]:
# total_count = 0

# for i in range(len(true_triples)):
    
#     current_count = 0
    
#     head,_,tail = true_triples[i]
#     pred_i = pred_traces[i]
    
#     head_idx = ent2idx[head]
#     tail_idx = ent2idx[tail]
    
#     neighbor_indices = []
    
#     for rel_idx in range(NUM_RELATIONS):
        
#         dense_mat = tf.sparse.to_dense(adj_mats[rel_idx]).numpy()[0]
        
#         head_neighbors = np.argwhere(dense_mat[head_idx,:]).flatten()
#         tail_neighbors = np.argwhere(dense_mat[:,tail_idx]).flatten()
        
#         neighbor_indices += head_neighbors.tolist()
#         neighbor_indices += tail_neighbors.tolist()
    
#     neighbors = [idx2ent[idx] for idx in neighbor_indices]
    
#     pred_entities = np.unique(pred_i[:,0]).tolist() + np.unique(pred_i[:,2]).tolist()
    
#     for p in pred_entities:
#         if p in neighbors:
#             current_count += 1
    
#     if current_count >= 1:
#         total_count += 1

In [215]:
2654/true_traces.shape[0]

0.9183391003460207

In [242]:
true_triples[0]

array(['Abbhantripaja', 'uncle', 'Devawongse_Varopakarn'], dtype='<U76')

In [243]:
np.unique(pred_traces[0][:,0]).tolist() + np.unique(pred_traces[0][:,2]).tolist()

['Abbhantripaja', 'Adisaya_Suriyabha', 'Prabha_Bannabilaya']

In [244]:
true_traces[0]

array([['Chulalongkorn', 'brother', 'Devawongse_Varopakarn'],
       ['Abbhantripaja', 'parent', 'Chulalongkorn']], dtype='<U76')

In [188]:
unique = []

for list_ in pred_traces[:,:,1]:
    list_ = list(list_)
    if list_ not in unique:
        
        tup = tuple(list_)
        
        unique.append(tup)
        
d = {}

for tup in unique:
        
    count = (tup == pred_traces[:,:,1]).all(axis=1).sum() 
    
    d[tup] = count
    
output = sorted(d.items(),key=lambda x:x[1],reverse=True)

# bp_idx = (pred_traces[:,:,1] == ['brother','parent']).all(axis=1)
# pb_idx = (pred_traces[:,:,1] == ['parent','brother']).all(axis=1)

# print(np.argwhere(bp_idx)[0])
# print(np.argwhere(pb_idx)[0])

In [214]:
for i in range(NUM_RELATIONS):
    
    dense_mat = tf.sparse.to_dense(adj_mats[i]).numpy()[0]
    
    np.argwhere(dense_mat[head_idx,:]).flatten()

In [233]:
for i in range(len(true_traces)):

    pred_i = pred_traces[i]
    true_i = triples[i]
    
    if (['uncle','sister'] == pred_i[:,1]).all():
        print(i)
        print(pred_i)
        print(true_i)
        print(traces[i])
        break

10
[['Abdulaziz_bin_Abdullah_bin_Abdulaziz_Al_Saud' 'uncle'
  'Sattam_bin_Abdulaziz_Al_Saud']
 ['Nayef_bin_Abdul-Aziz_Al_Saud' 'sister'
  'Sultana_bint_Abdulaziz_Al_Saud']]
['Abdul_Hamid_II' 'uncle' 'Abdülaziz_of_the_Ottoman_Empire']
[['Abdülmecid_I' 'brother' 'Abdülaziz_of_the_Ottoman_Empire']
 ['Abdul_Hamid_II' 'parent' 'Abdülmecid_I']
 ['UNK_ENT' 'UNK_REL' 'UNK_ENT']]


In [None]:
# 3 most common predictions: (uncle, uncle), (brother, brother), (brother, uncle)
# (a,hasUncle,b): % of triples using 1st degree neighbors of either a or b
# %of predictions that did not include brother or parent:

In [191]:
count = 0
for tup in output:
    if 'parent' not in tup[0]:
        print(tup[0])
        count += tup[1]

('uncle', 'uncle')
('brother', 'brother')
('brother', 'uncle')
('uncle', 'sister')
('uncle', 'brother')
('sister', 'uncle')
('sister', 'sister')
('brother', 'sister')
('sister', 'brother')


In [192]:
count/true_triples.shape[0]

0.6899653979238755