In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding
import numpy as np
import os
import utils
import random as rn
import RGCN

In [257]:
SEED = 123
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
tf.random.set_seed(SEED)
np.random.seed(SEED)
rn.seed(SEED)

RULE = 'aunt'

data = np.load(os.path.join('..','data','royalty.npz'))

triples, traces = data[RULE + '_triples'], data[RULE + '_traces']

entities = data[RULE + '_entities'].tolist()
relations = data[RULE + '_relations'].tolist()

NUM_ENTITIES = len(entities)
NUM_RELATIONS = len(relations)
EMBEDDING_DIM = 25
OUTPUT_DIM = 50
LEARNING_RATE = 1e-3
NUM_EPOCHS = 1
BATCH_SIZE = 1

ent2idx = dict(zip(entities, range(NUM_ENTITIES)))
rel2idx = dict(zip(relations, range(NUM_RELATIONS)))

train2idx = utils.array2idx(triples, ent2idx,rel2idx)

NUM_TRIPLES = train2idx.shape[0]

# adj_mats = utils.get_adjacency_matrix_list(
#     num_relations=NUM_RELATIONS,
#     num_entities=NUM_ENTITIES,
#     data=train2idx
# )

train2idx = np.expand_dims(train2idx,axis=0)
all_indices = np.arange(NUM_ENTITIES).reshape(1,-1)

In [258]:
trainexp2idx = utils.array2idx(traces, ent2idx,rel2idx)

In [5]:
# indices = np.concatenate([train2idx[0][:,[0,2]],train2idx[0][:,[2,0]]],axis=0)

# a = tf.sparse.SparseTensor(indices=indices,values=np.ones((indices.shape[0])),dense_shape=(NUM_ENTITIES,NUM_ENTITIES))
# a = tf.sparse.reorder(
#     a
# )


In [6]:
# def get_adj_mats(data,num_entities,num_relations):

#         adj_mats = []

#         for i in range(num_relations):

#             data_i = data[data[:,1] == i]

#             indices = np.concatenate([data_i[:,[0,2]],data_i[:,[2,0]]],axis=0)

#             sparse_mat = tf.sparse.SparseTensor(
#                 indices=indices,
#                 values=np.ones((indices.shape[0])),
#                 dense_shape=(num_entities,num_entities)
#                 )

#             sparse_mat = tf.sparse.reorder(sparse_mat)

#             sparse_mat = tf.sparse.reshape(sparse_mat, shape=(1,num_entities,num_entities))

#             adj_mats.append(sparse_mat)

#         return adj_mats

# adj_mats = get_adj_mats(train2idx[0],NUM_ENTITIES,NUM_RELATIONS)

In [7]:
tf_train2idx = tf.convert_to_tensor(train2idx[0])

def computation_graph_mats(head,tail,data,num_relations,num_entities):
    pass
    

In [8]:
# neighbors_head = np.concatenate([train2idx[0][(train2idx[0][:,0] == 7874)],train2idx[0][(train2idx[0][:,2] == 7874)]])
# neighbors_tail = np.concatenate([train2idx[0][(train2idx[0][:,0] == 8589)],train2idx[0][(train2idx[0][:,2] == 8589)]])

# all_neighbors = np.concatenate([neighbors_head,neighbors_tail], axis=0)

# indices = np.unique(all_neighbors[:,[1,0,2]], axis=0)

# a = tf.sparse.SparseTensor(indices=indices,values=np.ones((indices.shape[0])),dense_shape=(1,NUM_ENTITIES,NUM_ENTITIES))
# a = tf.sparse.reorder(
#     a
#)   
#k_hop_adj_mats = tf.sparse.expand_dims(a, axis=0)
# for i in neighbors_1_head[:,2]:
#     print(i,train2idx[0][train2idx[0][:,0] == i])

In [9]:
h,r,t = train2idx[0,3,:]

#filter train2idx to get neighbors -> feed into utils.get_adj_mats

head = np.array([h])
rel = np.array([r])
tail = np.array([t])

tf_k_hop_adj_mats = []

for i in range(NUM_RELATIONS):
    
    data = tf_train2idx[tf_train2idx[:,1] == i]

    tf_neighbors_head = tf.concat([data[data[:,0] == head],data[data[:,2] == head]],axis=0)
    tf_neighbors_tail = tf.concat([data[(data[:,0] == tail) & (data[:,0] != head)],data[(data[:,2] == tail) & (data[:,0] != head)]],axis=0)

    tf_all_neighbors = tf.concat([tf_neighbors_head,tf_neighbors_tail],axis=0)
    
    tf_indices = tf.transpose(tf.stack([tf_all_neighbors[:,0],tf_all_neighbors[:,2]]))

    tf_k_hop_adj_mat = tf.sparse.SparseTensor(
        indices=tf_indices,
        values=tf.ones((tf_indices.shape[0])),
        dense_shape=(NUM_ENTITIES,NUM_ENTITIES)
    )
    
    tf_k_hop_adj_mat = tf.sparse.reorder(tf_k_hop_adj_mat)
    
    tf_k_hop_adj_mats.append(tf_k_hop_adj_mat)

In [111]:
def get_computation_graph(head,rel,tail,data,num_relations):
    
    '''Get 1st degree neighbors of head and tail'''
     
    subset = data[data[:,1] == rel]

    neighbors_head = tf.concat([data[data[:,0] == head],
                                data[data[:,2] == head]],axis=0)
    neighbors_tail = tf.concat([data[(data[:,0] == tail) & (data[:,0] != head)],
                                data[(data[:,2] == tail) & (data[:,0] != head)]],axis=0)

    all_neighbors = tf.concat([neighbors_head,neighbors_tail],axis=0)

    return all_neighbors

In [296]:
def get_adj_mats(data,num_entities,num_relations,reshape=True):

    adj_mats = []

    for i in range(num_relations):

        data_i = data[data[:,1] == i]
   
        indices = tf.concat([tf.gather(data_i,[0,2],axis=1),tf.gather(data_i,[2,0],axis=1)],axis=0)
        
        sparse_mat = tf.sparse.SparseTensor(
            indices=indices,
            values=tf.ones((indices.shape[0])),
            dense_shape=(num_entities,num_entities)
            )

        sparse_mat = tf.sparse.reorder(sparse_mat)
        
        if reshape:
            
            sparse_mat = tf.sparse.reshape(sparse_mat, shape=(1,num_entities,num_entities))

        adj_mats.append(sparse_mat)

    return adj_mats

In [297]:
#tf.sparse.SparseTensor(indices=indices,values=np.ones((indices.shape[0])),dense_shape=(NUM_RELATIONS,NUM_ENTITIES,NUM_ENTITIES))
#tf_indices = tf.transpose(tf.stack([tf_all_neighbors[:,1],tf_all_neighbors[:,0],tf_all_neighbors[:,2]]))

c_graph = get_computation_graph(head,rel,tail,tf_train2idx,NUM_RELATIONS)
get_adj_mats(c_graph, NUM_ENTITIES, NUM_RELATIONS,reshape=False)[1].indices

#c_graph[c_graph[:,1] == 0]
#tf.concat([tf_train2idx[tf_train2idx[:,0] == head],tf_train2idx[tf_train2idx[:,2] == head]],axis=0)


<tf.Tensor: shape=(34, 2), dtype=int64, numpy=
array([[ 2425,  7667],
       [ 2425,  7684],
       [ 2425,  7699],
       [ 2425,  7706],
       [ 2425,  7712],
       [ 2425,  7715],
       [ 2425,  7839],
       [ 2425,  7850],
       [ 2425,  8589],
       [ 2425,  8628],
       [ 2425, 10731],
       [ 2425, 11181],
       [ 7667,  2425],
       [ 7684,  2425],
       [ 7699,  2425],
       [ 7706,  2425],
       [ 7712,  2425],
       [ 7715,  2425],
       [ 7805,  8589],
       [ 7826,  8589],
       [ 7833,  8589],
       [ 7839,  2425],
       [ 7850,  2425],
       [ 7874,  8589],
       [ 8589,  2425],
       [ 8589,  7805],
       [ 8589,  7826],
       [ 8589,  7833],
       [ 8589,  7874],
       [ 8589,  8624],
       [ 8624,  8589],
       [ 8628,  2425],
       [10731,  2425],
       [11181,  2425]])>

In [None]:
# tf_k_hop_adj_mats = tf.sparse.SparseTensor(
#     indices=tf_indices,
#     values=tf.ones((tf_indices.shape[0])),
#     dense_shape=(NUM_RELATIONS,NUM_ENTITIES,NUM_ENTITIES))
# tf_k_hop_adj_mats = tf.sparse.reorder(tf_k_hop_adj_mats)   
# tf_k_hop_adj_mats = tf.sparse.expand_dims(tf_k_hop_adj_mats, axis=0)

In [10]:
model = RGCN.get_RGCN_Model(
        num_triples=NUM_TRIPLES,
        num_entities=NUM_ENTITIES,
        num_relations=NUM_RELATIONS,
        embedding_dim=EMBEDDING_DIM,
        output_dim=OUTPUT_DIM,
        seed=SEED
    )
#model.load_weights(os.path.join('..','data','weights','rgcn.h5'))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


In [11]:
# tf_data = tf.expand_dims(tf_train2idx,axis=0)

# model([all_indices, tf_data[:,:,0],tf_data[:,:,1],tf_data[:,:,2],tf_k_hop_adj_mats]) < .5

In [12]:
masks = [tf.Variable(
            initial_value=tf.random.normal(
                (NUM_ENTITIES,NUM_ENTITIES), 
                mean=0, 
                stddev=1, 
                dtype=tf.dtypes.float32, 
                seed=SEED),
            name='mask_'+str(i),
            trainable=True) for i in range(NUM_RELATIONS)]

In [13]:
masks

[<tf.Variable 'mask_0:0' shape=(14154, 14154) dtype=float32, numpy=
 array([[ 0.5693312 , -0.35097048,  0.60993767, ..., -0.3841859 ,
         -1.5260998 , -2.1384628 ],
        [-0.491991  ,  0.2880641 ,  0.48281935, ...,  2.5248125 ,
         -0.13874033, -0.73854965],
        [-1.024216  ,  0.24099654,  0.80837035, ...,  1.5315689 ,
         -0.03924537,  0.4544497 ],
        ...,
        [-0.01932065,  0.92647284, -0.16475141, ..., -0.03513337,
          2.0643313 ,  1.5485798 ],
        [ 1.1980947 , -1.5618074 ,  0.66932935, ..., -0.5422449 ,
         -0.22707364, -0.9244645 ],
        [ 1.427597  ,  0.82098234,  1.7894857 , ...,  1.6456954 ,
          0.18594119,  0.6266803 ]], dtype=float32)>,
 <tf.Variable 'mask_1:0' shape=(14154, 14154) dtype=float32, numpy=
 array([[ 0.7596297 ,  0.71485424,  0.05565393, ..., -0.99534994,
         -0.6636354 , -1.4355375 ],
        [-1.0351917 , -0.6631321 , -1.4279053 , ...,  0.5082331 ,
         -1.2639966 , -0.25292748],
        [ 0.57050

In [14]:
#bce = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)

In [114]:
# y_init = model(
#             [
#             all_indices,
#             head.reshape(1,1),
#             rel.reshape(1,1),
#             tail.reshape(1,1),
#             tf_k_hop_adj_mats
#             ]
#         )
# y_init

In [16]:
# model([
#             all_indices,
#             train2idx[:,:,0],
#             train2idx[:,:,1],
#             train2idx[:,:,2],
#             tf_k_hop_adj_mats
#             ]
#         )

In [17]:
for epoch in range(2):
    
    with tf.GradientTape() as tape:
        
        tape.watch(masks)
        
        masked_adj = []
        
        for i in range(NUM_RELATIONS):
            
            masked_adj.append(tf_k_hop_adj_mats[i] * tf.nn.sigmoid(masks[i]))
            
        y_pred = model(
            [
            all_indices,
            head.reshape(1,1),
            rel.reshape(1,1),
            tail.reshape(1,1),
            masked_adj
            ]
        )
        
        print(y_pred)
        loss = -1*tf.math.log(y_pred+.00001) + tf.reduce_mean(tf.nn.sigmoid(masks))
        
        print(loss)
        
    grads = tape.gradient(loss,masks)
    optimizer.apply_gradients(zip(grads,masks))

tf.Tensor([[0.9755638]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.52470076]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9766091]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.5236299]], shape=(1, 1), dtype=float32)


In [22]:
#pred_subgraph = tf.cast((tf_k_hop_adj_mats*tf.nn.sigmoid(masks)) > .5,dtype=tf.int32)

In [49]:
# pred_subgraph = [tf.cast((tf_k_hop_adj_mats[i]*tf.nn.sigmoid(masks[i])) > .5,dtype=tf.int32)
#                  for i in range(NUM_RELATIONS)]

In [233]:
true_subgraphs = get_adj_mats(tf_trainexp2idx[0],NUM_ENTITIES,NUM_RELATIONS,reshape=False)

for i in range(NUM_RELATIONS):
    
    mask_i = tf_k_hop_adj_mats[i]*masks[i]
    
    if mask_i.indices.shape[0]:

        non_masked_indices = mask_i.indices[mask_i.values > .5]

        pred_graph = tf.sparse.SparseTensor(
            indices=non_masked_indices,
            values=tf.ones(non_masked_indices.shape[0]),
            dense_shape=(NUM_ENTITIES,NUM_ENTITIES)
        )
        
        pred_graph = tf.sparse.to_dense(pred_graph)
        
        true_graph = true_subgraphs[i]
        
        print(tf_binary_jaccard(true_graph,pred_graph))
#    print(tf.sparse.to_dense(computation_graph))


tf.Tensor(1.0, shape=(), dtype=float64)


In [None]:
#TODAY:
#jaccard gnn explainer
#name weight file -> retrain RGCN?

In [83]:
#tf.math.logical_and(tf_k_hop_adj_mats[1]==1.,tf_k_hop_adj_mats[1]==1)

In [235]:
#(tf_k_hop_adj_mats[1]* masks[1]).values > .5
#a = tf.sparse.to_dense(tf_k_hop_adj_mats[1])
#tf.reduce_sum(tf.cast(tf.math.logical_and(a==1,a==1), tf.float32))
#tf_k_hop_adj_mats[1].indices[(tf_k_hop_adj_mats[1]* masks[1]).values > .5]

In [232]:
def tf_binary_jaccard(true_graph,pred_graph):
    
    m11 = tf.reduce_sum(tf.cast(tf.math.logical_and(true_graph==1,
                                                    pred_graph==1),dtype=tf.int32))
    m01 = tf.reduce_sum(tf.cast(tf.math.logical_and(true_graph==0,
                                                    pred_graph==1),dtype=tf.int32))
    m10 = tf.reduce_sum(tf.cast(tf.math.logical_and(true_graph==1,
                                                    pred_graph==0),dtype=tf.int32))
    
    return m11 / (m01 + m10 + m11)

In [261]:
tf_trainexp2idx = tf.convert_to_tensor(trainexp2idx)

In [286]:
tf.gather(tf_trainexp2idx[0][tf_trainexp2idx[0][:,1] == 2],[0,2],axis=1)

<tf.Tensor: shape=(1, 2), dtype=int64, numpy=array([[1405,  808]])>

In [299]:
true_subgraphs[0].indices

<tf.Tensor: shape=(2, 2), dtype=int64, numpy=
array([[2628, 2628],
       [2628, 2628]])>

In [None]:
#indices = np.concatenate([train2idx[0][:,[1,0,2]],train2idx[0][:,[1,2,0]]],axis=0)

#a = tf.sparse.expand_dims(a, axis=0)

# a = tf.sparse.SparseTensor(indices=np.concatenate([np.zeros((train2idx.shape[1],1),dtype=np.int64),train2idx[0][:,[1,0,2]]], axis=1),
#                           values=np.ones(train2idx.shape[1]),dense_shape=(1,NUM_RELATIONS,NUM_ENTITIES,NUM_ENTITIES))
# a = tf.sparse.reorder(
#     a
# )

#np.concatenate([np.zeros((train2idx.shape[1],1),dtype=np.int64),train2idx[0][:,[1,0,2]]],axis=1)
#np.concatenate([np.zeros((train2idx.shape[1],1),dtype=np.int64),train2idx[0][:,[1,2,0]]],axis=1)


#[0,4,823,4246]
#0,4,1532,9141

# np.argwhere(train2idx[0,:,0] == 1532)
# triples[5313,:]

#np.concatenate([train2idx[0,:,1].reshape(1,-1),train2idx[0,:,0].reshape(-1,1),train2idx[0,:,2].reshape(-1,1)],axis=1)
#np.swapaxis(train2idx,)

#tf.sparse.to_dense(a)

#tf.nn.embedding_lookup_sparse(a, tf.convert_to_tensor([3,3]),sp_weights=None)

In [None]:
#tf.nn.embedding_lookup_sparse(a, train2idx[:,0:5,0],sp_weights=None)


In [None]:
tf.matmul(a_dense[8,15:20,:],tf.ones((14154,5),tf.float64))

In [None]:
tf.sparse.sparse_dense_matmul(a,tf.ones((14154,5),tf.float64))[15:20]

In [None]:
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(), 
    optimizer=tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)
)

model.fit(
    x=[
        all_indices,
        train2idx[:,:,0],
        train2idx[:,:,1],
        train2idx[:,:,2],
        adj_mats
        ],
    y=np.ones(NUM_TRIPLES).reshape(1,-1),
    epochs=NUM_EPOCHS,
    batch_size=1,
    verbose=1
)

In [None]:
# optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
# bce = tf.keras.losses.BinaryCrossentropy()

# data = tf.data.Dataset.from_tensor_slices((
#         train2idx[0,:,0],
#         train2idx[0,:,1],
#         train2idx[0,:,2], 
#         np.ones(train2idx.shape[1])
#     )
# ).batch(BATCH_SIZE)

# for epoch in range(NUM_EPOCHS):

#     for pos_head,rel,pos_tail,y in data:

#         neg_head, neg_tail = utils.get_negative_triples(
#             head=pos_head, 
#             rel=rel, 
#             tail=pos_tail,
#             num_entities=NUM_ENTITIES
#         )

#         with tf.GradientTape() as tape:
            
# #             print(all_indices.shape)
# #             print(pos_head.shape)
# #             print(adj_mats.shape)

#             y_pos_pred = model([
#                 all_indices,
#                 pos_head,
#                 rel,
#                 pos_tail,
#                 adj_mats
#                 ],
#                 training=True
#             )

#             y_neg_pred = model([
#                 all_indices,
#                 neg_head,
#                 rel,
#                 neg_tail,
#                 adj_mats
#                 ],
#                 training=True
#             )

#             y_pred = tf.concat([y_pos_pred,y_neg_pred],axis=0)
#             y_true = tf.concat([y,tf.zeros_like(y)],axis=0)

#             loss = bce(y_true,y_pred)

#         grads = tape.gradient(loss, model.trainable_weights)
#         optimizer.apply_gradients(zip(grads, model.trainable_weights))

#     print(f'loss {loss} after epoch {epoch}')

In [None]:
print(all_indices.shape)
print(train2idx[:,:,0].shape)
print(train2idx[:,:,1].shape)
print(train2idx[:,:,2].shape)
print(adj_mats.shape)

In [None]:
y_true = model.predict(
    x=[
        all_indices,
        train2idx[:,:,0],
        train2idx[:,:,1],
        train2idx[:,:,2],
        adj_mats
    ]
)

In [None]:
tf.where(adj_mats[0,2,:][train2idx[:,0:1,0]]==1.)[:,-1]

In [None]:
model([
    all_indices,
    train2idx[:,0:1,0],
    train2idx[:,0:1,1],
    train2idx[:,0:1,2],
    k_hop_adj_mats])

In [None]:
#embeddings = model.get_layer('entity_embeddings').get_weights()[0]

bce = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

In [None]:
h,_,t = train2idx[0,0,:]

k_hop_adj_mats = []

for r in range(NUM_RELATIONS):

    k_hop_adj_mat = np.zeros((NUM_ENTITIES,NUM_ENTITIES))

    head_neighbors = tf.where(adj_mats[0,r,:][h]==1.)[:,-1]
    tail_neighbors = tf.where(adj_mats[0,r,:][t]==1.)[:,-1]

    for h_i in head_neighbors:
        k_hop_adj_mat[h,h_i] = 1.
        k_hop_adj_mat[h_i,h] = 1.
        
    for t_i in tail_neighbors:
        k_hop_adj_mat[t,t_i] = 1.
        k_hop_adj_mat[t_i,t] = 1.

    k_hop_adj_mats.append(k_hop_adj_mat)

k_hop_adj_mats = np.expand_dims(k_hop_adj_mats,axis=0)

In [None]:
NUM_EPOCHS = 1

tf_train2idx = tf.convert_to_tensor(train2idx)
tf_k_hop_adj_mats = tf.convert_to_tensor(k_hop_adj_mats,dtype=tf.float32)
tf_all_indices = tf.convert_to_tensor(all_indices)

y_true = model([
    all_indices,
    train2idx[:,0:1,0],
    train2idx[:,0:1,1],
    train2idx[:,0:1,2],
    k_hop_adj_mats])

masks = tf.Variable(
        initial_value=tf.random.normal(
            (adj_mats.shape), 
            mean=0, 
            stddev=1, 
            dtype=tf.dtypes.float32, 
            seed=SEED),
        name='mask',
        trainable=True
    )
# for epoch in range(NUM_EPOCHS):
with tf.GradientTape() as tape:
    
    #tape.watch(tf_train2idx)
    #tape.watch(tf_k_hop_adj_mats)
    tape.watch(masks)

    
    masked_adj = tf_k_hop_adj_mats*tf.nn.sigmoid(masks)
    
    y_pred = model([
        tf_all_indices,
        tf_train2idx[:,0:1,0],
        tf_train2idx[:,0:1,1],
        tf_train2idx[:,0:1,2],
        masked_adj])

    loss = bce(y_true,y_pred)
    
grads = tape.gradient(loss,masks)
optimizer.apply_gradients(zip(grads,masks))

In [None]:
#subgraph = tf.cast((tf_k_hop_adj_mats*tf.nn.sigmoid(masks)) > .5,dtype=tf.int32)

In [None]:
#tf.reduce_sum(tape.gradient(loss,tf_k_hop_adj_mats)[0][2])

In [None]:
#exp to subgraph function
#list for jaccard scores

In [None]:
trainexp2idx[0]

In [None]:
true_subgraph = []

for i in range(NUM_RELATIONS):
    
    mat = np.zeros((NUM_ENTITIES,NUM_ENTITIES))

    exp_triples = trainexp2idx[0][trainexp2idx[0][:,1] == i]
    
    for exp_h,_,exp_t in exp_triples:
        
        mat[exp_h,exp_t] = 1
        mat[exp_t,exp_h] = 1
        
    true_subgraph.append(mat)

true_subgraph = np.expand_dims(true_subgraph,axis=0)

In [None]:
true_subgraph.shape

In [None]:
def binary_jaccard(truth,pred):
    
    m11 = np.logical_and(truth==1,pred==1).sum()
    m01 = np.logical_and(truth==0,pred==1).sum()
    m10 = np.logical_and(truth==1,pred==0).sum()
    
    return (m11 / (m01+m10+m11))

binary_jaccard(truth,pred)

In [None]:
# truth = np.array([[1,1,1],[1,0,0],[1,0,0]])
# pred = np.array([[1,1,1],[1,0,0],[1,0,0]])

# a = truth.sum()
# b = pred.sum()
# intersect = np.logical_and(truth, pred)
# print(truth)
# print(pred)
# print(intersect)
# print(intersect.sum()/(a+b-intersect.sum()))

In [None]:
# tail_triples = []
# for i in tail_indices:
#     tail_triples.append((h,r,i.numpy()[0]))
    
# tail_triples

In [None]:
# class New_RGCN_Layer(tf.keras.layers.Layer):
#     def __init__(self,num_relations,output_dim,**kwargs):
#         super(New_RGCN_Layer,self).__init__(**kwargs)
#         self.num_relations = num_relations
#         self.output_dim = output_dim
        
#     def build(self,input_shape):

#         input_dim = int(input_shape[-2][-1])
        
#         self.relation_kernel = self.add_weight(
#             shape=(self.num_relations,input_dim, self.output_dim),
#             name="relation_kernels",
#             trainable=True,
#             initializer=tf.keras.initializers.RandomNormal(
#                 mean=0.0,
#                 stddev=1,
#                 seed=SEED
#             )
#         )


#         self.self_kernel = self.add_weight(
#             shape=(input_dim, self.output_dim),
#             name="self_kernel",
#             trainable=True,
#             initializer=tf.keras.initializers.RandomNormal(
#                 mean=0.0,
#                 stddev=1,
#                 seed=SEED
#             )
#         )
    
#     def call(self, inputs):
        
#         embeddings,head_idx,head_e,tail_idx,tail_e,adj_mats = inputs
        
# #         print('embeddings',embeddings.shape)
# #         print('head_idx',head_idx.shape)
# #         print('head_e',head_e.shape)
# #         print('adj_mats',adj_mats.shape)
            
#         head_output = tf.matmul(head_e,self.self_kernel)
#         tail_output = tf.matmul(tail_e,self.self_kernel)
        
#         #print('head_output',head_output.shape)
        
#         for i in range(self.num_relations):
            
#             adj_i = adj_mats[i]

#             #print('adj_i',adj_i.shape)
            
#             head_adj = tf.nn.embedding_lookup(adj_i,head_idx)
#             tail_adj = tf.nn.embedding_lookup(adj_i,tail_idx)
            
#             #print('head_adj',head_adj.shape)
            
#             #print('head_adj',head_adj.shape)
#             #print('embeddings',embeddings.shape)
            
#             head_update = tf.matmul(head_adj,embeddings)
#             tail_update = tf.matmul(tail_adj,embeddings)

#             head_output += tf.matmul(head_update,self.relation_kernel[i])
#             tail_output += tf.matmul(tail_update,self.relation_kernel[i])
       
#         return head_output, tail_output
    
# class DistMult(tf.keras.layers.Layer):
#     def __init__(self, num_relations,**kwargs):
#         super(DistMult,self).__init__(**kwargs)
#         self.num_relations = num_relations
        
#     def build(self,input_shape):
        
#         embedding_dim = input_shape[0][-1]
        
#         self.kernel = self.add_weight(
#             shape=(self.num_relations,embedding_dim),
#             trainable=True,
#             initializer=tf.keras.initializers.RandomNormal(
#                 mean=0.0,
#                 stddev=1,
#                 seed=SEED
#             ),
#             name='rel_embedding'
#         )
        
#     def call(self,inputs):
        
#         head_e,rel_idx,tail_e = inputs
        
#         rel_e = tf.nn.embedding_lookup(self.kernel,rel_idx)
        
#         score = tf.sigmoid(tf.reduce_sum(head_e*rel_e*tail_e,axis=-1))
        
#         return tf.expand_dims(score,axis=0)
#         embeddings,head_idx,tail_idx,head_e,tail_e,adj_mats = inputs

#         adj_mats = tf.squeeze(adj_mats,axis=0)
#         embeddings = tf.squeeze(embeddings,axis=0)

#         head_output = tf.matmul(head_e,self.self_kernel)
#         tail_output = tf.matmul(tail_e,self.self_kernel)
        
#         for i in range(self.num_relations):
            
#             adj_i =x adj_mats[i]

#             head_adj = tf.nn.embedding_lookup(adj_i,head_idx)
#             tail_adj = tf.nn.embedding_lookup(adj_i,tail_idx)
            
#             h_head = tf.matmul(head_adj,embeddings)
#             h_tail = tf.matmul(head_adj,embeddings)
            
#             head_output += tf.matmul(h_head,self.relation_kernel[i])
#             tail_output += tf.matmul(h_tail,self.relation_kernel[i])

#         return head_output,tail_output

In [None]:
# data = tf.data.Dataset.from_tensor_slices((
#         train2idx[:,:,0],
#         train2idx[:,:,1],
#         train2idx[:,:,2], 
#         np.ones(train2idx.shape[1]).reshape(1,-1)
#     )
# ).batch(1)

# for h,r,t,y in data:

#     neg_head, neg_tail = utils.get_negative_triples(head=h,rel=r,tail=t,num_entities=NUM_ENTITIES)
#     print(h)
#     print(neg_head)
#     print(t) 
#     print(neg_tail)
#     break

In [None]:
# all_entities = tf.keras.Input(shape=(NUM_ENTITIES,), name='all_entities',dtype=tf.int64)
# head_input = tf.keras.Input(shape=(None,), name='head_input',batch_size=1,dtype=tf.int64)
# rel_input = tf.keras.Input(shape=(None,), name='rel_input',batch_size=1,dtype=tf.int64)
# tail_input = tf.keras.Input(shape=(None,), name='tail_input',batch_size=1,dtype=tf.int64)

# adj_inputs = tf.keras.Input(
#         shape=(
#             NUM_RELATIONS,
#             NUM_ENTITIES,
#             NUM_ENTITIES
#         ),
#         dtype=tf.float32,
#         name='adj_inputs'
#     )

# entity_embeddings = Embedding(
#         input_dim=NUM_ENTITIES,
#         output_dim=EMBEDDING_DIM,
#         name='entity_embeddings',
#         embeddings_initializer=tf.keras.initializers.RandomUniform(
#             minval=-1,
#             maxval=1,
#             seed=SEED
#         )
#     )

# all_e = entity_embeddings(all_entities)
# head_e = entity_embeddings(head_input)
# tail_e = entity_embeddings(tail_input)

# all_e = tf.keras.layers.Lambda(lambda x:x[0,:,:])(all_e)
# head_e = tf.keras.layers.Lambda(lambda x:x[0,:,:])(head_e)
# tail_e = tf.keras.layers.Lambda(lambda x:x[0,:,:])(tail_e)

# head_index = tf.keras.layers.Lambda(lambda x:x[0,:])(head_input)
# rel_index = tf.keras.layers.Lambda(lambda x:x[0,:])(rel_input)
# tail_index = tf.keras.layers.Lambda(lambda x:x[0,:])(tail_input)

# adj_mats_layer = tf.keras.layers.Lambda(lambda x:x[0,:,:])(adj_inputs)
# #embeddings,head_idx,head_e,tail_idx,tail_e,adj_mats

# new_head,new_tail = New_RGCN_Layer(NUM_RELATIONS,OUTPUT_DIM)([all_e,head_index,head_e,tail_index,tail_e,adj_mats_layer])
# #new_head = New_RGCN_Layer(NUM_RELATIONS,OUTPUT_DIM)([all_entities,])

# output = DistMult(num_relations=NUM_RELATIONS,name='output')([new_head,rel_index,new_tail])

# output = tf.keras.layers.Dense(train2idx.shape[1],activation='sigmoid')(output)

# #output = tf.keras.layers.Dense(1)(output)
# #output = tf.keras.layers.Reshape((1,))(output)
# # m = tf.keras.Model([head_input],[out])
# # m([train2idx[:,0:32,0]])
# m = tf.keras.Model([all_entities,head_input,rel_input,tail_input,adj_inputs],[output])

# m([np.arange(NUM_ENTITIES).reshape(1,-1),train2idx[:,:,0],train2idx[:,:,1],train2idx[:,:,2],adj_mats])

In [None]:
# m.compile(
#     loss=tf.keras.losses.BinaryCrossentropy(), 
#     optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3)
# )

# #m.summary()
# m.fit(x=[
#     all_indices,
#     train2idx[:,:,0],
#     train2idx[:,:,1],
#     train2idx[:,:,2],
#     adj_mats
# ],
#     y=np.ones(train2idx.shape[1]).reshape(1,-1),
#     epochs=NUM_EPOCHS,
#     batch_size=1,
#     verbose=1
# )

In [None]:
# preds = m.predict(
#     x=[
#         all_indices,
#         train2idx[:,:,0],
#         train2idx[:,:,1],
#         train2idx[:,:,2],
#         adj_mats
#     ]
# )

In [None]:
#m([all_indices,train2idx[:,0:1,0],train2idx[:,0:1,1],train2idx[:,0:1,2],masks])

In [None]:
#embeddings_model = tf.keras.Model(inputs=m.input,outputs=m.get_layer('entity_embeddings').output)

#embeddings = m.get_layer('entity_embeddings').get_weights()[0]

In [None]:
# head_embeddings,tail_embeddings = embeddings_model([
#     all_indices,
#     train2idx[:,0:1,0],
#     train2idx[:,0:1,1],
#     train2idx[:,0:1,2],
#     adj_mats
# ])

In [None]:
#train2idx[0,:,:][0:5]

In [None]:
#tf.where(adj_mats[0][2][100]==1.).numpy()
#np.argwhere(train2idx[0,:,2]== 100)

# for h,_,t in train2idx[:,822,:]:
    
#     head_neighbors = tf.where(adj_mats[0][2][h]==1.).numpy()
#     tail_neighbors = tf.where(adj_mats[0][2][t]==1.).numpy()
#tf.where(adj_mats[0][2][100]==1.).numpy()
#tf.where(adj_mats[0,2,[100,100,100]]==1.)[:,-1]
#train2idx[:,822,[0,2]]

In [None]:
#tf.nn.embedding_lookup(embeddings,train2idx[0,0:1,0])
#k_hop_adj_mats*tf.nn.sigmoid(masks)

In [None]:
#for triple in test set:
    #get k hop subgraph of each triple (all neighbors of head/tail)
    #compute adjacency matrix of subgraph (1,NUM_RELATIONS,NUM_ENTITIES,NUM_ENTITIES)
    #compute pred of triple
    #for i in iter:
        #learn mask
        #mask * adj matrix 
    #mask * adj matrix-> reduce output to ints
#edge_mask = tf.cast((k_hop_adj_mats*tf.nn.sigmoid(masks)) > .1,dtype=tf.int32)
#tf.nn.sigmoid(masks)

In [None]:
# head_indices = tf.where(edge_mask[0,2,h,:] == 1)
# tail_indices = tf.where(edge_mask[0,2,t,:] == 1)
# swap = tf.concat([tf.reshape(indices[:,1],(-1,1)),tf.reshape(indices[:,0],(-1,1))],axis=1)

# unique_indices = tf.concat([indices,swap],axis=0)