In [1]:
import tensorflow as tf
import numpy as np
import os
import utils
import random as rn
import RGCN

In [2]:
# Reproducibility setup: seed every RNG this notebook touches (TensorFlow,
# NumPy, and Python's `random`, imported as `rn`) with the same value.
SEED = 123
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it from an already-running kernel presumably only affects child
# processes -- confirm, or export it before launching Jupyter.
os.environ['PYTHONHASHSEED'] = str(SEED)
# NOTE(review): this is set after `import tensorflow`; verify that the installed
# TF version still honours TF_DETERMINISTIC_OPS when it is set post-import.
os.environ['TF_DETERMINISTIC_OPS'] = '1'
tf.random.set_seed(SEED)
np.random.seed(SEED)
rn.seed(SEED)

In [3]:
# Load the dataset archive ../data/royalty.npz (path is relative to the
# notebook's working directory). Later cells index it by string keys such as
# 'sister_entities' and RULE + '_relations'.
data = np.load(os.path.join('..','data','royalty.npz'))

In [4]:
# Rule under study: used to pick the RULE + '_entities' / RULE + '_relations'
# arrays from `data` and the RULE + '.h5' weights file in later cells.
RULE = 'spouse'

In [5]:
# Disabled alternative: use every rule stored in the archive.
# triples,traces,nopred = utils.concat_triples(data, data['rules'])
# entities = data['all_entities'].tolist()
# relations = data['all_relations'].tolist()

# Gather the triples of the selected rule together with 'brother' and 'sister'.
rule_names = [RULE, 'brother', 'sister']
triples, traces, nopred = utils.concat_triples(data, rule_names)

# Per-rule vocabularies as plain Python lists.
brother_entities = data['brother_entities'].tolist()
brother_relations = data['brother_relations'].tolist()

sister_entities = data['sister_entities'].tolist()
sister_relations = data['sister_relations'].tolist()

rule_entities = data[f'{RULE}_entities'].tolist()
rule_relations = data[f'{RULE}_relations'].tolist()

# Merged vocabularies: np.unique both de-duplicates and sorts the combined lists.
entities = np.unique(rule_entities + brother_entities + sister_entities).tolist()
relations = np.unique(rule_relations + brother_relations + sister_relations).tolist()

In [6]:
# Vocabulary sizes derived from the merged entity / relation lists.
NUM_ENTITIES = len(entities)
NUM_RELATIONS = len(relations)
# Dimensions handed to RGCN.get_RGCN_Model.
EMBEDDING_DIM = 50
OUTPUT_DIM = 50
# Learning rate for the Adam optimizer created further down.
LEARNING_RATE = .01
# NOTE(review): in the visible part of this notebook NUM_EPOCHS and THRESHOLD
# are not read by any live code (the optimisation loop uses range(1); both
# appear only in a commented-out np.savez call) -- confirm whether they should
# be wired in.
NUM_EPOCHS = 2
THRESHOLD = .01
# Hop count passed as `k` to get_computation_graph.
K = 1

In [7]:
# Label -> integer id lookups, numbered by position in the vocabulary lists.
ent2idx = {entity: idx for idx, entity in enumerate(entities)}
rel2idx = {relation: idx for idx, relation in enumerate(relations)}

# Row vector [[0, 1, ..., NUM_ENTITIES - 1]] of int64 ids, fed to the model as
# its first input.
all_indices = tf.expand_dims(tf.range(NUM_ENTITIES, dtype=tf.int64), axis=0)

# Build the R-GCN model from the hyper-parameters defined above.
rgcn_config = dict(
    num_entities=NUM_ENTITIES,
    num_relations=NUM_RELATIONS,
    embedding_dim=EMBEDDING_DIM,
    output_dim=OUTPUT_DIM,
    seed=SEED,
)
model = RGCN.get_RGCN_Model(**rgcn_config)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


In [43]:
# Restore the trained weights for the selected rule from
# ../data/weights/<RULE>.h5.
model.load_weights(os.path.join('..','data','weights',RULE+'.h5'))

# Adam optimizer; the optimisation loop below applies it to the mask variables.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# Pull the trained weights out of three named layers. In the visible part of
# the notebook these four names are referenced only from commented-out code.
relation_embeddings = model.get_layer('output').get_weights()[0]

# The 'rgcn__layer' layer is expected to expose exactly two weight arrays
# (the unpacking below fails otherwise).
relation_kernel, self_kernel = model.get_layer('rgcn__layer').get_weights()

entity_embeddings = model.get_layer('entity_embeddings').get_weights()[0]

In [9]:
# Result accumulators; nothing in the visible cells appends to them yet.
jaccard_scores = []
preds = []

# Run triples / traces / nopred through utils.array2idx with the two lookups.
# Presumably this maps entity and relation labels to integer ids -- confirm
# against utils.array2idx.
train2idx = utils.array2idx(triples,ent2idx,rel2idx)
trainexp2idx = utils.array2idx(traces,ent2idx,rel2idx)
nopred2idx = utils.array2idx(nopred,ent2idx,rel2idx)

# Graph used for neighbour lookups: the triples, the traces flattened to rows of
# three columns, and the nopred triples, stacked along axis 0.
adjacency_data = tf.concat([train2idx,trainexp2idx.reshape(-1,3),nopred2idx],axis=0)

# NOTE(review): the "test" arrays are computed from the same `triples` and
# `traces` as the train arrays above, so they hold the same data -- confirm
# this is intended.
test2idx = utils.array2idx(triples,ent2idx,rel2idx)
testexp2idx = utils.array2idx(traces,ent2idx,rel2idx)

In [44]:
def get_neighbors(data_subset,node_idx):
    '''Return every row of data_subset that has node_idx in column 0 or column 2.

    Rows matching on column 0 come first, followed by rows matching on
    column 2; a row that matches on both columns therefore appears twice.
    '''
    as_head = data_subset[:, 0] == node_idx
    as_tail = data_subset[:, 2] == node_idx

    return tf.concat([data_subset[as_head], data_subset[as_tail]], axis=0)

def get_computation_graph(head,rel,tail,k,data,num_relations):

    '''Get k hop neighbors (may include duplicates)

    Hop 1 collects every row of `data` that touches `head` or `tail`. Each
    further hop scans the rows added by the previous hop and appends the rows
    touching any node that has not been expanded yet.

    Args:
        head, tail: node ids of the triple whose neighbourhood is wanted.
        rel: unused; kept so existing callers keep working.
        k: number of hops; values <= 1 return only the hop-1 rows.
        data: 2-D array/tensor whose columns 0 and 2 hold node ids.
        num_relations: unused; kept so existing callers keep working.

    Returns:
        Tensor of rows of `data` stacked in discovery order. Rows can repeat.
    '''

    neighbors_head = get_neighbors(data,head)
    neighbors_tail = get_neighbors(data,tail)

    all_neighbors = tf.concat([neighbors_head,neighbors_tail],axis=0)

    # Ids of nodes already expanded, stored as Python ints so that membership
    # is a hash lookup (tensor elements themselves are not reliable set keys).
    # head/tail are deliberately not pre-marked: with k > 1 their rows are
    # appended again when first met, matching the previous k == 2 output.
    seen_nodes = set()

    # Rows [frontier_start, frontier_end) are the ones added by the previous
    # hop. Re-scanning only the hop-1 rows on every pass (as a single
    # pre-computed row count would) never reaches beyond 2 hops.
    frontier_start = 0

    for _ in range(k-1):#-1 since we already computed 1st degree neighbors above

        frontier_end = int(all_neighbors.shape[0])
        new_rows = []

        for idx in range(frontier_start, frontier_end):

            # Column 0 first, then column 2, to keep the original row order.
            for col in (0, 2):

                node = all_neighbors[idx, col]
                node_key = int(node)

                if node_key not in seen_nodes:

                    seen_nodes.add(node_key)
                    new_rows.append(get_neighbors(data, node))

        frontier_start = frontier_end

        if new_rows:
            all_neighbors = tf.concat([all_neighbors] + new_rows, axis=0)

    return all_neighbors


In [53]:
# Row of test2idx to explain.
i = 0

head, rel, tail = test2idx[i, 0], test2idx[i, 1], test2idx[i, 2]

# K-hop neighbourhood of the (head, tail) pair, then adjacency matrices built
# from it via utils.get_adj_mats.
comp_graph = get_computation_graph(head, rel, tail, K, adjacency_data, NUM_RELATIONS)

adj_mats = utils.get_adj_mats(comp_graph, NUM_ENTITIES, NUM_RELATIONS)

# One trainable mask per relation, shaped (1, NUM_ENTITIES, NUM_ENTITIES) and
# drawn from N(0, 1). The loop index is deliberately not called `i`, so the
# row index chosen above stays untouched.
masks = []
for rel_idx in range(NUM_RELATIONS):
    initial_mask = tf.random.normal(
        (1, NUM_ENTITIES, NUM_ENTITIES),
        mean=0,
        stddev=1,
        dtype=tf.float32,
        seed=SEED,
    )
    masks.append(
        tf.Variable(
            initial_value=initial_mask,
            name='mask_' + str(rel_idx),
            trainable=True,
        )
    )

In [89]:
for epoch in range(1):

    # Automatic variable tracking is off; only the masks are watched, so they
    # are the only variables that receive gradients.
    with tf.GradientTape(watch_accessed_variables=False) as tape:

        tape.watch(masks)

        # Gate each relation's adjacency with the sigmoid of its mask.
        masked_adjs = []
        for rel_idx in range(NUM_RELATIONS):
            masked_adjs.append(adj_mats[rel_idx] * tf.sigmoid(masks[rel_idx]))

        model_inputs = [
            all_indices,
            tf.reshape(head, (1, -1)),
            tf.reshape(rel, (1, -1)),
            tf.reshape(tail, (1, -1)),
            masked_adjs,
        ]
        pred = model(model_inputs)

        # Negative log of the prediction; the small constant avoids log(0).
        loss = -tf.math.log(pred + 1e-5)

    print(f"Loss {tf.squeeze(loss).numpy()} @ epoch {epoch}")

    # One optimizer step on the masks.
    grads = tape.gradient(loss, masks)
    optimizer.apply_gradients(zip(grads, masks))
#         head_output = tf.matmul(tf.reshape(entity_embeddings[head],(1,-1)),self_kernel)
#         tail_output = tf.matmul( tf.reshape(entity_embeddings[tail],(1,-1)),self_kernel)

#         for i in range(NUM_RELATIONS):
            
#             adj_i = tf.sparse.reshape(adj_mats[i] * tf.sigmoid(masks[i]), 
#                                       (NUM_ENTITIES,NUM_ENTITIES))
            
#             adj_indices = adj_i.indices
            
#             unique_vals, _ = tf.unique(tf.reshape(adj_indices,(-1)))
            
#             embeddings = tf.nn.embedding_lookup(entity_embeddings,unique_vals)
            
#             print(embeddings)
            
#             break

#             sum_embeddings = tf.matmul(adj_i,entity_embeddings)

#             head_update = tf.reshape(sum_embeddings[head],(1,-1))
#             tail_update = tf.reshape(sum_embeddings[tail],(1,-1))

#             head_output += tf.matmul(head_update,relation_kernel[i])
#             tail_output += tf.matmul(tail_update,relation_kernel[i])

    #             #adj_i = tf.sparse.to_dense(adj_mats[i])[0] * tf.sigmoid(masks[i][0])
#             adj_i = tf.sparse.reshape(adj_mats[i] * tf.sigmoid(masks[i]), (NUM_ENTITIES,NUM_ENTITIES))
            
# #             idx = adj_i.indices
            
            
            
# #             head_update = tf.nn.embedding_lookup(entity_embeddings,)
            
            
# # #             tf.nn.embedding_lookup(entity_embeddings,
# # #                        tf.concat([tf.gather(adj_mats[5].indices,[1],axis=1),
# # #                                   tf.gather(adj_mats[5].indices, [2],axis=1)],
# # #                                  axis=0)[:,0])

#             head_update = tf.sparse.sparse_dense_matmul(adj_i, tf.reshape(entity_embeddings[head], (1,-1)))
#             tail_update = tf.sparse.sparse_dense_matmul(adj_i, tf.reshape(entity_embeddings[tail], (1,-1)))

#             sum_embeddings = tf.sparse.sparse_dense_matmul(adj_i,entity_embeddings)

#             head_update = tf.reshape(sum_embeddings[head],(1,-1))
#             tail_update = tf.reshape(sum_embeddings[tail],(1,-1))

            #head_output += tf.matmul(head_update,relation_kernel[i])
            #tail_output += tf.matmul(tail_update,relation_kernel[i])            
                
    

#         head_output = tf.sigmoid(head_output)
#         tail_output = tf.sigmoid(tail_output)



Loss 0.04588475078344345 @ epoch 0


In [None]:
# Densify adj_i reshaped to (NUM_ENTITIES, NUM_ENTITIES) and display row `head`.
# NOTE(review): in the visible notebook `adj_i` is assigned only inside
# commented-out code, so on a fresh kernel this cell raises NameError unless
# adj_i is defined elsewhere.
tf.sparse.to_dense(tf.sparse.reshape(adj_i, (NUM_ENTITIES,NUM_ENTITIES)))[head]

In [None]:
#tf.sparse.to_dense(tf.sparse.slice(adj_i, [head,head+1],size=(1,NUM_ENTITIES)))

In [None]:
#tf.unique(tf.concat([tf.gather(adj_mats[5].indices,[1],axis=1),tf.gather(adj_mats[5].indices, [2],axis=1)],axis=0))

#tf.unique(tf.reshape(tf.concat([tf.gather(adj_mats[5].indices,[1],axis=1),tf.gather(adj_mats[5].indices, [2],axis=1)],axis=0),(-1)))

# Display the `.indices` of entry 5 of adj_mats.
# NOTE(review): 5 is a hard-coded index -- presumably the relation of interest;
# confirm against rel2idx.
adj_mats[5].indices

In [11]:
# Adjacency matrices built from entry i of testexp2idx, using the same helper
# and sizes as adj_mats. Presumably the ground-truth explanation for triple i --
# confirm against utils.get_adj_mats.
true_subgraphs = utils.get_adj_mats(testexp2idx[i],NUM_ENTITIES,NUM_RELATIONS)

In [16]:
# Display the `.indices` of entry 5 of true_subgraphs (same hard-coded index as
# used for adj_mats above).
true_subgraphs[5].indices

<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[   0,  922, 8507],
       [   0, 8507,  922]])>

In [14]:
from sklearn.metrics import jaccard_score
from scipy.sparse import csr_matrix

In [42]:
# Scratch check: a single (row, col) coordinate pair held in an int64 tensor.
a = tf.convert_to_tensor([[9892,9892]],dtype=tf.int64)

# Build a NUM_ENTITIES x NUM_ENTITIES SciPy CSR matrix with one stored value
# (1.0) at that coordinate; the cell displays the resulting matrix summary.
csr_matrix((tf.ones(1),(a[:,0],a[:,1])),shape=(NUM_ENTITIES,NUM_ENTITIES))

<10358x10358 sparse matrix of type '<class 'numpy.float32'>'
	with 1 stored elements in Compressed Sparse Row format>

In [None]:
# np.savez(os.path.join('..','data','preds','gnn_explainer_'+RULE+'_preds.npz'),
#     preds=pred_graphs,embedding_dim=EMBEDDING_DIM,k=K,
#     threshold=THRESHOLD,learning_rate=LEARNING_RATE,num_epochs=NUM_EPOCHS
#     )

In [None]:
import numpy as np
import os
# Load ../data/preds/gnn_explainer_spouse_preds.npz.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it on files from a trusted source.
# NOTE(review): the rule name 'spouse' is hard-coded here rather than taken from
# RULE; this cell re-imports numpy/os, so it may be meant to run on its own.
in_file=np.load(os.path.join('..','data','preds','gnn_explainer_'+'spouse'+'_preds.npz'),allow_pickle=True)

In [None]:

# head_output = tf.matmul(tf.reshape(entity_embeddings[head],(1,-1)),self_kernel)
# tail_output = tf.matmul(tf.reshape(entity_embeddings[tail],(1,-1)),self_kernel)

# for i in range(NUM_RELATIONS):

#     adj_i = tf.sparse.to_dense(adj_mats[i])[0] * tf.sigmoid(masks[i][0])

#     sum_embeddings = tf.matmul(adj_i,entity_embeddings)

#     head_update = tf.reshape(sum_embeddings[head],(1,-1))
#     tail_update = tf.reshape(sum_embeddings[tail],(1,-1))

#     head_output += tf.matmul(head_update,relation_kernel[i])
#     tail_output += tf.matmul(tail_update,relation_kernel[i])

# # for i in range(NUM_RELATIONS):

# #     adj_i = tf.sparse.reshape(adj_mats[i] * tf.sigmoid(masks[i]), (NUM_ENTITIES,NUM_ENTITIES))

# head_output = tf.sigmoid(head_output)
# tail_output = tf.sigmoid(tail_output)

# pred = tf.sigmoid(tf.reduce_sum(head_output*relation_kernel[rel]*tail_output))