In [1]:
import tensorflow as tf
import utils
import numpy as np
import random as rn
import os

In [2]:
SEED = 123
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
tf.random.set_seed(SEED)
np.random.seed(SEED)
rn.seed(SEED)

data = np.load(os.path.join('..','data','royalty.npz'))

# triples = data['triples']
# traces = data['traces']
entities = data['entities'].tolist()
num_entities = len(entities)
relations = data['relations'].tolist()
num_relations = len(relations)
embedding_dim = 3
ent2idx = dict(zip(entities, range(num_entities)))
rel2idx = dict(zip(relations, range(num_relations)))

In [3]:
triples, traces = data['grandmother_triples'], data['grandmother_traces']

In [4]:
train2idx = utils.array2idx(triples, ent2idx,rel2idx)

In [5]:
# def get_neighbors(train_triples,entities):
    
#     adj_list = [[] for _ in entities]
#     for i,triplet in enumerate(train_triples):
#         adj_list[triplet[0]].append([i, triplet[2]])
#         #adj_list[triplet[2]].append([i, triplet[0]])

#     degrees = np.array([len(a) for a in adj_list])
#     adj_list = [np.array(a) for a in adj_list]

#     return adj_list, degrees

# adj_list, degrees = get_neighbors(train2idx,entities)

In [295]:
def neighbors(train2idx, entities):
    adj_list = [[] for _ in entities]
    for h,r,t in train2idx:
        adj_list[h].append([r, t])
        adj_list[t].append([r, h])

    degrees = np.array([len(a) for a in adj_list])
    adj_list = [np.array(a) for a in adj_list]
                 
    return adj_list, degrees

adj_list, degrees = neighbors(train2idx,entities)

In [439]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.initializers import RandomUniform
from tensorflow.python.ops import embedding_ops

entity_embeddings = Embedding(
    input_dim=num_entities,
    output_dim=embedding_dim,
    name='entity_embeddings',
    embeddings_initializer=RandomUniform(
        minval=-1,
        maxval=1,
        seed=SEED
        )
    )

In [446]:
head_input = tf.keras.Input(shape=(), name='head_input',dtype=tf.int64)
rel_input = tf.keras.Input(shape=(), name='rel_input',dtype=tf.int64)
tail_input = tf.keras.Input(shape=(), name='tail_input',dtype=tf.int64)
all_entities = tf.keras.Input(shape=(num_entities), name='all_entities',dtype=tf.int64)

In [444]:
adj_inputs = [tf.keras.Input(shape=(num_entities,num_entities),dtype=tf.float32) for _ in relations]

In [427]:
head_e = entity_embeddings(head_input)
tail_e = entity_embeddings(tail_input)
all_e = entity_embeddings(all_entities)

In [428]:
#tf.matmul(tf.ones((1,5)), tf.ones((5,10)))
#tf.keras.backend.dot(tf.ones((10,1)),tf.ones((1,5)))
#tf.squeeze(tf.tensordot(tf.ones((5,5)),tf.ones((5,1)),axes=1),axis=1)
#tf.tensordot(tf.ones((5,5)),tf.ones((5,5)),axes=1)

In [461]:
# class RGCN_Layer(tf.keras.layers.Layer):
#     def __init__(self,num_relations,output_dim,**kwargs):
#         super(RGCN_Layer,self).__init__(**kwargs)
#         self.num_relations = num_relations
#         self.output_dim = output_dim
    
#     def build(self,input_shape):

#         in_shape = input_shape[-1][-1]

#         self.W_r = self.add_weight(
#             shape=(self.num_relations,self.output_dim,in_shape),
#             trainable=True,
#             initializer="random_normal",
#             name='W_r'
#         )
        
#         self.W0 = self.add_weight(
#             shape=(self.output_dim,in_shape),
#             trainable=True,
#             initializer='random_normal',
#             name='W0'
#         )
        
#     def call(self,inputs):
        
#         head_input,tail_input = inputs
        
#         #filter_W_r = embedding_ops.embedding_lookup_v2(self.W_r,rel_input)
        
#         tail_update = tf.matmul(self.W_r,tail_input,transpose_b=True)
        
#         head_update = tf.matmul(self.W0,head_input,transpose_b=True)
        
#         update = tf.reduce_sum(tail_update + head_update, axis=0)

#         return tf.transpose(update)

class RGCN_Layer(tf.keras.layers.Layer):
    def __init__(self,num_relations,output_dim,**kwargs):
        super(RGCN_Layer,self).__init__(**kwargs)
        self.num_relations = num_relations
        self.output_dim = output_dim
        
    def build(self,input_shape):
        
        input_dim = int(input_shape[0][-1])
        
        self.relation_kernels = [
                self.add_weight(
                    shape=(input_dim, self.output_dim),
                    name="relation_kernels",
                    trainable=True,
                    initializer=tf.keras.initializers.RandomNormal(mean=0.0,stddev=1,seed=SEED)
                )
                for _ in range(self.num_relations)
            ]

        self.self_kernel = self.add_weight(
            shape=(input_dim, self.output_dim),
            name="self_kernel",
            trainable=True,
            initializer=tf.keras.initializers.RandomNormal(mean=0.0,stddev=1,seed=SEED)
        )
        
    def call(self, inputs):
        
        features, *A_mats = inputs

        output = tf.matmul(features,self.self_kernel)
        
        for i in range(self.num_relations):
            
            h = tf.tensordot(A_mats[i], features,axes=1)
            output += tf.tensordot(h,self.relation_kernels[i],axes=1)
            
        return tf.squeeze(output,axis=0)

# class RGCN_Layer(tf.keras.layers.Layer):
#     def __init__(self,num_relations,input_dim,output_dim,**kwargs):
#         super(RGCN_Layer,self).__init__(**kwargs)
#         self.num_relations = num_relations
#         self.input_dim = input_dim
#         self.output_dim = output_dim
        
#     def build(self,input_shape):
                
#         self.relation_kernels = self.add_weight(
#             shape=(self.num_relations,self.input_dim, self.output_dim),
#             name="relation_kernels",
#             trainable=True,
#             initializer=tf.keras.initializers.RandomNormal(mean=0.0,stddev=1,seed=SEED)
#         )

#         self.self_kernel = self.add_weight(
#             shape=(self.input_dim, self.output_dim),
#             name="self_kernel",
#             trainable=True,
#             initializer=tf.keras.initializers.RandomNormal(mean=0.0,stddev=1,seed=SEED)
#         )
        
#     def call(self,inputs):
        
#         adj_list, head_idx,rel_idx,tail_idx = inputs
        
#         #head_e = embedding_ops.embedding_lookup_v2(adj_list,head_idx)
#         head_e = adj_list[head_idx]
#         rel_e = 
                
#         return adj_list[head_idx]


#RGCN_Layer(num_relations=2,input_dim=3,output_dim=5)([adj_list,[6,7],[1,1],[2,2]])
#RGCN_Layer(num_relations=2,input_dim=3,output_dim=5)([adj_list,6,1,2])

#update dist mult -> input layer for entity,rel ints
#update custom training loop

In [462]:
#RGCN_Layer(num_relations=num_relations,output_dim=5)([all_e] + adj_inputs)


#RGCN_Layer(num_relations=num_relations,output_dim=5)([entity_embeddings(np.arange(num_entities))]+[np.eye(num_entities) for _ in relations])

RGCN_Layer(num_relations=num_relations,output_dim=5)([all_e]+adj_inputs)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'


<tf.Tensor 'rgcn__layer_110/Squeeze:0' shape=(5983, 5) dtype=float32>

In [464]:
class DistMult(tf.keras.layers.Layer):
    def __init__(self, num_relations,**kwargs):
        super(DistMult,self).__init__(**kwargs)
        self.num_relations = num_relations
        
    def build(self,input_shape):
        
        embedding_dim = input_shape[0][-1]
        
        self.kernel = self.add_weight(
            shape=(self.num_relations,embedding_dim),
            trainable=True,
            initializer=tf.keras.initializers.RandomNormal(mean=0.0,stddev=1,seed=SEED),
            name='rel_embedding'
        )
        
    def call(self,inputs):
        
        embeddings,head_idx,rel_idx,tail_idx = inputs
        
        head_e = embedding_ops.embedding_lookup_v2(embeddings,head_idx)
        rel_e = embedding_ops.embedding_lookup_v2(self.kernel,rel_idx)
        tail_e = embedding_ops.embedding_lookup_v2(embeddings,tail_idx)

        return tf.sigmoid(tf.reduce_sum(head_e*rel_e*tail_e, axis=-1))

In [465]:
# new_head = RGCN_Layer(num_relations=num_relations,output_dim=5)([head_e,tail_e])
# new_head = tf.keras.layers.Activation('sigmoid')(new_head)

# new_tail = RGCN_Layer(num_relations=num_relations,output_dim=5)([tail_e,head_e])
# new_tail = tf.keras.layers.Activation('sigmoid')(new_tail)

# output = DistMult(num_relations=num_relations)([new_head,rel_input,new_tail])
#DistMult(num_relations=num_relations)([np.ones((6,5)),[2,2],[2,2],[1,1]])

DistMult(num_relations=num_relations)([RGCN_Layer(num_relations=num_relations,output_dim=5)([all_e] + adj_inputs),head_input,rel_input,tail_input])


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


<tf.Tensor 'dist_mult_45/Sigmoid:0' shape=(None,) dtype=float32>

In [129]:
#model = tf.keras.Model([head_input,rel_input,tail_input],[output])

In [13]:
#model.compile(loss='binary_crossentropy', optimizer='sgd')

In [14]:
#model.fit(x=[train2idx[:,0],train2idx[:,1],train2idx[:,2]],y=np.ones(train2idx.shape[0]),epochs=1)

In [15]:
class RGCN_Model(tf.keras.Model):
    def __init__(self,num_entities,*args,**kwargs):
        super(RGCN_Model,self).__init__(*args, **kwargs)
        self.num_entities = num_entities
        
    def train_step(self,data):

        pos_head,rel,pos_tail = data[0]
        y = data[1]
        
        neg_head, neg_tail = utils.get_negative_triples(
            head=pos_head, 
            rel=rel, 
            tail=pos_tail,
            num_entities=self.num_entities
            )
        
        head = tf.concat([pos_head,neg_head],axis=0)
        rel = tf.concat([rel,rel],axis=0)
        tail = tf.concat([pos_tail,neg_tail],axis=0)
        
        y_neg = tf.zeros_like(y)
        
        y = tf.concat([y,y_neg],axis=0)
        
        with tf.GradientTape() as tape:
            
            y_pred = self([head,rel,tail],training=True)
            
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
            
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}

In [16]:
model = RGCN_Model(
    inputs=[
        head_input,
        rel_input,
        tail_input
    ],
    outputs=[
        output
    ],
    num_entities=num_entities
)

In [17]:
model.compile(loss='binary_crossentropy', optimizer='sgd')

In [18]:
model.fit(x=[train2idx[:,0],train2idx[:,1],train2idx[:,2]],y=np.ones(train2idx.shape[0]),epochs=2000,verbose=0)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


<tensorflow.python.keras.callbacks.History at 0x1941e2da10>

In [19]:
# import dgl
# import torch as th
# from dgl.nn import RelGraphConv
# g = dgl.graph(([0,1,2,3,2,5,0], [1,2,3,4,0,3,1]))
# conv = RelGraphConv(10, 5, 3, regularizer='basis')
# feat = th.ones(6, 10)
# etype = th.tensor(np.array([0,1,2,0,1,2,1]).astype(np.int64))
# conv(g, feat, etype)

In [20]:
(model.predict([train2idx[:,0],train2idx[:,1],train2idx[:,2]]) > .5).sum()/train2idx.shape[0]

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


0.7962226640159046

In [None]:
#tf.sparse.sparse_dense_matmul(sparse_tensor, zeros)

In [None]:
#zeros = tf.zeros(shape=[num_entities,num_relations,num_entities],dtype=tf.float64)
# adj = np.zeros(shape=[num_entities,num_relations,num_entities])

# for h,r,t in train2idx:
    
#     adj[h,r,t] = 1
    
# adj = tf.convert_to_tensor(adj)

In [None]:

#get neighbors of head+tail
#update rgcn -> for each triple, feed in neighbors (update weight matrix?, normalize by degrees)
#GNNEXPLAINER: 

In [None]:
#train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE)

In [None]:
mask = tf.Variable(
    initial_value=tf.random.uniform(
        minval=0,
        maxval=1,
        shape=[
            num_entities,
            num_relations,
            num_entities
        ]
    ),
    trainable=True,
    shape=[
        num_entities,
        num_relations,
        num_entities
    ],
    name='mask'
)


In [None]:
i = 100

# model.predict(
#     x=[
#         train2idx[i,0].reshape(-1,1),
#         train2idx[i,1].reshape(-1,1),
#         train2idx[i,2].reshape(-1,1)
#     ],
#     batch_size=1
# )
model.predict(
    x=[
        train2idx[:,0],
        train2idx[:,1],
        train2idx[:,2]
    ]
)

In [None]:
#for triple i:
    #get k hop subgraph?
    #compute pred 
    #define masks
    #optimize loss, return masks

In [None]:
# def get_adjlist(train2idx,entities):

#     adj_list = [[] for _ in entities]

#     for i,triplet in enumerate(train2idx):
#         adj_list[triplet[0]].append([i, triplet[2]])
#         adj_list[triplet[2]].append([i, triplet[0]])

#     degrees = np.array([len(a) for a in adj_list])
#     adj_list = [np.array(a) for a in adj_list]
    
#     return adj_list,degrees

In [None]:
#get_adjlist(train2idx,entities)