In [1]:
import tensorflow as tf
import utils
import numpy as np
import random as rn
import os

# Single seed shared by every random number generator seeded below.
SEED = 123
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only affects child processes, not this kernel — confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Environment flag asking TensorFlow for deterministic op implementations
# (TODO confirm it is honoured by the installed TensorFlow version).
os.environ['TF_DETERMINISTIC_OPS'] = '1'
# Seed TensorFlow, NumPy's global RNG and the stdlib `random` module (imported as `rn`).
tf.random.set_seed(SEED)
np.random.seed(SEED)
rn.seed(SEED)

In [316]:
import scipy.sparse as sp

def encode_onehot(labels):
    """One-hot encode a sequence of class labels.

    Args:
        labels: iterable of hashable, mutually comparable labels (e.g. strings).

    Returns:
        An int32 array with one row per label and one column per distinct class.
        Columns follow the sorted order of the distinct labels, so the encoding
        is the same on every run (iterating a plain ``set`` gives no such
        guarantee for strings).
    """
    # Sorting fixes the class -> column assignment across interpreter sessions.
    classes = sorted(set(labels))
    # Build the identity matrix once instead of once per class.
    identity = np.identity(len(classes), dtype=np.int32)
    class_to_row = {c: identity[i] for i, c in enumerate(classes)}
    labels_onehot = np.array([class_to_row[label] for label in labels], dtype=np.int32)
    return labels_onehot


def load_data(path="/Users/nhalliwe/Desktop/keras-gcn-master/kegra/data/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now).

    Reads ``<dataset>.content`` and ``<dataset>.cites`` from the directory `path`.
    In the content file the first column is used as the node id, the last column
    as the class label and everything in between as the feature vector. Each row
    of the cites file is a pair of node ids describing one edge.

    Args:
        path: directory holding the two files; a trailing separator is optional.
            NOTE(review): the default is an absolute path on one specific
            machine — callers elsewhere should pass their own directory.
        dataset: base name of the two files.

    Returns:
        (features, adj, labels): the dense feature matrix, the symmetric sparse
        adjacency matrix and the one-hot encoded labels.
    """
    print('Loading {} dataset...'.format(dataset))

    # os.path.join works whether or not `path` ends with a separator; plain
    # string concatenation silently produced a wrong file name without one.
    content_file = os.path.join(path, "{}.content".format(dataset))
    cites_file = os.path.join(path, "{}.cites".format(dataset))

    idx_features_labels = np.genfromtxt(content_file, dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    # Map the node ids found in the file to consecutive row numbers.
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt(cites_file, dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)

    # build symmetric adjacency matrix
    # Wherever the transposed entry is larger, replace the entry by it, so both
    # directions of every edge end up with the same weight.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    print('Dataset has {} nodes, {} edges, {} features.'.format(adj.shape[0], edges.shape[0], features.shape[1]))

    return features.todense(), adj, labels


def normalize_adj(adj, symmetric=True):
    """Degree-normalise an adjacency matrix.

    Args:
        adj: scipy sparse adjacency matrix (any numeric dtype).
        symmetric: if True scale by D^-1/2 on both sides, otherwise
            left-multiply by D^-1, where D holds the row sums of `adj`.

    Returns:
        The normalised matrix in CSR format.
    """
    degrees = np.asarray(adj.sum(1)).flatten()
    # Integer degrees cannot be raised to a negative integer power in NumPy
    # (ValueError), so promote them; float inputs keep their own precision.
    if not np.issubdtype(degrees.dtype, np.floating):
        degrees = degrees.astype(np.float64)

    exponent = -0.5 if symmetric else -1.0
    with np.errstate(divide='ignore'):
        scale = np.power(degrees, exponent)
    # Rows without any edge have degree 0 -> inf; zero their scale so they
    # cannot inject inf/nan into the product.
    scale[np.isinf(scale)] = 0.0
    d = sp.diags(scale, 0)

    if symmetric:
        a_norm = adj.dot(d).transpose().dot(d).tocsr()
    else:
        a_norm = d.dot(adj).tocsr()
    return a_norm


def preprocess_adj(adj, symmetric=True):
    """Add a self-loop to every node of `adj`, then degree-normalise the result.

    `symmetric` is forwarded unchanged to `normalize_adj`.
    """
    with_self_loops = adj + sp.eye(adj.shape[0])
    return normalize_adj(with_self_loops, symmetric)


def sample_mask(idx, l):
    """Return a boolean vector of length `l` that is True at the positions in `idx`.

    Args:
        idx: index, sequence of indices or ``range`` to switch on.
        l: length of the mask.

    Returns:
        A 1-D boolean array.
    """
    # The builtin `bool` replaces the `np.bool` alias, which was removed in
    # NumPy 1.24 and now raises AttributeError.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask


def get_splits(y):
    """Split the label matrix `y` into fixed train / validation / test parts.

    Rows 0-139 form the training set, rows 200-499 the validation set and
    rows 500-1499 the test set. Each returned label array has the shape of `y`
    and is zero outside its own rows.

    Returns:
        (y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask),
        where the idx_* values are ``range`` objects and `train_mask` comes
        from `sample_mask` over the training rows.
    """
    idx_train, idx_val, idx_test = range(140), range(200, 500), range(500, 1500)

    def _labels_for(rows):
        # Copy only the requested rows of y into an otherwise all-zero array.
        kept = np.zeros(y.shape, dtype=np.int32)
        kept[rows] = y[rows]
        return kept

    y_train, y_val, y_test = [_labels_for(rows) for rows in (idx_train, idx_val, idx_test)]
    train_mask = sample_mask(idx_train, y.shape[0])
    return y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask
# Load the cora features, adjacency matrix and one-hot labels from load_data's default path.
X, A, y = load_data(dataset='cora')
# Derive the fixed train / validation / test label arrays, index ranges and training mask.
y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)

Loading cora dataset...
Dataset has 2708 nodes, 5429 edges, 1433 features.


In [349]:
from sklearn.metrics import accuracy_score

In [308]:
# Open the archive ./data/royalty.npz (path is relative to the working directory).
data = np.load(os.path.join('.','data','royalty.npz'))

# Arrays stored in the archive. Later cells unpack each row of `triples` as
# (head, relation, tail); what `traces` holds is not visible here — TODO document.
triples = data['triples']
traces = data['traces']
# Entities and relations are converted to plain Python lists and counted.
entities = data['entities'].tolist()
num_entities = len(entities)
relations = data['relations'].tolist()
num_relations = len(relations)

In [327]:
from sklearn.model_selection import train_test_split

# Number of triples = number of rows in `triples`.
num_triples = triples.shape[0]
# Split the triple row numbers: 33% go to the test set, seeded with SEED for repeatability.
train_idx, test_idx = train_test_split(range(num_triples),random_state=SEED,test_size=0.33)

In [328]:
# Boolean membership vectors over the triples: True where the triple's row
# number belongs to the train / test split respectively.
train_mask = np.zeros(num_triples, dtype=bool)
test_mask = np.zeros(num_triples, dtype=bool)

train_mask[train_idx] = True
test_mask[test_idx] = True

In [329]:
# Edge list of [head_index, tail_index] pairs; filled in by the next cell.
indices = []
# One [i, i] pair per entity: appended to the edge list as self-loops and
# reused later as the coordinates of a diagonal sparse matrix.
self_loop = [[i,i] for i in range(num_entities)]

In [330]:
# Map every entity to the position of its first occurrence so each lookup is
# O(1); `entities.index(...)` rescanned the whole list for every triple.
# An entity missing from `entities` now raises KeyError instead of ValueError.
entity_to_idx = {}
for position, entity in enumerate(entities):
    entity_to_idx.setdefault(entity, position)

# Rebuild the edge list from scratch (instead of appending to whatever
# `indices` currently holds) so re-running this cell cannot duplicate edges.
indices = [[entity_to_idx[h], entity_to_idx[t]] for h, _, t in triples]

# Add one self-loop per entity.
indices += self_loop

In [378]:
# Drop duplicate [head, tail] pairs; np.unique also returns the rows in sorted order.
indices = np.unique(indices,axis=0).tolist()

In [379]:
# One weight of 1.0 per remaining edge.
values = np.ones(len(indices))

In [380]:
# Sparse num_entities x num_entities adjacency matrix (self-loops included) built from the edge list.
A_hat_sparse = tf.sparse.SparseTensor(indices,values,dense_shape=[num_entities,num_entities])

In [381]:
# Sum the adjacency matrix over axis 0, giving one value per column.
# NOTE(review): normalize_adj in this notebook sums over axis 1 instead; the two
# differ whenever the adjacency matrix is not symmetric — confirm which is intended.
diag = tf.sparse.reduce_sum(A_hat_sparse,axis=0)

In [382]:
# Diagonal sparse matrix holding the sums from `diag` at the [i, i] coordinates.
D_hat = tf.sparse.SparseTensor(self_loop,diag,dense_shape=[num_entities,num_entities])

In [335]:
# Random feature matrix with 5 columns, one row per entity.
# NOTE(review): this rebinds `X`, which earlier held the cora features returned by load_data.
X = tf.convert_to_tensor(np.random.rand(num_entities,5))

In [386]:
#y = np.array([relations.index(i) for i in triples[:,1]])


(9186,)

In [340]:
# `y` must hold one relation index per triple for the triple-level masks and
# `num_classes=num_relations` below to line up. On a fresh top-to-bottom run
# `y` would still be the cora one-hot label matrix (the assignment that used
# to define it is commented out), so it is derived here explicitly.
relation_to_idx = {}
for position, relation in enumerate(relations):
    # Keep the first occurrence, matching what `relations.index(...)` returns.
    relation_to_idx.setdefault(relation, position)
y = np.array([relation_to_idx[r] for r in triples[:, 1]])

# One-hot encode the relation index of the training / test triples.
y_train = tf.keras.utils.to_categorical(y[train_mask],num_classes=num_relations)
y_test = tf.keras.utils.to_categorical(y[test_mask],num_classes=num_relations)

In [4]:
from networkx import karate_club_graph, to_numpy_matrix

# Karate-club graph as a dense adjacency matrix with rows/columns in sorted node order.
zkc = karate_club_graph()
order = sorted(list(zkc.nodes()))
A = to_numpy_matrix(zkc, nodelist=order)
I = np.eye(zkc.number_of_nodes())
# Adjacency matrix with a self-loop added to every node.
A_hat = A + I

# Column sums of A_hat (one degree per node). A separate name is used so that
# `D_hat` only ever refers to the matrix below.
degrees = np.array(np.sum(A_hat, axis=0))[0]
# Dense diagonal matrix whose entries are 1 / degree.
D_hat = np.linalg.inv(np.diag(degrees))

# Coordinates and square roots of the non-zero entries of D_hat, i.e.
# degree^-1/2 on the diagonal. Vectorised replacement for the nested loops;
# np.argwhere and boolean indexing both walk the matrix in row-major order,
# so coordinates and values stay aligned.
D_nonzero = D_hat != 0
D_indices = np.argwhere(D_nonzero).tolist()
D_values = list(np.sqrt(D_hat[D_nonzero]))

# Shape is taken from the matrix instead of the hard-coded [34, 34].
D_hat_inv_sparse = tf.sparse.SparseTensor(D_indices,values=D_values,dense_shape=list(D_hat.shape))
# W_1 = np.random.normal(
#     loc=0, scale=1, size=(zkc.number_of_nodes(), 4))
# W_2 = np.random.normal(
#     loc=0, size=(W_1.shape[1], 2))

# def gcn_layer(A_hat, D_hat, X, W):
#     return D_hat**-1 * A_hat * X * W
# H_1 = gcn_layer(A_hat, D_hat, I, W_1)
# H_2 = gcn_layer(A_hat, D_hat, H_1, W_2)
# output = H_2

In [5]:
#from scipy.sparse import csr_matrix

# A_hat = csr_matrix(np.array(A_hat))
# I = csr_matrix(np.array(I))
# D_hat = csr_matrix(np.array(D_hat))
# Sparse copy of the identity matrix `I`: coordinates and values of its
# non-zero entries, extracted in row-major order without nested Python loops.
X_nonzero = np.asarray(I) != 0
X_indices = np.argwhere(X_nonzero).tolist()
X_values = list(np.asarray(I)[X_nonzero])

# Shape follows `I` instead of the hard-coded [34, 34].
X_sparse = tf.sparse.SparseTensor(X_indices,values=X_values,dense_shape=list(I.shape))

In [6]:
# Coordinates of every entry of A_hat that equals exactly 1, in row-major
# order. np.asarray makes the comparison yield a plain ndarray regardless of
# the container type of A_hat.
adj_indices = np.argwhere(np.asarray(A_hat) == 1.).tolist()

# Sparse adjacency matrix with weight 1.0 per stored entry; the shape follows
# A_hat instead of the hard-coded [34, 34].
# NOTE(review): this rebinds `A_hat_sparse`, which an earlier cell built for the royalty graph.
A_hat_sparse = tf.sparse.SparseTensor(adj_indices,np.ones(len(adj_indices)), dense_shape=list(A_hat.shape))

In [7]:
# indices = tf.cast(tf.convert_to_tensor(indices),dtype=tf.int64)
# values = tf.cast(tf.convert_to_tensor(np.ones(len(indices))),dtype=tf.int64)

In [8]:
#tf.matmul(np.randn(A_hat.shape))
#tf.matmul(A_hat,A_hat,a_is_sparse=True,b_is_sparse=True)

#tf.matmul(a_sparse,a_sparse,a_is_sparse=True,b_is_sparse=True)

In [9]:
class GCN(tf.keras.layers.Layer):
    """Graph-convolution layer with a single trainable kernel.

    The layer is called on three inputs, unpacked as ``(D_hat_inv, A_hat, H)``,
    and returns ``D_hat_inv . (A_hat . (D_hat_inv . (H . kernel)))``.

    Args:
        units: number of output columns of the kernel.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel; its row count is the last dimension of the last input's shape."""
        feature_dim = input_shape[-1][-1]
        self.kernel = self.add_weight(
            name='kernel',
            shape=(feature_dim, self.units),
            initializer=tf.keras.initializers.RandomNormal(seed=SEED),
            trainable=True,
        )

    def call(self, inputs):
        """Apply the four chained products, innermost first."""
        D_hat_inv, A_hat, H = inputs
        dot = tf.keras.backend.dot

        projected = dot(H, self.kernel)
        scaled = dot(D_hat_inv, projected)
        aggregated = dot(A_hat, scaled)
        return dot(D_hat_inv, aggregated)

    def get_config(self):
        """Return the base layer config extended with `units`."""
        return {**super().get_config(), 'units': self.units}

In [10]:
#k hop subgraph -> Adjacency matrix of k neighbors? Features for k neighbors
#define masks (2 vectors, 1 for each node, feature mask?)
#feed masked data into model to compute predictions: subgraph preds vs masked subgraph preds

#why return node and edge feature mask?

#a = tf.keras.layers.ReLU()(GCN(4)([D_hat**-1,A_hat,I])).numpy()

In [11]:
# Sizes used by this toy model. NUM_FEATURES is the number of rows of `I`
# (one per graph node); it is the width of the two sparse inputs and the
# number of samples fed to `fit`.
NUM_FEATURES = I.shape[0]
FEATURE_DIM = 5   # columns of the dense node-feature matrix
NUM_CLASSES = 4   # output units of the GCN layer / width of the one-hot target

# Node features sized to this graph. The global `X` is last assigned as a
# (num_entities, 5) matrix for the royalty data, whose row count does not
# match the NUM_FEATURES samples fed to `fit`, so it cannot be used here.
X_karate = tf.convert_to_tensor(np.random.rand(NUM_FEATURES, FEATURE_DIM))

feature_input = tf.keras.layers.Input(shape=(FEATURE_DIM,),sparse=False,name='feature_input')
adjacency_input = tf.keras.layers.Input(shape=(NUM_FEATURES,),sparse=True,name='adjacency_input')
degree_input = tf.keras.layers.Input(shape=(NUM_FEATURES,),sparse=True,name='degree_input')

# Single GCN layer followed by an element-wise sigmoid.
gcn_ = GCN(NUM_CLASSES,name='gcn')([degree_input,adjacency_input,feature_input])
a2 = tf.keras.layers.Activation('sigmoid')(gcn_)
model = tf.keras.Model(inputs=[degree_input,adjacency_input,feature_input], outputs=a2)

model.compile(optimizer='sgd',loss='categorical_crossentropy')

# Toy target: every node is labelled class 1. The whole graph is one batch.
y_karate = tf.keras.utils.to_categorical(np.ones(NUM_FEATURES),NUM_CLASSES)
model.fit(x=[D_hat_inv_sparse,A_hat_sparse,X_karate],y=y_karate,
          batch_size=NUM_FEATURES,epochs=1)



<tensorflow.python.keras.callbacks.History at 0x14ef5f610>

In [344]:
# input_data = np.array([
        
#             [0, 1, 2],
#             [2, 3, 4],
#             [4, 5, 6],
#             [7, 7, 8],
        
#     ], dtype=K.floatx())
# input_edge = np.array([
        
#             [1, 1, 1, 0],
#             [1, 1, 0, 0],
#             [1, 0, 1, 0],
#             [0, 0, 0, 1],
        
#     ], dtype='int32')


In [13]:
# Indicator vector with one slot per row of A_hat; only rows 0 and 1 are selected.
mask = np.zeros(A_hat.shape[0])
mask[[0, 1]] = 1
mask

array([1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [14]:
#tf.boolean_mask(A_hat_sparse,mask)
def boolean_mask_sparse_1d(sparse_tensor, mask, axis=0):  # mask is assumed to be 1D
    """Keep only the positions along `axis` of `sparse_tensor` that `mask` selects.

    Stored entries whose coordinate along `axis` has a zero/False mask value are
    dropped; the remaining entries are re-indexed along `axis` so the kept
    positions become 0, 1, 2, ... in their original order.

    Args:
        sparse_tensor: the tf.SparseTensor to filter.
        mask: 1-D mask with one element per position along `axis`
            (the demo call below passes a NumPy array of 0./1. values).
        axis: dimension along which the mask is applied.

    Returns:
        A new tf.SparseTensor whose size along `axis` is the number of
        non-zero mask elements.
    """
    mask = tf.convert_to_tensor(mask)
    # Coordinate of every stored entry along the masked axis.
    ind = sparse_tensor.indices[:, axis]
    # Mask value of every stored entry, looked up through that coordinate.
    mask_sp = tf.gather(mask, ind)
    # The masked axis shrinks to the number of selected positions.
    new_size = tf.math.count_nonzero(mask)
    new_shape = tf.concat([sparse_tensor.shape[:axis], [new_size],
                           sparse_tensor.shape[axis + 1:]], axis=0)
    new_shape = tf.dtypes.cast(new_shape, tf.int64)
    # Exclusive cumulative sum: for each position, how many selected positions
    # come before it — which is its new coordinate if it is kept.
    mask_count = tf.cumsum(tf.dtypes.cast(mask, tf.int64), exclusive=True)
    # Coordinates of the entries that survive the mask.
    masked_idx = tf.boolean_mask(sparse_tensor.indices, mask_sp)
    # Replace the coordinate along `axis` by the re-numbered one; other columns are untouched.
    new_idx_axis = tf.gather(mask_count, masked_idx[:, axis])
    new_idx = tf.concat([masked_idx[:, :axis],
                         tf.expand_dims(new_idx_axis, 1),
                         masked_idx[:, axis + 1:]], axis=1)
    new_values = tf.boolean_mask(sparse_tensor.values, mask_sp)
    return tf.SparseTensor(new_idx, new_values, new_shape)

# Display the re-indexed coordinates of the entries kept by `mask`.
boolean_mask_sparse_1d(A_hat_sparse,mask).indices

<tf.Tensor: shape=(27, 2), dtype=int64, numpy=
array([[ 0,  0],
       [ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 0,  4],
       [ 0,  5],
       [ 0,  6],
       [ 0,  7],
       [ 0,  8],
       [ 0, 10],
       [ 0, 11],
       [ 0, 12],
       [ 0, 13],
       [ 0, 17],
       [ 0, 19],
       [ 0, 21],
       [ 0, 31],
       [ 1,  0],
       [ 1,  1],
       [ 1,  2],
       [ 1,  3],
       [ 1,  7],
       [ 1, 13],
       [ 1, 17],
       [ 1, 19],
       [ 1, 21],
       [ 1, 30]])>

In [15]:
def mask_sparse_matrix(sparse_matrix,mask):
    """Keep only the rows of a 2-D sparse matrix that `mask` selects.

    Args:
        sparse_matrix: 2-D tf.SparseTensor.
        mask: 1-D mask with one element per row; non-zero / True keeps the row.

    Returns:
        A tf.SparseTensor of shape (number of kept rows, number of columns).
        Kept rows are renumbered 0, 1, 2, ... in their original order, so the
        result is valid for any selection of rows, not only for a leading run,
        and the column count is taken from the matrix's own second dimension,
        so non-square matrices work too.
    """
    mask = tf.convert_to_tensor(mask)

    # Mask value of every stored entry, looked up through its row coordinate.
    row_of_entry = sparse_matrix.indices[:, 0]
    keep_entry = tf.gather(mask, row_of_entry)

    kept_idx = tf.boolean_mask(sparse_matrix.indices, keep_entry)
    new_values = tf.boolean_mask(sparse_matrix.values, keep_entry)

    # Exclusive cumulative sum: for each original row, the number of kept rows
    # before it, i.e. its row number in the output if it is kept.
    new_row_of = tf.cumsum(tf.dtypes.cast(mask, tf.int64), exclusive=True)
    new_rows = tf.gather(new_row_of, kept_idx[:, 0])
    new_idx = tf.concat([tf.expand_dims(new_rows, 1), kept_idx[:, 1:]], axis=1)

    # Both components are int64: kept-row count and the original column count.
    new_shape = tf.stack([tf.math.count_nonzero(mask), sparse_matrix.dense_shape[1]])

    return tf.SparseTensor(new_idx, new_values, new_shape)

#convert index data to mask!!!!!
#create train test data, must be same size: train graph only has training data, test graph has all data

In [37]:
# Densify the row-masked adjacency matrix to inspect the rows kept by `mask`.
tf.sparse.to_dense(mask_sparse_matrix(A_hat_sparse,mask))

<tf.Tensor: shape=(2, 34), dtype=float64, numpy=
array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0.,
        0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0.]])>