In [1]:
!sudo apt-get install libmetis-dev
!pip install metis
import metis
import random
from itertools import chain
import tensorflow as tf
import numpy as np
from scipy import sparse
import scipy.io as sio

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libmetis-dev is already the newest version (5.1.0.dfsg-5).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.


In [2]:
# hyperparameters
hidden = 512 # output width of the first encoder layer (second entry of size_tuple)
latent = 256 # output width of the mean and variance encoder layers
learning_rate = 0.01 # step size handed to the Adam optimizer
epochs = 200 # number of training steps; every step draws a fresh batch of clusters
nparts = 1500 # number of partitions requested from cluster_graph
batch_size = 20 # number of clusters sampled per training step
K = 3 # propagation iterations per thread inside each GC_layer
T = 2 # number of propagation "threads" per GC_layer (plain loops whose outputs are summed, not OS threads)

In [3]:
# Path of the .mat dataset that is passed to sio.loadmat in the next cell.
# NOTE(review): absolute Google Drive path; presumably Drive is already mounted at
# /content/drive — no mount step is visible in this notebook, confirm before running.
filename = '/content/drive/MyDrive/GRAPH DATA/reddit.mat' # dataset

In [4]:
# Read the .mat file once, then pull out the graph, features, labels and split masks.
mat_dict = sio.loadmat(filename)
A = mat_dict['A'].ceil() # adjacency with every stored value rounded up element-wise
X, Y = mat_dict['X'], mat_dict['Y']
# Each mask is flattened and converted to boolean, in train / val / test order.
train_mask, val_mask, test_mask = (
  mat_dict[key].squeeze().astype(bool)
  for key in ('train_mask', 'val_mask', 'test_mask')
)

In [5]:
def cluster_graph(A, nparts):
  """Partition the nodes of a graph and group node indices by partition id.

  Args:
    A: adjacency matrix; only `A.shape[0]` is read when `nparts == 1`,
      otherwise `A.tolil().rows` supplies the per-node neighbour lists.
    nparts: number of partitions to request; 1 skips METIS entirely.

  Returns:
    dict mapping each partition id to the list of node indices assigned to
    it, in increasing node order.
  """
  if nparts == 1:
    # Trivial case: every node lands in partition 0 and nothing is cut.
    edge_cuts = 0
    parts = [0] * A.shape[0]
  else:
    adjacency = list(A.tolil().rows)
    edge_cuts, parts = metis.part_graph(adjacency, nparts=nparts)
  print('Number of edge cuts: %d.' % edge_cuts)
  cluster_dict = {}
  for node, part in enumerate(parts):
    cluster_dict.setdefault(part, []).append(node)
  return cluster_dict

# the clustering algorithm (METIS)
# Partition the whole graph once; Model.train later samples `batch_size` of these clusters per step.
cluster_dict = cluster_graph(A, nparts)

Number of edge cuts: 9609639.


In [6]:
def preprocess_support(A):
  """Rescale a sparse matrix by the inverse square roots of its row sums.

  With d_i the sum of row i of A, the returned matrix P satisfies
  P[i, j] = A[j, i] / sqrt(d_i * d_j). Note the transpose: for a symmetric A
  this is the usual D^-1/2 A D^-1/2 normalization.

  Args:
    A: SciPy sparse matrix (it must support `.sum`, `.multiply` and `.T`).

  Returns:
    SciPy sparse matrix with the same shape as A transposed.
  """
  # Row sums as a sparse column vector. Zero sums are not stored, and power()
  # only touches stored entries, so rows with no weight scale to 0 rather than inf.
  degrees = sparse.csr_matrix(A.sum(axis=1))
  norm = degrees.power(-0.5)
  # First multiply scales the rows of A; after the transpose the second
  # multiply scales what were originally the columns.
  P = A.multiply(norm).T.multiply(norm)
  return P

def toTensorSparse(S):
  """Densify a sparse matrix via `.todense()` and wrap it in a constant TensorFlow tensor."""
  dense = S.todense()
  return tf.constant(dense)

def toTensor(T):
  """Wrap an array-like value in a constant TensorFlow tensor."""
  constant = tf.constant(T)
  return constant

In [7]:
# layer classes

class bilinear_layer:
  """Parameter-free decoder layer that multiplies its input by its own transpose."""

  def __init__(self, indim, outdim):
    """Accepts the same (indim, outdim) signature as the other layers; both are ignored."""

  def __call__(self, tensor):
    """Return `tensor @ transpose(tensor)`."""
    transposed = tf.transpose(tensor)
    return tf.linalg.matmul(tensor, transposed)

# unused
class FC_layer:
  """Bias-free fully connected layer: a single trainable (indim, outdim) weight matrix."""

  def __init__(self, indim, outdim):
    """Create the weight matrix with He-normal initial values."""
    he_normal = tf.initializers.he_normal()
    self.weight = tf.Variable(initial_value=he_normal((indim, outdim,)), trainable=True)

  def __call__(self, tensor):
    """Return `tensor @ weight`."""
    projected = tf.linalg.matmul(tensor, self.weight)
    return projected

class GC_layer:
  """Graph-convolution layer with learnable multi-step propagation.

  The input is first projected by a trainable (indim, outdim) weight matrix.
  Each of the T threads then runs K steps of the recurrence

      h <- w[t][k] * (support @ h) + v[t][k] * h0

  where h0 is the projected input and w / v are trainable per-output-feature
  vectors (initialised to ones and zeros respectively). The outputs of all
  threads are summed.
  """

  def __init__(self, indim, outdim):
    """Create the projection weight and the T x K propagation coefficients.

    K and T are read from the notebook-level hyperparameters at construction
    time and stored on the instance.
    """
    # K and T are only read, never rebound, so no `global` declaration is needed.
    self.K = K
    self.T = T
    self.ws = []
    self.vs = []
    for _thread in range(self.T):
      w_threads = []
      v_threads = []
      for _step in range(self.K):
        w_threads.append(tf.Variable(initial_value=(1.0,) * outdim, trainable=True))
        v_threads.append(tf.Variable(initial_value=(0.0,) * outdim, trainable=True))
      self.ws.append(w_threads)
      self.vs.append(v_threads)
    initial_value = tf.initializers.he_normal()((indim, outdim,))
    self.weight = tf.Variable(initial_value=initial_value, trainable=True)

  def _propagate(self, projected, apply_support, read):
    """Run the T-thread, K-step recurrence shared by both pipelines.

    Args:
      projected: the input after the weight projection (h0 in the class docstring).
      apply_support: callable computing `support @ h` for the active backend.
      read: callable turning a stored coefficient variable into a usable operand.
    """
    results = []
    for t in range(self.T):
      # Personalized PageRank
      state = projected
      for k in range(self.K):
        state = read(self.ws[t][k]) * apply_support(state) + read(self.vs[t][k]) * projected
      results.append(state)
    return sum(results)

  def __call__(self, tensor, support, embed=False):
    """Apply the layer.

    Args:
      tensor: input features; must expose `.numpy()` when `embed` is True.
      support: propagation matrix; must expose `.dot()` when `embed` is True,
        otherwise it is passed to `tf.linalg.matmul`.
      embed: when True, compute everything with NumPy values (no gradient
        tracking); when False, stay in TensorFlow ops.

    Returns:
      Sum over threads of the propagated features.
    """
    if embed: # numpy pipeline
      projected = tensor.numpy().dot(self.weight.numpy())
      return self._propagate(projected, support.dot, lambda variable: variable.numpy())
    # tensorflow pipeline
    projected = tf.linalg.matmul(tensor, self.weight)
    return self._propagate(projected, lambda state: tf.linalg.matmul(support, state), lambda variable: variable)

In [8]:
# our model class (for the paper "Scalable Graph Variational Autoencoders")

class Model:
  """Graph variational autoencoder trained on randomly sampled graph clusters.

  The encoder is three GC_layer instances: one shared hidden layer followed by
  separate mean and variance heads. The decoder is a parameter-free
  bilinear_layer applied to the sampled latent codes.
  """

  def __init__(self, size_tuple, optimizer, nonlinear):
    """Build the layers and store the training configuration.

    Args:
      size_tuple: (X_dim, hidden, latent) layer widths, forwarded to build().
      optimizer: object exposing `apply_gradients`; used by train().
      nonlinear: callable applied to the output of the first encoder layer.
    """
    self.sources = [] # variables to optimize
    self.build(size_tuple) # builds the model by stacking layers on each other
    self.optimizer = optimizer
    self.nonlinear = nonlinear
    self.Z_mean = None # mean embedding layer
    self.Z_var = None # variance embedding layer
    self.noise = None # the noise sample
    self.sample = None # self.Z_mean + self.Z_var * self.noise
    self.A_gamma = None # the reconstructions

  def build(self, size_tuple):
    """Create the encoder/decoder layers and collect every trainable variable in self.sources."""
    X_dim, hidden, latent = size_tuple
    self.enc_layer = GC_layer(X_dim, hidden)
    self.enc_mean_layer = GC_layer(hidden, latent)
    self.enc_var_layer = GC_layer(hidden, latent)
    self.A_dec_gamma_layer = bilinear_layer(latent, latent)
    # filling the source array with weights
    layers = [self.enc_layer, self.enc_mean_layer, self.enc_var_layer]
    for layer in layers:
      self.sources.append(layer.weight)
      self.sources += list(chain.from_iterable(layer.ws)) + list(chain.from_iterable(layer.vs))

  # forward propagation in the encoder
  def encode(self, X, S):
    """Return (enc_mean, enc_var); enc_var is the exponential of the variance head's output."""
    enc = self.nonlinear(self.enc_layer(X, S))
    enc_mean = self.enc_mean_layer(enc, S)
    enc_var = tf.math.exp(self.enc_var_layer(enc, S))
    return enc_mean, enc_var

  # returns only the node embeddings
  def embed(self, X, S):
    """Return the mean embeddings, running both layers with embed=True (the NumPy pipeline)."""
    enc = self.nonlinear(self.enc_layer(X, S, embed=True))
    enc_mean = self.enc_mean_layer(enc, S, embed=True)
    return enc_mean

  # forward propagation in the decoder
  def decode(self, sample):
    """Return the reconstruction logits produced by the decoder layer for `sample`."""
    A_dec_gamma = self.A_dec_gamma_layer(sample)
    return A_dec_gamma

  def predict(self, X, S):
    """Run a full forward pass and store every intermediate result on the instance."""
    self.Z_mean, self.Z_var = self.encode(X, S)
    self.noise = tf.random.normal(self.Z_var.shape)
    self.sample = self.Z_mean + self.Z_var * self.noise # reparameterization trick
    self.A_gamma = self.decode(self.sample)

  def train(self, X, A, cluster_dict, batch_size, epochs):
    """Optimise the model on randomly sampled groups of clusters.

    Args:
      X: node feature matrix, indexed by lists of node ids.
      A: sparse adjacency matrix, indexed by lists of node ids.
      cluster_dict: mapping from cluster id to the list of node ids it contains.
      batch_size: number of clusters drawn (without replacement) per step.
      epochs: number of optimisation steps; each step uses one fresh batch.

    Raises:
      ValueError: from random.sample if batch_size exceeds the number of clusters.
    """
    # random.sample() requires a real sequence; passing a dict key view raises
    # TypeError on Python >= 3.11. Materialise the ids once, outside the loop.
    cluster_ids = list(cluster_dict.keys())
    for epoch in range(epochs):
      # only a subgraph is used in the training process
      samples = random.sample(cluster_ids, batch_size)
      # Flatten the sampled clusters in linear time (sum(..., []) is quadratic).
      nodes = list(chain.from_iterable(cluster_dict[sample] for sample in samples))
      S_batch = toTensorSparse(preprocess_support(A[nodes].T[nodes]))
      A_batch = toTensor(A.T[nodes].T[nodes].todense())
      X_batch = tf.math.l2_normalize(toTensor(X[nodes]), axis=1)
      # optimization
      with tf.GradientTape() as tape:
        self.predict(X_batch, S_batch)
        losses = self.loss(A_batch, X_batch)
        loss_ = tf.reduce_sum(losses)
      print(epoch, [loss.numpy() for loss in losses], loss_.numpy())
      grads = tape.gradient(loss_, self.sources)
      self.optimizer.apply_gradients(zip(grads, self.sources))

  # Kullback–Leibler divergence
  def KL_Divergence(self):
    """Mean KL term computed from the stored Z_mean and Z_var.

    Z_var is squared here and multiplies the noise in predict(), i.e. the
    formula treats it as a standard deviation.
    """
    loss = 0.5 * tf.reduce_mean(self.Z_mean**2.0 + self.Z_var**2.0 - 2.0 * tf.math.log(self.Z_var) - 1.0)
    return loss

  # reconstruction loss
  def re_A_loss(self, A):
    """Weighted cross-entropy between the stored logits self.A_gamma and the target A.

    Positive entries are up-weighted by (1 - density) / density and the mean is
    scaled by 0.5 / (1 - density), where density is the mean of A.
    """
    density = tf.reduce_sum(A) / tf.size(A, out_type=tf.float32)
    pos_weight = (1.0 - density) / density
    weighted_ce = tf.nn.weighted_cross_entropy_with_logits(labels=A, logits=self.A_gamma, pos_weight=pos_weight)
    # Same value as the former "-(-0.5 * mean)" form, without the double negation.
    return 0.5 * tf.reduce_mean(1.0 / (1.0 - density) * weighted_ce)

  # list of all loss functions
  def loss(self, A, X):
    """Return the (KL, reconstruction) loss pair; X is accepted but not used."""
    return self.KL_Divergence(), self.re_A_loss(A)


In [9]:
# Layer widths unpacked by Model.build: (input feature width, hidden width, latent width).
size_tuple = (X.shape[1], hidden, latent)
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
nonlinear = tf.nn.relu # activation applied after the first encoder layer

model = Model(size_tuple, optimizer, nonlinear)

print('Training...')
# Each of the `epochs` steps trains on `batch_size` randomly sampled clusters and prints its losses.
model.train(X, A, cluster_dict, batch_size, epochs)


Training...
0 [0.05253221, 7.752583] 7.805115
1 [0.1316293, 6.8970757] 7.028705
2 [0.103592224, 3.284567] 3.3881593
3 [0.11459511, 3.2251737] 3.339769
4 [0.16292262, 2.8630779] 3.0260005
5 [0.2922866, 2.5808218] 2.8731084
6 [0.40841094, 2.2481785] 2.6565895
7 [0.38590783, 1.7438947] 2.1298025
8 [0.44854495, 1.4378425] 1.8863875
9 [0.57055163, 1.1967516] 1.7673032
10 [0.6777655, 1.0805383] 1.7583038
11 [0.76226115, 0.96510106] 1.7273622
12 [0.7841671, 0.8878813] 1.6720483
13 [0.8641766, 0.80950844] 1.6736851
14 [0.7478354, 0.8804591] 1.6282945
15 [0.7398347, 0.89880925] 1.638644
16 [0.644827, 0.98742694] 1.6322539
17 [0.6643279, 1.0158188] 1.6801467
18 [0.63837653, 1.0234615] 1.661838
19 [0.63903457, 1.0091599] 1.6481946
20 [0.6166922, 0.9896429] 1.6063352
21 [0.63435096, 0.94981396] 1.5841649
22 [0.6792267, 0.90958005] 1.5888067
23 [0.7050769, 0.8658333] 1.5709102
24 [0.76702887, 0.8356003] 1.6026292
25 [0.73512214, 0.8259166] 1.5610387
26 [0.72359884, 0.83582884] 1.5594277
27 [0.63721

In [10]:
# Full-graph inference: Model.embed runs the NumPy pipeline, so S is passed as returned
# by preprocess_support rather than being converted to a tensor.
S = preprocess_support(A)
# NOTE(review): this rebinds X from the raw feature matrix to its row-normalized tensor.
X = tf.math.l2_normalize(toTensor(X), axis=1)
embs = model.embed(X, S) # node embeddings

In [11]:
# node clustering using the KMeans algorithm
from sklearn.cluster import KMeans
# One cluster per column of Y.
# NOTE(review): no random_state is set, so the assignment can differ between runs.
y_pred = KMeans(n_clusters=Y.shape[1]).fit(embs).predict(embs)
y_true = np.argmax(Y, axis=1) # presumably Y is one-hot, making this the class index — confirm

In [12]:
# result
from sklearn.metrics import adjusted_mutual_info_score
# Adjusted mutual information between the argmax labels and the KMeans assignment.
print(adjusted_mutual_info_score(y_true, y_pred))

0.42917532706246153
