In [1]:
!sudo apt-get install libmetis-dev
!pip install metis
import metis
import random
import tensorflow as tf
import numpy as np
from scipy import sparse
import scipy.io as sio

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libmetis-dev is already the newest version (5.1.0.dfsg-5).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.


In [2]:
# hyperparameters (read as globals by the cells below)
hidden = 512 # number of hidden units in the encoder layer
latent = 256 # dimension of the latent variables, i.e. the size of each node embedding
learning_rate = 0.01 # step size handed to the Adam optimizer
epochs = 200 # number of training iterations; each one draws a fresh batch of clusters
nparts = 1500 # number of partitions requested from METIS
batch_size = 20 # number of clusters per batch
K = 3 # degree of polynomial filter; GC_layer reads this global and keeps K + 1 coefficients per output unit

In [3]:
# MATLAB .mat file; the loading cell reads its 'A', 'X', 'Y', 'train_mask', 'val_mask' and 'test_mask' entries.
# NOTE(review): the path looks like a Colab Google Drive mount -- adjust when running elsewhere.
filename = '/content/drive/MyDrive/GRAPH DATA/reddit.mat' # dataset

In [4]:
# read every array the notebook needs from the .mat file
mat_dict = sio.loadmat(filename)
A = mat_dict['A'].ceil() # adjacency, with every entry rounded up to the next integer
X, Y = mat_dict['X'], mat_dict['Y'] # node features and label matrix
# the three split masks get the same treatment: drop singleton axes, cast to bool
train_mask, val_mask, test_mask = (
  mat_dict[key].squeeze().astype(bool)
  for key in ('train_mask', 'val_mask', 'test_mask')
)

In [5]:
def cluster_graph(A, nparts):
  """Group the nodes of a graph into clusters using METIS.

  Args:
    A: adjacency matrix. Only `.shape` is read when `nparts == 1`; otherwise
      `.tolil().rows` supplies the per-node neighbour lists handed to METIS.
    nparts: requested number of partitions. With `nparts == 1` METIS is not
      called and every node lands in cluster 0.

  Returns:
    dict mapping each part id to the list of node indices assigned to it
    (indices appear in ascending order).
  """
  if nparts != 1:
    adjacency = list(A.tolil().rows)
    edge_cuts, parts = metis.part_graph(adjacency, nparts=nparts)
  else:
    # trivial partition: no cut edges, everything in part 0
    edge_cuts, parts = 0, [0] * A.shape[0]
  print('Number of edge cuts: %d.' % edge_cuts)
  cluster_dict = {}
  for node, part in enumerate(parts):
    cluster_dict.setdefault(part, []).append(node)
  return cluster_dict

# the clustering algorithm (METIS): partition the nodes of A into `nparts` clusters;
# the result maps each part id to the list of node indices assigned to it
cluster_dict = cluster_graph(A, nparts)

Number of edge cuts: 9609639.


In [6]:
def preprocess_support(A):
  """Build the rescaled normalized Laplacian used as the filter support.

  Forms L = I - D^{-1/2} A^T D^{-1/2}, where D holds the row sums of A, and
  returns 2 / lambda_max * L - I with lambda_max the largest-magnitude
  eigenvalue of L.

  Args:
    A: square scipy sparse adjacency matrix with at least one node. The
      eigen-solvers used here assume L is symmetric, i.e. A is symmetric.

  Returns:
    scipy sparse matrix with the same shape as A.
  """
  # Import the solver explicitly: `from scipy import sparse` on its own does not
  # guarantee that the `sparse.linalg` submodule has been loaded.
  from scipy.sparse.linalg import eigsh
  N = A.shape[1]
  # Row sums as a sparse column vector. Zero sums are not stored, so power()
  # leaves isolated nodes at 0 instead of producing inf.
  D = sparse.csr_matrix(A.sum(axis=1))
  norm = D.power(-0.5)
  L = sparse.eye(N, dtype='float32') - A.multiply(norm).T.multiply(norm)
  if N > 1:
    max_eigval = eigsh(L, k=1, return_eigenvectors=False)[0]
  else:
    # ARPACK requires k < N, so a single-node graph is solved densely.
    max_eigval = np.abs(np.linalg.eigvalsh(L.toarray())).max()
  L_ = 2.0 / max_eigval * L - sparse.eye(N, dtype='float32')
  return L_

def toTensorSparse(S):
  """Densify the sparse matrix `S` and wrap the result in a constant TensorFlow tensor."""
  dense = S.todense()
  return tf.constant(dense)

def toTensor(T):
  """Wrap the array-like value `T` in a constant TensorFlow tensor."""
  tensor = tf.constant(T)
  return tensor

In [7]:
# layer classes

class bilinear_layer:
  """Parameter-free decoder layer that maps a matrix Z to Z Z^T."""

  def __init__(self, indim, outdim):
    """Accept the two dimensions for call-site symmetry; nothing is stored."""

  def __call__(self, tensor):
    """Return the product of `tensor` with its own transpose."""
    transposed = tf.transpose(tensor)
    return tf.linalg.matmul(tensor, transposed)

# unused
class FC_layer:
  """Bias-free dense layer: multiplies its input by one trainable weight matrix."""

  def __init__(self, indim, outdim):
    """Create the (indim, outdim) weight, drawn from the He-normal initializer."""
    initializer = tf.initializers.he_normal()
    self.weight = tf.Variable(initial_value=initializer((indim, outdim)), trainable=True)

  def __call__(self, tensor):
    """Return the matrix product of `tensor` and the layer weight."""
    weight = self.weight
    return tf.linalg.matmul(tensor, weight)

class GC_layer:
  """Graph-convolution layer built on a Legendre polynomial filter.

  The input is first projected by a trainable weight matrix; the projection is
  then expanded into Legendre polynomials P_0 .. P_degree of the support matrix
  and the terms are mixed with trainable coefficients (one per polynomial order
  and output unit).
  """

  def __init__(self, indim, outdim, degree=None):
    """
    Args:
      indim: number of input features.
      outdim: number of output features.
      degree: polynomial degree of the filter. Defaults to the notebook-level
        hyperparameter `K` as it is at construction time.
    """
    # The degree is frozen on the instance so that later edits of the global K
    # cannot desynchronise __call__ from the shape of self.coeffs.
    self.degree = K if degree is None else degree
    initial_value = tf.initializers.he_normal()((indim, outdim,))
    self.weight = tf.Variable(initial_value=initial_value, trainable=True)
    # Start from the identity filter: only the order-0 coefficient is non-zero.
    delta = np.zeros((self.degree + 1, outdim), dtype='float32')
    delta[0, :] = 1.0
    self.coeffs = tf.Variable(initial_value=delta, trainable=True)

  def _legendre_basis(self, transform, propagate):
    """Return [P_0, ..., P_degree] applied to `transform`; `propagate(x)` computes support @ x."""
    basis = [transform]
    if self.degree > 0:
      basis.append(propagate(transform))
    # Bonnet recursion: k P_k = (2k - 1) S P_{k-1} - (k - 1) P_{k-2}
    for k in range(2, self.degree + 1):
      basis.append((2.0 * k - 1.0) / k * propagate(basis[k-1]) - (k - 1.0) / k * basis[k-2])
    return basis

  def __call__(self, tensor, support, embed=False):
    """Apply the layer.

    Args:
      tensor: input features; must expose `.numpy()` when `embed` is true.
      support: support matrix. With `embed=True` it only needs a `.dot` method
        (e.g. a scipy sparse matrix); otherwise it must be usable by
        `tf.linalg.matmul`.
      embed: if true, run entirely in NumPy (no gradients) and return an
        ndarray; otherwise run in TensorFlow and return a tensor.
    """
    if embed: # numpy pipeline
      transform = tensor.numpy().dot(self.weight.numpy())
      basis = self._legendre_basis(transform, support.dot)
      # linear combination
      result = np.zeros(transform.shape)
      for coeff, base in zip(self.coeffs.numpy(), basis):
        result += base * coeff
      return result
    else: # tensorflow pipeline
      transform = tf.linalg.matmul(tensor, self.weight)
      basis = self._legendre_basis(transform, lambda t: tf.linalg.matmul(support, t))
      # linear combination
      result = tf.zeros(transform.shape)
      for k in range(self.degree + 1):
        result += self.coeffs[k] * basis[k]
      return result

In [8]:
# our model class (for the paper "Scalable Graph Variational Autoencoders")

class Model:
  """Graph variational autoencoder trained on batches of graph clusters.

  The encoder stacks GC_layer instances (one hidden layer followed by a mean
  head and a spread head); the decoder is the parameter-free bilinear_layer.
  """

  def __init__(self, size_tuple, optimizer, nonlinear):
    """
    Args:
      size_tuple: (feature dimension, hidden units, latent dimension).
      optimizer: object exposing `apply_gradients(grads_and_vars)`.
      nonlinear: activation applied to the output of the first encoder layer.
    """
    self.sources = [] # variables to optimize
    self.build(size_tuple) # builds the model by stacking layers on each other
    self.optimizer = optimizer
    self.nonlinear = nonlinear
    self.Z_mean = None # mean embedding layer
    self.Z_var = None # spread embedding layer (scales the noise in predict)
    self.noise = None # the noise sample
    self.sample = None # self.Z_mean + self.Z_var * self.noise
    self.A_gamma = None # the reconstructions (logits)

  def build(self, size_tuple):
    """Create the layers and register their trainable variables in self.sources."""
    X_dim, hidden, latent = size_tuple
    self.enc_layer = GC_layer(X_dim, hidden)
    self.enc_mean_layer = GC_layer(hidden, latent)
    self.enc_var_layer = GC_layer(hidden, latent)
    self.A_dec_gamma_layer = bilinear_layer(latent, latent)
    # filling the source array with weights
    for layer in (self.enc_layer, self.enc_mean_layer, self.enc_var_layer):
      self.sources.extend([layer.weight, layer.coeffs])

  # forward propagation in the encoder
  def encode(self, X, S):
    """Return (mean, spread) of the latent distribution; the spread is exp() of its head's output."""
    enc = self.nonlinear(self.enc_layer(X, S))
    enc_mean = self.enc_mean_layer(enc, S)
    enc_var = tf.math.exp(self.enc_var_layer(enc, S))
    return enc_mean, enc_var

  # returns only the node embeddings
  def embed(self, X, S):
    """Return the mean embeddings, computed through the layers' NumPy pipeline."""
    enc = self.nonlinear(self.enc_layer(X, S, embed=True))
    enc_mean = self.enc_mean_layer(enc, S, embed=True)
    return enc_mean

  # forward propagation in the decoder
  def decode(self, sample):
    """Map latent samples to reconstruction logits."""
    A_dec_gamma = self.A_dec_gamma_layer(sample)
    return A_dec_gamma

  def predict(self, X, S):
    """Run a full forward pass and cache every intermediate on the instance."""
    self.Z_mean, self.Z_var = self.encode(X, S)
    self.noise = tf.random.normal(self.Z_var.shape)
    self.sample = self.Z_mean + self.Z_var * self.noise # reparameterization trick
    self.A_gamma = self.decode(self.sample)

  @staticmethod
  def sample_batch_nodes(cluster_dict, batch_size):
    """Draw `batch_size` distinct clusters and return their nodes as one flat list.

    Args:
      cluster_dict: mapping from cluster id to a list of node indices.
      batch_size: number of clusters to draw without replacement.

    Raises:
      ValueError: if `batch_size` is negative or exceeds the number of
        clusters (propagated from random.sample).
    """
    # random.sample needs a sequence; a dict key view is rejected on Python 3.11+.
    chosen = random.sample(list(cluster_dict), batch_size)
    # Single-pass flatten; sum(list_of_lists, []) re-copies the accumulator each step.
    return [node for cluster in chosen for node in cluster_dict[cluster]]

  def train(self, X, A, cluster_dict, batch_size, epochs):
    """Optimize the model, one randomly sampled batch of clusters per epoch.

    Args:
      X: node feature matrix, row-indexable with a list of node indices.
      A: scipy sparse adjacency matrix of the whole graph.
      cluster_dict: mapping from cluster id to a list of node indices.
      batch_size: number of clusters merged into each training subgraph.
      epochs: number of optimization steps.
    """
    for epoch in range(epochs):
      # only a subgraph is used in the training process
      nodes = self.sample_batch_nodes(cluster_dict, batch_size)
      S_batch = toTensorSparse(preprocess_support(A[nodes].T[nodes]))
      A_batch = toTensor(A.T[nodes].T[nodes].todense())
      X_batch = tf.math.l2_normalize(toTensor(X[nodes]), axis=1)
      # optimization
      with tf.GradientTape() as tape:
        self.predict(X_batch, S_batch)
        losses = self.loss(A_batch, X_batch)
        loss_ = tf.reduce_sum(losses)
      print(epoch, [loss.numpy() for loss in losses], loss_.numpy())
      grads = tape.gradient(loss_, self.sources)
      self.optimizer.apply_gradients(zip(grads, self.sources))

  # Kullback–Leibler divergence
  def KL_Divergence(self):
    """KL divergence to a standard normal, averaged over all latent entries.

    self.Z_var enters as a standard deviation, matching its use in predict.
    """
    loss = 0.5 * tf.reduce_mean(self.Z_mean**2.0 + self.Z_var**2.0 - 2.0 * tf.math.log(self.Z_var) - 1.0)
    return loss

  # reconstruction loss
  def re_A_loss(self, A):
    """Weighted cross-entropy between the adjacency `A` and the cached logits.

    Positive entries are up-weighted by (1 - density) / density and the mean is
    rescaled by 0.5 / (1 - density).
    """
    # Explicit cast instead of tf.size(..., out_type=tf.float32), which only
    # works through the eager-mode shortcut.
    density = tf.reduce_sum(A) / tf.cast(tf.size(A, out_type=tf.int64), A.dtype)
    pos_weight = (1.0 - density) / density
    xent = tf.nn.weighted_cross_entropy_with_logits(labels=A, logits=self.A_gamma, pos_weight=pos_weight)
    return 0.5 * tf.reduce_mean(1.0 / (1.0 - density) * xent)

  # list of all loss functions
  def loss(self, A, X):
    """Return (KL term, reconstruction term); `X` is accepted but not used."""
    return self.KL_Divergence(), self.re_A_loss(A)


In [9]:
# Seed every random number generator the run draws from (Python for the cluster
# sampling, TensorFlow for weight initialisation and the reparameterization
# noise, NumPy for anything relying on its global state) so that a
# Restart & Run All reproduces the same training run.
seed = 0
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

size_tuple = (X.shape[1], hidden, latent)
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
nonlinear = tf.nn.relu

model = Model(size_tuple, optimizer, nonlinear)

print('Training...')
model.train(X, A, cluster_dict, batch_size, epochs)


Training...
0 [0.0051376955, 6.520226] 6.525364
1 [0.027653715, 5.017509] 5.0451627
2 [0.109020725, 3.3549967] 3.4640174
3 [0.3321186, 2.01661] 2.3487284
4 [0.697624, 1.6535718] 2.3511958
5 [1.0100601, 1.4767085] 2.4867687
6 [1.2009507, 1.0275496] 2.2285004
7 [1.1872467, 0.882879] 2.0701256
8 [1.1127493, 0.8707761] 1.9835255
9 [1.0165356, 0.8938532] 1.9103888
10 [0.86723447, 0.88719606] 1.7544305
11 [0.710264, 0.962594] 1.672858
12 [0.5993235, 1.1049657] 1.7042892
13 [0.50550866, 1.2503694] 1.7558781
14 [0.4586492, 1.35502] 1.8136692
15 [0.45413637, 1.3559142] 1.8100506
16 [0.48488876, 1.274455] 1.7593437
17 [0.53360116, 1.1820498] 1.7156509
18 [0.608522, 1.0834162] 1.6919382
19 [0.68783253, 0.9825391] 1.6703717
20 [0.77941525, 0.90234995] 1.6817652
21 [0.843075, 0.8656452] 1.7087202
22 [0.8571064, 0.84708875] 1.7041951
23 [0.86449516, 0.835911] 1.7004061
24 [0.8564016, 0.8371655] 1.693567
25 [0.79637, 0.87500024] 1.6713703
26 [0.73373586, 0.9233536] 1.6570895
27 [0.67990917, 0.9745232

In [10]:
S = preprocess_support(A)
# The normalized features get their own name: rebinding `X` to a tensor here
# would break a re-run of the training cell, which indexes X with a list of nodes.
X_norm = tf.math.l2_normalize(toTensor(X), axis=1)
embs = model.embed(X_norm, S) # node embeddings

In [11]:
# node clustering using the KMeans algorithm
from sklearn.cluster import KMeans
# one cluster per column of the label matrix
kmeans = KMeans(n_clusters=Y.shape[1])
kmeans.fit(embs)
y_pred = kmeans.predict(embs)
# ground-truth class = index of the largest entry in each row of Y
y_true = np.argmax(Y, axis=1)

In [12]:
# result: adjusted mutual information between the true labels and the KMeans cluster assignments
from sklearn.metrics import adjusted_mutual_info_score
print(adjusted_mutual_info_score(y_true, y_pred))

0.4113758211028403
