Implement a stochastic block model example (#715)

* Implement a stochastic block model example * Joint originally separated MAP and follow PEP8 * Test the model using Zachary’s Karate Club graph * Follow PEP8 * Put graph and label data in data/ * Delete unnecessary file and fix indent
blei-lab · Jul 28, 2017 · de5d0ac · de5d0ac
1 parent f3b396e
commit de5d0ac
Show file tree

Hide file tree

Showing 3 changed files with 180 additions and 0 deletions.
diff --git a/examples/data/karate_edgelist.txt b/examples/data/karate_edgelist.txt
@@ -0,0 +1,78 @@
+0 1
+0 2
+0 3
+0 4
+0 5
+0 6
+0 7
+0 8
+0 10
+0 11
+0 12
+0 13
+0 17
+0 19
+0 21
+0 31
+1 2
+1 3
+1 7
+1 13
+1 17
+1 19
+1 21
+1 30
+2 3
+2 7
+2 8
+2 9
+2 13
+2 27
+2 28
+2 32
+3 7
+3 12
+3 13
+4 6
+4 10
+5 6
+5 10
+5 16
+6 16
+8 30
+8 32
+8 33
+9 33
+13 33
+14 32
+14 33
+15 32
+15 33
+18 32
+18 33
+19 33
+20 32
+20 33
+22 32
+22 33
+23 25
+23 27
+23 29
+23 32
+23 33
+24 25
+24 27
+24 31
+25 31
+26 29
+26 33
+27 33
+28 31
+28 33
+29 32
+29 33
+30 32
+30 33
+31 32
+31 33
+32 33
diff --git a/examples/data/karate_labels.txt b/examples/data/karate_labels.txt
@@ -0,0 +1,34 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+0
+0
+0
+0
+1
+1
+0
+0
+1
+0
+1
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/examples/stochastic_block_model.py b/examples/stochastic_block_model.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+"""Stochastic Block Model
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import edward as ed
+import numpy as np
+import tensorflow as tf
+
+from sklearn.metrics.cluster import adjusted_rand_score
+from edward.models import Bernoulli, Multinomial, Beta, Dirichlet, PointMass
+
+ed.set_seed(42)
+
+
+def build_dataset(label_filepath, graph_filepath):
+  Z = np.loadtxt(label_filepath, dtype=np.int)
+  N = Z.shape[0]
+
+  X = np.zeros((N, N))
+  for line in open(graph_filepath, 'r'):
+    src, dst = map(int, line.strip().split(' '))
+    X[src, dst] = 1
+
+  return X, Z
+
+
+# DATA
+# Both paths are relative, so they resolve against the current working
+# directory rather than the location of this script.
+label_filepath = 'data/karate_labels.txt'
+graph_filepath = 'data/karate_edgelist.txt'
+# NOTE(review): build_dataset only sets X[src, dst], and the bundled edge
+# list names each pair once, so X_data is not symmetric -- confirm this is
+# intended for the graph being modelled.
+X_data, Z_true = build_dataset(label_filepath, graph_filepath)
+N = X_data.shape[0]  # number of vertices
+K = 2  # number of clusters
+
+# MODEL
+# gamma: length-K cluster proportions; Dirichlet prior with all-ones
+# concentration.
+gamma = Dirichlet(concentration=tf.ones([K]))
+# Pi: K x K matrix of block-to-block edge probabilities; each entry has a
+# Beta prior with both concentrations equal to 1.
+Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
+# Z: N draws (sample_shape=N) of a Multinomial with total_count=1, i.e. one
+# one-hot cluster indicator of length K per vertex.
+Z = Multinomial(total_count=1., probs=gamma, sample_shape=N)
+# X: edge indicators with probabilities Z Pi Z^T; with one-hot rows in Z,
+# entry (i, j) is the Pi entry for the clusters of vertices i and j.
+X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))
+
+# INFERENCE (MAP with point-mass approximations)
+# NOTE(review): this header used to read "EM algorithm", but the code below
+# builds a single ed.MAP over gamma, Pi and Z and calls one update() per
+# iteration; no alternating E/M steps are visible here.
+# Each point estimate wraps an unconstrained, randomly initialised
+# tf.Variable: softmax yields values that sum to 1 along the last axis,
+# sigmoid yields values in (0, 1).
+qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
+qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
+qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))
+
+# Bind each latent variable to its point estimate and X to the observed
+# adjacency matrix.
+inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})
+
+n_iter = 100
+inference.initialize(n_iter=n_iter)
+
+# Variables are initialised after inference.initialize(); presumably this
+# relies on a default session and on initialize() having created any
+# optimizer variables -- confirm against the Edward version in use.
+tf.global_variables_initializer().run()
+
+# Manual training loop: one update and one progress report per iteration.
+for _ in range(inference.n_iter):
+  info_dict = inference.update()
+  inference.print_progress(info_dict)
+inference.finalize()
+
+# CRITICISM
+# Hard assignment per vertex: index of the largest entry in each row of the
+# evaluated qZ mean.
+Z_pred = qZ.mean().eval().argmax(axis=1)
+# The message warns that predicted cluster ids may be swapped relative to
+# the true labels.
+print("Result (label filp can happen):")
+print("Predicted")
+print(Z_pred)
+print("True")
+print(Z_true)
+# Score the predicted labelling against the true one.
+print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true))