Skip to content

Commit

Permalink
Implement a stochastic block model example (#715)
Browse files Browse the repository at this point in the history
* Implement a stochastic block model example

* Merge the originally separate MAP steps into a joint MAP and follow PEP8

* Test the model using Zachary’s Karate Club graph

* Follow PEP8

* Put graph and label data in data/

* Delete unnecessary file and fix indent
  • Loading branch information
yamaguchiyuto authored and dustinvtran committed Jul 28, 2017
1 parent f3b396e commit de5d0ac
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 0 deletions.
78 changes: 78 additions & 0 deletions examples/data/karate_edgelist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
0 10
0 11
0 12
0 13
0 17
0 19
0 21
0 31
1 2
1 3
1 7
1 13
1 17
1 19
1 21
1 30
2 3
2 7
2 8
2 9
2 13
2 27
2 28
2 32
3 7
3 12
3 13
4 6
4 10
5 6
5 10
5 16
6 16
8 30
8 32
8 33
9 33
13 33
14 32
14 33
15 32
15 33
18 32
18 33
19 33
20 32
20 33
22 32
22 33
23 25
23 27
23 29
23 32
23 33
24 25
24 27
24 31
25 31
26 29
26 33
27 33
28 31
28 33
29 32
29 33
30 32
30 33
31 32
31 33
32 33
34 changes: 34 additions & 0 deletions examples/data/karate_labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
0
0
0
0
0
0
0
0
0
1
0
0
0
0
1
1
0
0
1
0
1
0
1
1
1
1
1
1
1
1
1
1
1
1
68 changes: 68 additions & 0 deletions examples/stochastic_block_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python
"""Stochastic Block Model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import tensorflow as tf

from sklearn.metrics.cluster import adjusted_rand_score
from edward.models import Bernoulli, Multinomial, Beta, Dirichlet, PointMass

ed.set_seed(42)


def build_dataset(label_filepath, graph_filepath):
    """Load vertex labels and an edge list into NumPy arrays.

    Args:
      label_filepath: path to a text file with one integer label per line.
        The position of a label in the file is used as the vertex id.
      graph_filepath: path to a text file with one edge per line, written
        as two whitespace-separated integer vertex ids ``src dst``.
        Blank lines are ignored.

    Returns:
      X: float array of shape (N, N), where N is the number of labels,
        with ``X[src, dst] = 1`` for every listed edge and 0 elsewhere.
        Only the listed direction is set; the matrix is not symmetrized.
      Z: integer array of shape (N,) holding the labels.

    Raises:
      ValueError: if a non-blank edge line does not consist of exactly two
        integers.
      IndexError: if an edge refers to a vertex id outside ``[-N, N)``.
    """
    # `np.int` was only an alias of the builtin `int` and was removed in
    # NumPy 1.24. `ndmin=1` keeps a one-line label file from collapsing to a
    # 0-d array, which would break `Z.shape[0]`.
    Z = np.loadtxt(label_filepath, dtype=int, ndmin=1)
    N = Z.shape[0]

    X = np.zeros((N, N))
    # The context manager closes the file even if a line fails to parse.
    with open(graph_filepath, 'r') as edge_file:
        for line in edge_file:
            # split() with no argument tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                continue
            src, dst = map(int, fields)
            X[src, dst] = 1

    return X, Z


# DATA
# Karate-club inputs, given relative to the directory the script is run from:
# an edge list and one ground-truth label per vertex.
graph_filepath = 'data/karate_edgelist.txt'
label_filepath = 'data/karate_labels.txt'
X_data, Z_true = build_dataset(label_filepath=label_filepath,
                               graph_filepath=graph_filepath)
K = 2  # number of clusters
N = len(X_data)  # number of vertices (rows of the adjacency matrix)

# MODEL
# Stochastic block model over N vertices and K clusters.
# gamma: cluster proportions, Dirichlet prior with all-ones concentration.
gamma = Dirichlet(concentration=tf.ones([K]))
# Pi: K x K matrix of between-cluster edge probabilities, each entry with a
# Beta(1, 1) prior.
Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
# Z: N draws of a single-trial Multinomial over the K clusters, i.e. one
# one-hot cluster assignment per vertex (shape [N, K], matching qZ below).
Z = Multinomial(total_count=1., probs=gamma, sample_shape=N)
# X: N x N adjacency matrix; edge (i, j) is Bernoulli with probability
# (Z Pi Z^T)[i, j], the Pi entry selected by the clusters of i and j.
X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

# INFERENCE (joint MAP: every latent variable gets a point-mass estimate)
# Each point estimate is an unconstrained, randomly initialised variable
# squashed into its latent's support: softmax for the probability vectors
# (gamma and each row of Z), sigmoid for the edge probabilities in Pi.
qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))

# Bind each latent to its point estimate and condition X on the observed
# adjacency matrix.
inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})

n_iter = 100
inference.initialize(n_iter=n_iter)

tf.global_variables_initializer().run()

# One inference.update() call per iteration, reporting progress after each;
# finalize() runs once, after the loop has finished.
for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
inference.finalize()

# CRITICISM
# Hard assignment per vertex: the column index of the largest entry in each
# row of the point estimate for Z.
Z_pred = qZ.mean().eval().argmax(axis=1)
# Cluster ids are arbitrary, so the predicted ids may be swapped relative to
# the true labels; the adjusted Rand index is unaffected by such a swap.
print("Result (label flip can happen):")
print("Predicted")
print(Z_pred)
print("True")
print(Z_true)
# Arguments follow scikit-learn's documented (labels_true, labels_pred) order;
# the score itself is symmetric.
print("Adjusted Rand Index =", adjusted_rand_score(Z_true, Z_pred))

0 comments on commit de5d0ac

Please sign in to comment.