In [10]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import tensorflow as tf
import pandas as pd
import networkx as nx

from edward.models import Bernoulli, Multinomial, Beta, Dirichlet, PointMass
from observations import karate
from sklearn.metrics.cluster import adjusted_rand_score

ed.set_seed(42)

## Data 

In [11]:
X = pd.read_csv("/Users/oliver/Downloads/network_subset.csv")
X.columns = ['id', 'dst_id',"src_id"]
X.loc[:1000, 'dst_id':'src_id'].to_csv("bitcoin.edgelist", sep = " ", index = False) # write edgelist
G = nx.read_edgelist("bitcoin.edgelist") # read and parse edgelist to (networkx) graph
A = nx.adjacency_matrix(G) # make Adjacency matrix

In [12]:
x_train = np.asarray(A.todense()) # convert Adjacency matrix to numpy array

In [15]:
N = x_train.shape[0]  # number of vertices
K = 2  # number of clusters

## Model

In [16]:
gamma = Dirichlet(concentration=tf.ones([K]))
Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N)
X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

## Inference (EM algorithm)

In [17]:
qgamma = PointMass(tf.nn.softmax(tf.get_variable("qgamma/params", [K])))
qPi = PointMass(tf.nn.sigmoid(tf.get_variable("qPi/params", [K, K])))
qZ = PointMass(tf.nn.softmax(tf.get_variable("qZ/params", [N, K])))

In [22]:
inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: x_train})
inference.initialize(n_iter=2500)

  not np.issubdtype(value.dtype, np.float) and \
  not np.issubdtype(value.dtype, np.int) and \


In [23]:
tf.global_variables_initializer().run()

In [24]:
for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

2500/2500 [100%] ██████████████████████████████ Elapsed: 84s | Loss: 8746.961

## Criticism 

In [25]:
Z_pred = qZ.mean().eval().argmax(axis=1)
print("Result (label flip can happen):")
print("Predicted")
print(Z_pred)
print("True")
#print(Z_true)
#print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true))

Result (label flip can happen):
Predicted
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1