In [1]:
import os
import numpy as np
import scipy.io as sio
import scipy.sparse as sp

from paican.paican import PAICAN
from sklearn.metrics import normalized_mutual_info_score as nmi

#### Read the data

In [2]:
# Directory holding the parliament dataset, relative to the working directory.
path = 'data/parliament/'

# Resolve the three input files up front so the loading calls stay short.
a_file, x_file, z_file = (
    os.path.join(path, fname) for fname in ('A.mtx', 'X.mtx', 'z.npy')
)

# The two Matrix Market files are converted to CSR, which the later
# preprocessing relies on for mask-based row/column selection.
A = sio.mmread(a_file).tocsr()
X = sio.mmread(x_file).tocsr()

# z is the label vector that the final NMI evaluation compares against;
# K is the number of distinct values it contains.
z = np.load(z_file)
K = np.unique(z).size

print(A.shape, X.shape, K)

(451, 451) (451, 108) 7


#### Preprocessing: make undirected + filter singletons + (optionally) select largest connected component

In [3]:
# Preprocessing pipeline: symmetrize A, drop nodes without edges, then keep
# only the largest connected component. X and z are filtered with the same
# masks so that their rows stay aligned with the rows of A.
# NOTE(review): K was computed from the unfiltered labels when the data was
# loaded; if filtering ever removes every node of a class, K would overcount.
# Confirm before reusing this cell on other datasets.

# make sure the graph is undirected (element-wise max of A and its transpose)
A = A.maximum(A.T)

# remove singleton nodes (without any edges)
# np.asarray(...).ravel() flattens the row sums whether `sum` returns an
# np.matrix (sparse matrices) or a plain ndarray (sparse arrays); the `.A1`
# attribute used previously exists only on np.matrix.
filter_singletons = np.asarray(A.sum(1)).ravel() != 0
A = A[filter_singletons][:, filter_singletons]
X = X[filter_singletons]
z = z[filter_singletons]

# keep only the largest connected component
# (skip the rest of this cell to retain every component)
cc = sp.csgraph.connected_components(A)[1]  # component label of each node
cc_filter = cc == np.bincount(cc).argmax()  # mask of the most frequent label

A = A[cc_filter][:, cc_filter]
X = X[cc_filter]
z = z[cc_filter]

#### Fit PAICAN

In [4]:
# Build the model from the preprocessed A and X and the cluster count K.
# With verbose=True the run recorded below logs the ELBO at every iteration.
paican = PAICAN(A, X, K, verbose=True)

# fit_predict() yields three results; z_pr is the one scored against z in
# the NMI evaluation.
# NOTE(review): ca_pr / cx_pr presumably hold anomaly predictions — confirm
# against the paican package documentation.
fit_outputs = paican.fit_predict()
z_pr, ca_pr, cx_pr = fit_outputs

iter   0, ELBO: -1751.73962
iter   1, ELBO: -1590.77063
iter   2, ELBO: -1579.55896
iter   3, ELBO: -1578.55103
iter   4, ELBO: -1578.30579
iter   5, ELBO: -1578.20215
iter   6, ELBO: -1578.14893
iter   7, ELBO: -1578.12830
iter   8, ELBO: -1578.10156
iter   9, ELBO: -1578.05591
iter  10, ELBO: -1577.97839
iter  11, ELBO: -1577.84412
iter  12, ELBO: -1577.63074
iter  13, ELBO: -1577.29712
iter  14, ELBO: -1576.77478
iter  15, ELBO: -1576.05420
iter  16, ELBO: -1575.55151
iter  17, ELBO: -1575.44434
iter  18, ELBO: -1575.41663
iter  19, ELBO: -1575.38794
iter  20, ELBO: -1575.34827
iter  21, ELBO: -1575.30627
iter  22, ELBO: -1575.28784
iter  23, ELBO: -1575.25049
iter  24, ELBO: -1575.20581
iter  25, ELBO: -1575.17957
iter  26, ELBO: -1575.16504
iter  27, ELBO: -1575.15735
iter  28, ELBO: -1575.15710


#### Evaluate NMI

In [5]:
# Normalized mutual information between the predicted labels (z_pr) and the
# reference labels (z), reported on a 0-100 scale with two decimals.
nmi_percent = 100 * nmi(z_pr, z)
print('NMI: {:.2f}'.format(nmi_percent))

NMI: 80.30
