In [1]:
import os
import numpy as np
import scipy.io as sio
import scipy.sparse as sp

from paican.paican import PAICAN
from sklearn.metrics import normalized_mutual_info_score as nmi

#### Read the data

In [2]:
path = 'data/parliament/'

# A and X are stored as Matrix Market files; both are converted to CSR so they
# can be row-sliced by the preprocessing step. A is the graph's adjacency
# matrix; X presumably holds one row of attributes per node (confirm against
# the dataset description).
A, X = (sio.mmread(os.path.join(path, fname)).tocsr()
        for fname in ('A.mtx', 'X.mtx'))

# z holds one label per node (used as the reference labelling for the NMI
# evaluation); K is the number of distinct labels and is passed to the model
# as the number of clusters.
z = np.load(os.path.join(path, 'z.npy'))
K = np.unique(z).size

# Quick sanity check of the loaded shapes and the cluster count.
print(A.shape, X.shape, K)

(451, 451) (451, 108) 7


#### Preprocessing: make undirected + filter singletons + (optionally) select largest connected component

In [None]:
# Symmetrize the adjacency matrix: the element-wise maximum of A and its
# transpose keeps an edge whenever it is present in either direction.
A = A.maximum(A.T)

# Drop singleton nodes, i.e. nodes whose adjacency row sums to zero.
# The same boolean mask is applied to the rows and columns of A and to the
# rows of X and z so that all three stay aligned.
filter_singletons = np.asarray(A.sum(axis=1)).ravel() != 0
A, X, z = (
    A[filter_singletons][:, filter_singletons],
    X[filter_singletons],
    z[filter_singletons],
)

# (Optional) restrict the graph to its largest connected component:
# label every node with its component id, then keep only the nodes that carry
# the most frequent id.
cc = sp.csgraph.connected_components(A)[1]
cc_filter = cc == np.argmax(np.bincount(cc))

A, X, z = (
    A[cc_filter][:, cc_filter],
    X[cc_filter],
    z[cc_filter],
)

#### Fit PAICAN

In [None]:
# Build the model on the preprocessed adjacency matrix A, the matrix X and the
# number of clusters K, with verbose output switched on.
paican = PAICAN(A, X, K, verbose=True)

# fit_predict() yields three results. z_pr is the predicted labelling that is
# later compared against z; ca_pr and cx_pr are presumably per-node anomaly
# indicators for the graph and attribute views -- TODO confirm against the
# PAICAN documentation.
paican_output = paican.fit_predict()
z_pr, ca_pr, cx_pr = paican_output

#### Evaluate NMI

In [None]:
# Normalized mutual information between the predicted labels (z_pr) and the
# reference labels (z), reported as a percentage with two decimals.
nmi_percent = nmi(z_pr, z) * 100
print('NMI: {:.2f}'.format(nmi_percent))