In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from signet.cluster import Cluster 
from signet.block_models import SSBM
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics.cluster import normalized_mutual_info_score
import matplotlib.pyplot as plt
from functions import *

In [2]:
# Iterative refinement methods

def sIR_SBM(A,Z_init,k, iter=10):
    n = Z_init.shape[0]
    W = Z_init/Z_init.sum(0)
    clust = convertZ(Z_init)
    for rep in range(iter):
        C = A @ W
        Pi = np.transpose(W)@A@W
        for i in range(n):
            ls = [np.linalg.norm(C[i,]-Pi[kk,]) for kk in range(k)]
            clust[i] = np.argmin(ls)
        W = convertClust(clust)/convertClust(clust).sum(0)
        return clust
    
def IR_SSBM(A,Z_init,k, iter=10):
    n = Z_init.shape[0]
    W = Z_init/Z_init.sum(0)
    clust = convertZ(Z_init)
    for rep in range(iter):
        C = A @ W
        for i in range(n):
            clust[i] = np.argmax(C[i,:])
        W = convertClust(clust)/convertClust(clust).sum(0)
        return clust

def IR_SSBM2(A,Z_init,k, iter=10):
    n = Z_init.shape[0]
    W = Z_init/Z_init.sum(0)
    clust = convertZ(Z_init)
    mask = np.ones((K,K))-np.diag(np.ones(K))
    for rep in range(iter):
        C = A @ W
        Pi = np.transpose(W)@A@W
        diag = np.mean(np.diagonal(Pi))
        #np.fill_diagonal(Pi, diag)
        out_diag = np.average(Pi,weights=mask)
        Pi = out_diag*np.ones((K,K))+(diag-out_diag)*np.diag(np.ones(K))
        for i in range(n):
            ls = [np.linalg.norm(C[i,]-Pi[kk,]) for kk in range(k)]
            clust[i] = np.argmin(ls)
        W = convertClust(clust)/convertClust(clust).sum(0)
        return clust

In [63]:
Pi= np.zeros((3, 3), int)
np.fill_diagonal(Pi, 5)
np.mean(np.diagonal(Pi))
K=3
mask = np.ones((K,K))-np.diag(np.ones(K))
np.average(Pi,weights=mask)

0.0

In [3]:
# Data generation
n=5000
K=15
p=0.02
eta=0.10

# sample a random matrix and a cluster object
A, assignments = SSBM(n=n, k=K, pin=p, etain=eta, sizes='uniform')
m = Cluster(A)

In [9]:
predictions1 = m.SPONGE(K, tau_p=1, tau_n=1, eigens=None, mi=None)
predictions2 = m.SPONGE_sym(K, tau_p=1, tau_n=1, eigens=None, mi=None)
predictions3 = sIR_SBM(A[0]-A[1],convertClust(predictions2), K,iter=10)
predictions4 = IR_SSBM(A[0]-A[1],convertClust(predictions2), K,iter=10)
predictions5 = IR_SSBM2(A[0]-A[1],convertClust(predictions2), K,iter=10)

In [10]:
normalized_mutual_info_score(predictions3 ,assignments)


0.9585834964159978

In [11]:
def xp(n,K,p,eta,rep):
    scores = np.zeros((rep,5))
    for r in range(rep):
        A, assignments = SSBM(n=n, k=K, pin=p, etain=eta, sizes='uniform')
        m = Cluster(A)
        predictions1 = m.SPONGE(K, tau_p=1, tau_n=1, eigens=None, mi=None)
        predictions2 = m.SPONGE_sym(K, tau_p=1, tau_n=1, eigens=None, mi=None)
        predictions3 = sIR_SBM(A[0]-A[1],convertClust(predictions2), K,iter=10)
        predictions4 = IR_SSBM(A[0]-A[1],convertClust(predictions2), K,iter=10)
        predictions5 = IR_SSBM2(A[0]-A[1],convertClust(predictions2), K,iter=10)
        scores[r,0]= normalized_mutual_info_score(predictions1 ,assignments)
        scores[r,1]= normalized_mutual_info_score(predictions2 ,assignments)
        scores[r,2]= normalized_mutual_info_score(predictions3 ,assignments)
        scores[r,3]= normalized_mutual_info_score(predictions4 ,assignments)
        scores[r,4]= normalized_mutual_info_score(predictions5 ,assignments)
    return np.mean(scores,axis=0)

In [89]:

xp(n,K,p,eta,2)

array([0.04, 0.41, 0.52, 0.51, 0.51])

In [12]:
etas = np.round(np.linspace(0., 0.25, 30), 5)
scores = np.zeros((len(etas),5))
for i in range(len(etas)):
    scores[i,:] = xp(n,K,p,etas[i],20)

In [13]:
cmap = plt.cm.get_cmap('tab10')
markers = ('o', 'v', '^', '<', '>',  's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X', '8')

df=pd.DataFrame(scores, columns=['Sponge','Sponge_sym',"sIR-SBM","IR-SSBM","IR-SSBM2"]) 
df['eta']=etas
df

df.to_csv('scores.csv', index=False)

In [1]:
plt.figure()

ax=df.plot(y=['Sponge'],x='eta')
for i, line in enumerate(ax.get_lines()):
    line.set_marker(markers[i])
plt.title(r'K = 15, n=5000, p =0.02 ')
plt.xlabel(r'Sign flip probability $(\eta)$')
plt.ylabel(r'Recovery score')
plt.legend(loc = 5)

plt.show()

NameError: name 'plt' is not defined