In [None]:
import igraph as ig
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression
from collections import Counter
import os
import umap
import pickle
import partition_igraph
import subprocess
from sklearn.metrics import adjusted_mutual_info_score as AMI

## set these paths accordingly
## datadir: prefix prepended to the dataset file names read by this notebook
## abcd_path: prefix prepended to the julia sampler scripts called by this notebook
datadir = '../Datasets/'
abcd_path = '../../../ABCD/utils/'

## colors used for the book's figures; change them if you want other colors
## cls_edges: color applied to every edge; cls: palette indexed by community label
cls_edges = 'gainsboro'
cls = ['silver','dimgray','black']


In [None]:
## To produce LaTeX from a DataFrame
#df = df.round(decimals=3)
#print(df.to_latex(index=False))
#print(df.to_latex(index=True))

## Zachary (karate) graph


In [None]:
## build the Zachary graph and give every vertex a string name equal to its index
z = ig.Graph.Famous('zachary')
z.vs['name'] = list(map(str, range(z.vcount())))
z.vs['size'] = 7
z.es['color'] = cls_edges
## community label (0 or 1) of each vertex, listed in vertex order
z.vs['comm'] = [0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1]
## labels 0 and 1 pick palette entries 0 and 2 respectively
z.vs['color'] = [cls[2*label] for label in z.vs['comm']]
ig.plot(z, bbox=(0,0,300,200))

## Hierarchical clustering and dendrograms


In [None]:
## Girvan-Newman (edge betweenness): compute the dendrogram and save it to an EPS file
gn = z.community_edge_betweenness()
ig.plot(gn, target='zachary_dendrogram.eps', bbox=(0,0,300,400))

In [None]:
## modularity of each cut of the Girvan-Newman dendrogram,
## for 1 cluster up to as many clusters as there are vertices
n_vertices = z.vcount()
q = [z.modularity(gn.as_clustering(n=i+1)) for i in np.arange(n_vertices)]
plt.plot(np.arange(1,1+n_vertices),q,'o-',color='black')
plt.xlabel('number of clusters',fontsize=14)
plt.ylabel('modularity',fontsize=14);

In [None]:
## cut the Girvan-Newman dendrogram into 2 clusters and recolor the vertices
two_way = gn.as_clustering(n=2)
z.vs['gn'] = two_way.membership
## cluster ids 0 and 1 pick palette entries 0 and 2 respectively
z.vs['color'] = [cls[2*label] for label in z.vs['gn']]
ig.plot(z, bbox=(0,0,300,200))

In [None]:
## optimal modularity (5 clusters)
## NOTE: this rebinds the global palette `cls` to five colors, one per cluster;
## cells executed after this one see the five-color version.
cls = ['silver', 'dimgray', 'gainsboro', 'gray', 'white']
## cut the dendrogram once and use the same 5-cluster membership for both the
## labels and the colors (the colors were previously taken from the 2-cluster
## attribute z.vs['gn'], so only two of the five palette entries were ever used)
membership_5 = gn.as_clustering(n=5).membership
z.vs['label'] = membership_5
z.vs['color'] = [cls[i] for i in membership_5]
## larger vertices so that the cluster id printed on each vertex is readable
z.vs['size'] = 12
z.vs['label_size'] = 8
ig.plot(z, bbox=(0,0,300,200))

In [None]:
## walktrap (ref: Pons-Latapy) also yields a dendrogram;
## vertices are labelled with their names before plotting
vertex_names = z.vs['name']
z.vs['label'] = vertex_names
wt = z.community_walktrap()
ig.plot(wt, target='zachary_dendrogram_wt.eps', bbox=(0,0,300,400))

In [None]:
## modularity of each cut of the walktrap dendrogram
cluster_counts = np.arange(1,1+z.vcount())
q = [z.modularity(wt.as_clustering(n=k)) for k in cluster_counts]
plt.plot(cluster_counts,q,'o-',color='black')
plt.xlabel('number of clusters',fontsize=14)
plt.ylabel('modularity',fontsize=14);

In [None]:
## load the small ABCD graph and its community file
graph_file = datadir+'ABCD/abcd_small_cm.dat'
comms_file = datadir+'ABCD/abcd_small_cm_comms.dat'
g = ig.Graph.Read_Ncol(graph_file,directed=False)
## keep only the second column of the community file
c = np.loadtxt(comms_file,dtype='uint16',usecols=(1))
## a vertex named n takes its community from row int(n)-1 of that column
g.vs['comm'] = [c[int(name)-1] for name in g.vs['name']]
## vertex index -> community label shifted down by one
gt = {idx:(label-1) for idx,label in enumerate(g.vs['comm'])}
## int(vertex name) -> vertex index
n2k = {int(name):idx for idx,name in enumerate(g.vs['name'])}
g.vs['size'] = 7
g.es['color'] = cls_edges
## community label i picks palette entry i-1
g.vs['color'] = [cls[label-1] for label in g.vs['comm']]
ig.plot(g, bbox=(0,0,300,200))

In [None]:
## Girvan-Newman on the ABCD graph: for every cut of the dendrogram record
## the modularity (q) and the AMI against the labels in g.vs['comm'] (a).
## NOTE: this rebinds `gn`, which earlier cells used for the Zachary dendrogram.
q = []
a = []
gn = g.community_edge_betweenness()
## read the reference labels once instead of on every iteration
comm = g.vs['comm']
for i in np.arange(g.vcount()):
    ## build each clustering once and reuse it for both scores
    clustering = gn.as_clustering(n=i+1)
    q.append(g.modularity(clustering))
    a.append(AMI(comm,clustering.membership))
plt.plot(np.arange(1,1+g.vcount()),q,'.-',color='black',label='modularity')
plt.plot(np.arange(1,1+g.vcount()),a,'.-',color='grey',label='AMI')
plt.xlabel('number of clusters',fontsize=14)
plt.ylabel('modularity',fontsize=14)
plt.legend();

## ABCD with varying xi

In [None]:
## For 10 repetitions: sample new degree and community-size files, generate
## one ABCD graph per noise level xi, and record the AMI of several clustering
## algorithms against the labels read from comm.dat.
## Every measurement is appended to L as [algorithm, xi, AMI].
##
## The julia scripts are launched with subprocess.run(..., check=True) and an
## argument list: a failing command now raises CalledProcessError instead of
## being ignored (which would let the loop read stale net.dat/comm.dat files),
## and no shell is involved in building the command line.
L = []
for rep in range(10):
    ## generate new degree and community size values
    cmd = ['julia',abcd_path+'deg_sampler.jl','deg.dat','2.5','10','100','1000','1000']
    subprocess.run(cmd,check=True)
    cmd = ['julia',abcd_path+'com_sampler.jl','cs.dat','1.5','50','150','1000','1000']
    subprocess.run(cmd,check=True)
    ## generate graphs for a range of xi
    for x in np.arange(10,91,5):
        xi = x/100
        cmd = ['julia',abcd_path+'graph_sampler.jl','net.dat','comm.dat','deg.dat','cs.dat',
               str(xi),'false','false']
        subprocess.run(cmd,check=True)
        ## compute AMI for various clustering algorithms
        g = ig.Graph.Read_Ncol('net.dat',directed=False)
        c = np.loadtxt('comm.dat',dtype='uint16',usecols=(1))
        ## a vertex named n takes its community from row int(n)-1 of comm.dat's second column
        g.vs['comm'] = [c[int(v['name'])-1] for v in g.vs]
        comm = g.vs['comm']
        ## clustering
        L.append(['ECG',xi,AMI(g.community_ecg().membership,comm)])
        L.append(['Louvain',xi,AMI(g.community_multilevel().membership,comm)])
        L.append(['Infomap',xi,AMI(g.community_infomap().membership,comm)])
        L.append(['Label Prop.',xi,AMI(g.community_label_propagation().membership,comm)])

In [None]:
## one row per measurement collected in L
result_columns = ['algo','xi','AMI']
D = pd.DataFrame(data=L, columns=result_columns)
## mean of the remaining column for each (algorithm, xi) pair
grouping_keys = ['algo','xi']
X = D.groupby(grouping_keys).mean()

In [None]:
## one black curve per algorithm, each with its own line style
a = ['ECG','Louvain','Infomap','Label Prop.']
lt = ['-','--','-.',':']
for i, algo in enumerate(a):
    curve = X.loc[algo]
    plt.plot(curve.index,curve,lt[i],label=algo,color='black')
plt.xlabel(r'ABCD noise ($\xi$)',fontsize=14)
plt.ylabel('AMI',fontsize=14)
plt.legend();