## Altmap Experiments
### Compare AltMap to the map equation (Infomap) using NetworkX on real-world networks


In [19]:
import networkx as nx
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score as ami_score
from sklearn.metrics import normalized_mutual_info_score as nmi_score
import itertools
from collections import OrderedDict

# show plots in separate window
# NOTE: besides selecting the GUI backend, %pylab populates the interactive
# namespace from numpy and matplotlib (see the message printed in this cell's
# output), so names can become available without an explicit import.
%pylab
# load helpers and wrappers
# NOTE(review): presumably this is what provides `infomap`, which the
# community-detection cell calls -- confirm against helpers.py.
%run helpers.py 

# compute adjusted mutual information between two partitions
def compute_ami(communities_true, communities_found):
    """Return the adjusted mutual information (AMI) between two partitions.

    Both arguments map node id -> community label. Only nodes that appear in
    both mappings are scored, and both label lists are built from the same
    node sequence, so entry i of each list always refers to the same node
    regardless of the iteration order of either mapping.

    Parameters
    ----------
    communities_true : mapping
        Ground-truth assignment (node id -> community label).
    communities_found : mapping
        Detected assignment (node id -> community label). Nodes that have no
        ground-truth label are ignored.

    Returns
    -------
    float
        AMI of the two labelings, using arithmetic averaging.

    Raises
    ------
    ValueError
        If the two mappings have no node id in common (for example when one
        side uses str keys and the other int keys).
    """
    # Walk the ground truth once and keep only nodes that were also assigned a
    # community by the detection run; this fixes a single, shared node order.
    shared_nodes = [node for node in communities_true if node in communities_found]
    if not shared_nodes:
        raise ValueError(
            'communities_true and communities_found share no node ids; '
            'check that both use the same key type'
        )

    labels_true = [communities_true[node] for node in shared_nodes]
    labels_found = [communities_found[node] for node in shared_nodes]

    # return nmi_score(labels_true,labels_found, average_method='arithmetic')
    return ami_score(labels_true, labels_found, average_method='arithmetic')

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [17]:
# Choose the dataset by its short name (leave exactly one line uncommented).
# dataset = 'dblp' # citation network; highly overlapping communities
# dataset = 'youtube'
dataset = 'lj' # live journal
# dataset = 'amazon'

# load network
# NOTE(review): nodes are read as str here, while the ground-truth ids below
# are parsed as int -- confirm that the detection wrapper returns int keys,
# otherwise key lookups between the two mappings will never match.
path = './realworld/com-' + dataset + '.ungraph.txt'
G = nx.read_adjlist(path, create_using=nx.Graph, nodetype=str)

print(f'There are {len(G.nodes())} nodes in the network')
print(f'There are {len(G.edges())} edges in the network')

# load groundtruth comms
path = './realworld/com-' + dataset + '.top5000.cmty.txt'

with open(path, 'r') as file:
    lines = file.readlines()

# each line is one community: tab-separated integer node ids
comm_list = [[int(field) for field in row.strip().split('\t')] for row in lines]

# flatten the communities and give every member the 1-based index of its line
node_ids = list(itertools.chain.from_iterable(comm_list))
labels = list(
    itertools.chain.from_iterable(
        [comm_index] * len(members)
        for comm_index, members in enumerate(comm_list, start=1)
    )
)

# a node listed in several communities keeps the label of the last one it
# appears in (later dict entries overwrite earlier ones); result is sorted by id
communities_true = OrderedDict(sorted(dict(zip(node_ids, labels)).items()))

# total memberships vs. distinct nodes: a gap means communities overlap
print (len(node_ids))
print(len(set(node_ids)))

KeyboardInterrupt: 

In [15]:
# run community detection
# First pass: infomap with altmap disabled; second pass: altmap enabled
# (update_inputfile=False is only passed on the second pass). Each pass is
# scored against the ground truth with compute_ami and reported.
for infomap_kwargs in (
    {'altmap': False},
    {'altmap': True, 'update_inputfile': False},
):
    communities_found, num_communities_found,_,_ = infomap(G, **infomap_kwargs)
    ami = compute_ami(communities_true, communities_found)
    print (f'Found {num_communities_found} communities.')
    print (f'Achieved AMI is {ami}.')

# communities_found, num_communities_found,_,_ = infomap(G, altmap=True, init='sc')
# print (f'We found {num_communities_found} communities.')

# print results





Found 14221 communities.
Achieved AMI is 0.9726783706462871.
Found 14679 communities.
Achieved AMI is 0.9712353318971827.
