## Altmap Experiments
### Compare Altmap to the map equation (Infomap) using NetworkX on real-world networks


In [2]:
import networkx as nx
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score as ami_score
from sklearn.metrics import normalized_mutual_info_score as nmi_score
import itertools
from collections import OrderedDict

# show plots in separate window
%pylab
# load helpers and wrappers
%run helpers.py 

# compute adjusted mutual information between two partitions
def compute_ami(communities_true, communities_found):
    """Return the adjusted mutual information (AMI) between two partitions.

    Parameters
    ----------
    communities_true : mapping of node id -> ground-truth community label
    communities_found : mapping of node id -> detected community label

    Returns
    -------
    The value returned by ``ami_score`` (arithmetic averaging) for the two
    label sequences.

    Notes
    -----
    Only nodes present in BOTH mappings are scored. Nodes that appear in
    ``communities_found`` but not in the ground truth are ignored, and
    ground-truth nodes without a detected label are skipped as well.
    """
    # Build both label lists from one shared node sequence. Taking
    # `.values()` of each mapping independently would pair labels by
    # position, which is only correct if both mappings happen to iterate
    # their nodes in the same order.
    shared_nodes = [node for node in communities_true if node in communities_found]
    labels_true = [communities_true[node] for node in shared_nodes]
    labels_found = [communities_found[node] for node in shared_nodes]

    # To report NMI instead, swap in:
    # return nmi_score(labels_true,labels_found, average_method='arithmetic')
    return ami_score(labels_true,labels_found, average_method='arithmetic')

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [None]:
# Choose the dataset by leaving exactly one assignment uncommented.
# dataset = 'dblp' # citation network; highly overlapping communities
# dataset = 'youtube'
dataset = 'lj' # live journal
# dataset = 'amazon'

# Read the undirected graph; node ids are parsed as ints.
path = f'./realworld/com-{dataset}.ungraph.txt'
G = nx.read_adjlist(path, create_using=nx.Graph, nodetype=int)
all_nodes = set(G.nodes)

print(f'There are {G.number_of_nodes()} nodes in the network')
print(f'There are {G.number_of_edges()} edges in the network')

# Read the ground-truth communities: one community per line, with
# tab-separated node ids.
path = f'./realworld/com-{dataset}.top5000.cmty.txt'
with open(path, 'r') as file:
    lines = list(file)

comm_list = [[int(token) for token in line.strip().split('\t')] for line in lines]
node_ids = list(itertools.chain.from_iterable(comm_list))
# Communities are labelled 1..len(comm_list); each member gets its community's label.
labels = [comm_id for comm_id, comm in enumerate(comm_list, start=1) for _ in comm]
# A node listed in several communities keeps the label of the last one listed;
# the mapping is then ordered by node id.
communities_true = OrderedDict(sorted(dict(zip(node_ids, labels)).items()))

nodes_in_top = set(np.unique(node_ids)) # node ids occurring in some top community
nodes_not_in_top = all_nodes.difference(nodes_in_top)

# Reduced graph containing only nodes that belong to some ground-truth community.
G_easy = G.copy()
G_easy.remove_nodes_from(nodes_not_in_top)

print(f'There are {len(nodes_in_top)} unique nodes in the top 5000 communities.')

In [12]:
reduced_network = False # remove nodes that are not in top 5000 comms from graph

G_test = G_easy if reduced_network else G

# Reference count for RENDC: one ground-truth community per line of the loaded
# community file (5000 for the top5000 files).
num_communities_true = len(comm_list)


def evaluate_method(method_name, **infomap_kwargs):
    """Run one community-detection variant on ``G_test`` and print its scores.

    Parameters
    ----------
    method_name : str
        Name used in the progress message (e.g. 'Infomap', 'Altmap').
    **infomap_kwargs
        Keyword arguments forwarded unchanged to ``infomap``.

    Returns
    -------
    (communities_found, num_communities_found, ami)
        The node -> community mapping (restricted to ground-truth nodes when
        the full graph was used), the number of communities it contains, and
        the AMI against ``communities_true``.
    """
    print(f'Testing {method_name}...')
    # NOTE(review): `infomap` and `get_num_communities` presumably come from
    # helpers.py; only the first two return values of `infomap` are used here.
    communities_found, num_communities_found,_,_ = infomap(G_test, **infomap_kwargs)
    print (f'Found {num_communities_found} communities.')

    if not reduced_network:
        # Drop nodes without a ground-truth label. pop(..., None) tolerates
        # nodes that are absent from the result instead of raising KeyError.
        for node in nodes_not_in_top:
            communities_found.pop(node, None)
        num_communities_found = get_num_communities(communities_found)

    print (f'Found {num_communities_found} communities with top 5000 community nodes.')
    print (f'Achieved RENDC is {num_communities_found/num_communities_true - 1}.')

    ami = compute_ami(communities_true, communities_found)
    print (f'Achieved AMI is {ami}.')
    return communities_found, num_communities_found, ami


# Infomap runs first; the Altmap call passes update_inputfile=False, so the
# order is kept as in the original (presumably it reuses the first run's input
# file -- confirm in helpers.py).
# After the loop the module-level names hold the Altmap results.
for method_name, infomap_kwargs in (
    ('Infomap', {'altmap': False}),
    ('Altmap', {'altmap': True, 'update_inputfile': False}),
):
    communities_found, num_communities_found, ami = evaluate_method(method_name, **infomap_kwargs)


Testing Infomap...
Found 54434 communities.
Found 10311 communities with top 5000 community nodes.
Achieved RENDC is 1.0621999999999998.
Achieved AMI is 0.4668699578832647.
Testing Altmap...
Found 88947 communities.
Found 12492 communities with top 5000 community nodes.
Achieved RENDC is 1.4984000000000002.
Achieved AMI is 0.47012862178800996.
