In [None]:
######################################################################################################
# Loading libraries ##################################################################################
######################################################################################################
import argparse, math, random, simanneal, sys
import networkx as nx
import numpy as np
import pandas as pd
from __future__ import print_function
from math import log
from simanneal import Annealer
from sklearn.metrics.pairwise import pairwise_distances
from scipy.spatial.distance import hamming

#####################################################################################################
# Defining constants ################################################################################
#####################################################################################################
TAU = 0.15 # Used in PageRank calculation (alpha is 1-tau where [RAB2009 says \tau=0.15])
PAGE_RANK = 'page_rank'
MODULE_ID = 'module_id'

#####################################################################################################
# Auxiliary functions definition ####################################################################
#####################################################################################################
## It's generally a good practice not to use numbers in functions' names 
##   (just pedantic CS rules that once were important because of old architectures).
def log_base_two(probability):
    """Return the base-2 logarithm of ``probability``.

    Thin wrapper around ``math.log`` with the base fixed at 2, so it raises
    ``ValueError`` for non-positive input exactly as ``math.log`` does.
    """
    return log(probability, 2)

## Using only one 'return' statement is safer than branching them in conditional statements. 
##   It does not make a difference here, but in larger projects it makes code more readable 
##   (and 'friendlier' to collaborators).
def partial_entropy(probability):
    """Return ``probability * log2(probability)``.

    This is the "half of the entropy function" used in the InfoMap paper.
    A probability of exactly 0 yields 0, which also avoids taking the
    logarithm of zero.
    """
    # Local renamed so it no longer shadows the function's own name.
    term = 0
    if probability != 0:
        # Use this module's base-2 helper: the earlier call to ``log2``
        # referred to a name that is neither defined nor imported in the
        # visible code, so every non-zero input raised NameError.
        term = probability * log_base_two(probability)
    return term

## Splitting this large function into 3 sub-functions. It's generally a good practice to split large 
##   functions into smaller ones: one function = one specific goal. In this case, having three 
##   functions makes sense because the smaller sub-routines could be used outside the scope of 
##   the main "load and process". As a rule of thumb: if you have around 20 or more lines in one 
##   function, you are probably doing too many things in the same block.
def load_and_process_directed_graph(filename):
    """Build a fully processed directed network from ``filename``.

    Runs the three processing steps in order: load the graph from the file,
    normalize its edge weights, and compute its PageRank using the
    module-level ``TAU`` (the ranks are stored in the graph itself).
    Returns the resulting graph.
    """
    return calculate_pagerank(
        normalize_edge_weights(load_directed_graph_from_file(filename)),
        TAU,
    )

### It is useful to define a function for this specific 'loading' task because you can use it outside
###    the "load and process" scope to do other analyses and tests.
def load_directed_graph_from_file(filename):
    """Read a Pajek ('.net') file and return it as a ``networkx.DiGraph``.

    A one-line summary with the node and edge counts is printed.
    """
    graph = nx.read_pajek(filename)
    graph = nx.DiGraph(graph)
    node_count = len(graph)
    edge_count = len(graph.edges())
    print("Loaded a graph (%d nodes, %d edges)" % (node_count, edge_count))
    return graph

### Love this routine! So elegant.
def normalize_edge_weights(directedGraph):
    """Rescale edge weights so that each node's edge weights sum to 1.

    For every node, the weights of the edges returned by
    ``directedGraph.edges(node, data=True)`` are divided by their total.
    The graph is modified in place and is also returned.

    Edge cases:
      * an edge without a 'weight' attribute counts as weight 1, mirroring
        the default used by ``networkx.stochastic_graph``;
      * a node with no edges, or whose weights total 0, is left untouched
        instead of raising ``ZeroDivisionError``.
    """
    for node in directedGraph:
        # Materialise once: the edges are traversed twice below.
        edges = list(directedGraph.edges(node, data=True))
        # float() keeps true division for integer weights under Python 2.
        total_weight = float(sum(data.get('weight', 1) for (_, _, data) in edges))
        if total_weight == 0:
            # Nothing to normalise (no edges, or weights cancel to zero).
            continue
        for (_, _, data) in edges:
            data['weight'] = data.get('weight', 1) / total_weight
    return directedGraph
    
### I think it's useful to have 'tau' as an argument, even when it is defined as a global constant.
###   Even though the use of the global constant is completely justified in this case, having tau
###   as an argument for your function makes it much more flexible; and the typing is a small price
###   to pay for it.
def calculate_pagerank(normalizedGraph, tau):
    """Compute the network's PageRank and store it on every node.

    Args:
        normalizedGraph: graph passed to ``networkx.pagerank``; it is
            modified in place.
        tau: PageRank is run with ``alpha = 1 - tau``.

    Returns:
        The same graph, with each node's attribute dict holding its rank
        under the ``PAGE_RANK`` key.
    """
    # Honour the caller's ``tau``: the global TAU used to be hard-wired here,
    # silently ignoring the argument.
    page_ranks = nx.pagerank(normalizedGraph, alpha=1 - tau)
    # ``Graph.node`` was removed in NetworkX 2.4; fall back to ``Graph.nodes``
    # there, while still using ``.node`` on older releases that provide it.
    node_attributes = getattr(normalizedGraph, 'node', None)
    if node_attributes is None:
        node_attributes = normalizedGraph.nodes
    for (node, page_rank) in page_ranks.items():
        node_attributes[node][PAGE_RANK] = page_rank
    return normalizedGraph
    
## No changes. Just made the name a bit more specific.
def load_coordinates_from_file(filename):
    """Read a header-less CSV and return its X and Y columns.

    The file's columns are labelled 'X', 'Y' and 'w' in that order; the
    'w' column is dropped and a DataFrame with only 'X' and 'Y' is returned.
    """
    column_labels = ['X', 'Y', "w"]
    table = pd.read_csv(filename, names=column_labels, header=None)
    return table[["X", "Y"]]

#####################################################################################################
# GeoInfomap class definition #######################################################################
#####################################################################################################
#class GeoInfomap(Annealer):
    # Attributes
    # Initializers
    ## def __init__(self, state, module, graph, coordinates):
    # Mutators
    # Accessors
    # Actions
    ## def move(self):
    ## def energy(self):

In [None]:
#####################################################################################################
# Main ##############################################################################################
#####################################################################################################
# Load the raw network only (no weight normalization or PageRank here).
graph = load_directed_graph_from_file("houses.net")
# The loader is named ``load_coordinates_from_file``; the former call to
# ``load_coordinates`` referred to a name that is not defined in this file.
coords = load_coordinates_from_file("coordinates.csv")
# Re-label the rows 0..len(coords)-1.
coords.index = np.arange(0, len(coords))

# Initial state: one single-node list per node of the graph...
single_nodes = [[node] for node in graph]
# ...except that the first entry becomes the pair ['0', '1'] and the second
# entry is dropped.
# NOTE(review): assumes the first two nodes yielded by the graph are '0' and
# '1' -- confirm against the input file.
single_nodes[0] = ['0', '1']
del single_nodes[1]
init_state = single_nodes