In [16]:
import networkx as nx
import community

from community import best_partition

In [17]:
# Field separator used when splitting each line of the .tsv input files.
sep = "\t"

In [18]:
def build_graph(g, people, community):
    """Add co-occurrence edges between every pair of `people` to graph `g`.

    For each unordered pair of entries in `people` where both are members of
    `community`, the edge weight is set to 1 if the edge is new, otherwise the
    existing 'weight' is incremented by 1.

    Parameters:
        g: graph object exposing has_edge / get_edge_data / add_edge
           (the notebook passes an nx.Graph); it is mutated in place.
        people: sequence of node identifiers that appeared together.
        community: container supporting `in`; only people found in it are
            connected. NOTE: inside this function the name shadows the
            imported `community` module.

    Returns:
        The same graph object `g`, for call chaining.
    """
    # Filter once up front instead of re-testing membership for every pair;
    # order is preserved so pairs are visited in the same order as before.
    members = [person for person in people if person in community]

    for idx, curr_person in enumerate(members):
        for next_person in members[idx + 1:]:
            if g.has_edge(curr_person, next_person):
                weight = g.get_edge_data(curr_person, next_person)['weight'] + 1
            else:
                weight = 1
            # Only 'weight' is stored: on a simple Graph any extra keyword
            # (the former key="edge") becomes a stray edge attribute.
            g.add_edge(curr_person, next_person, weight=weight)

    return g

# Map each person to the set of events read from the people/event file.
events = {}
with open("../data/people_event.tsv", "r") as in_file:
    # Skip the first line (presumably a header row). The default argument
    # keeps an empty file from raising StopIteration, and the builtin next()
    # works on both Python 2 and Python 3.
    next(in_file, None)
    for line in in_file:
        record = line.strip()
        if not record:
            # A blank line cannot be unpacked into three fields.
            continue
        # The middle column is not used here.
        person, _, event = record.split(sep)
        events.setdefault(person, set()).add(event)

In [19]:
def community_detection(graph, level, name, modularity_lower=0.2, modularity_upper=0.8):
    """Recursively partition `graph` into communities and print a report.

    The graph is partitioned with `community.best_partition` and scored with
    `community.modularity`. When the modularity lies strictly between
    `modularity_lower` and `modularity_upper` (and more than one community
    was found), each community's subgraph is analysed recursively; otherwise
    the graph is reported as a leaf.

    Parameters:
        graph: graph exposing number_of_nodes / number_of_edges / subgraph
            (the notebook passes an nx.Graph).
        level: recursion depth; the report line is indented by this many tabs.
        name: label printed for this community; children are named
            "<name>-<community id>".
        modularity_lower, modularity_upper: exclusive bounds within which a
            partition is considered worth splitting further.

    Returns:
        None. The result is the printed report.
    """
    indent = "\t" * level
    n_people = graph.number_of_nodes()

    communities = {}
    modularity = 0.0
    # Modularity is undefined for a graph without edges (python-louvain raises
    # ValueError there), so an edgeless graph, e.g. a single-person community,
    # is reported as a leaf with modularity 0.0 instead of crashing.
    if graph.number_of_edges() > 0:
        partitions = community.best_partition(graph)
        modularity = community.modularity(partitions, graph)
        for person, cid in partitions.items():
            communities.setdefault(cid, []).append(person)

    # Requiring more than one community prevents endless recursion on the
    # same node set when a caller passes a lower bound below zero.
    worth_splitting = (len(communities) > 1
                       and modularity_lower < modularity < modularity_upper)

    # NOTE(review): '{:4f}' is a minimum field width of 4, not 4 decimals; it
    # is kept as-is so the report matches existing output. Confirm whether
    # '{:.4f}' was intended.
    if not worth_splitting:
        print("{}There are {} people in this community({}), and modularity is {:4f}".format(
            indent, n_people, name, modularity))
        return

    print("{}There are {} people in this community({}), and modularity is {:4f}, split {} communities".format(
        indent, n_people, name, modularity, len(communities)))

    for cid, people in communities.items():
        community_name = "{}-{}".format(name, cid)
        # The recursive call partitions the subgraph itself; no separate
        # (discarded) Louvain pass is run here.
        community_detection(graph.subgraph(people), level + 1, community_name,
                            modularity_lower, modularity_upper)

In [20]:
# Build the co-occurrence graph: for every line of the event file, each pair
# of people listed together gets an edge whose weight counts shared lines.
g = nx.Graph()
with open("../data/event_relation.tsv", "r") as in_file:
    # Skip the first line (presumably a header row); the default argument
    # keeps an empty file from raising StopIteration.
    next(in_file, None)
    for line in in_file:
        record = line.strip()
        if not record:
            # A blank line cannot be unpacked into two fields.
            continue
        event, people_field = record.split(sep)
        people = people_field.split(",")

        # Everyone on the line is eligible, so the filter set is the list itself.
        g = build_graph(g, people, set(people))

# Print the recursive community report for the whole graph, rooted at "0".
community_detection(g, 0, "0")

There are 1000 people in this community(0), and modularity is 0.548649, split 5 communities
	There are 201 people in this community(0-0), and modularity is 0.001306
	There are 201 people in this community(0-1), and modularity is 0.001730
	There are 307 people in this community(0-2), and modularity is 0.237815, split 2 communities
		There are 154 people in this community(0-2-0), and modularity is 0.000968
		There are 153 people in this community(0-2-1), and modularity is 0.001528
	There are 151 people in this community(0-3), and modularity is 0.236836, split 2 communities
		There are 76 people in this community(0-3-0), and modularity is 0.000813
		There are 75 people in this community(0-3-1), and modularity is 0.000763
	There are 140 people in this community(0-4), and modularity is 0.235455, split 2 communities
		There are 70 people in this community(0-4-0), and modularity is 0.001771
		There are 70 people in this community(0-4-1), and modularity is 0.001321
