In [74]:
import time

import string
import random

import numpy as np
import networkx as nx

import community
from community import best_partition

In [75]:
# Synthetic event catalogue: 1024 random six-letter uppercase names.
# string.ascii_uppercase is always A-Z, whereas string.uppercase follows the
# active locale (and does not exist on Python 3).
event_all = ["".join(random.choice(string.ascii_uppercase) for _ in range(6)) for _ in range(1024)]

In [76]:
# Number of people generated for each group.
groups = [80, 43, 120, 30, 20]

# Slice boundaries into event_all: half of each running total of group sizes.
# `//` keeps the indices integral on both Python 2 and 3, and the builtin sum()
# replaces np.sum() over a generator, which NumPy has deprecated.
boundaries = [sum(groups[:count]) // 2 for count in range(len(groups) + 1)]

# One contiguous event pool per group, followed by a trailing empty pool
# (the generator cell never samples from the last group's pool or the empty one).
events = [event_all[lo:hi] for lo, hi in zip(boundaries, boundaries[1:])] + [[]]

# Inclusive bounds for the random `timestamp` column (presumably Unix epoch
# seconds; the two values are 604800 apart).
dates = [1496801812, 1497406612]
sep = "\t"

In [77]:
# Lower bound on how many events a person in a regular group is given.
minimum_event = 1

with open("people_event.tsv", "wb") as out_file:
    # Header row, then one row per (person, timestamp, event).
    out_file.write("{}\n".format(sep.join(["people_name", "timestamp", "event"])))

    cnt_people = 1
    last_group_idx = len(groups) - 1
    for idx, group in enumerate(groups):
        for people in range(group):
            people_name = "people_{:04d}".format(cnt_people)

            if idx == last_group_idx:
                # The last group draws 5 events from the whole catalogue
                # instead of from a pool of its own.
                candidated_events = random.sample(event_all, 5)
            else:
                group_events = events[idx]
                cnt_event = random.randint(minimum_event, len(group_events))

                # Even-numbered members favour the even-position events of the
                # pool, odd-numbered members the odd-position ones: the
                # favoured half is added 8 extra times to the sampling pool.
                plus_events = group_events[people % 2::2]

                # Sampling is without replacement over the weighted pool, so
                # the same event name can be drawn more than once.
                candidated_events = random.sample(group_events + plus_events * 8, cnt_event)

            for event in candidated_events:
                timestamp = random.randint(dates[0], dates[1])
                out_file.write("{}{sep}{}{sep}{}\n".format(people_name, timestamp, event, sep=sep))

            cnt_people += 1

In [78]:
# Invert people_event.tsv into one row per event listing everyone attached to it.
# The input is read completely before the output file is opened, so a failure
# while parsing no longer leaves a truncated event_relation1.tsv behind.
written_events = {}
with open("people_event.tsv", "rb") as in_file:
    next(in_file, None)  # skip the header row; an empty file is tolerated
    for line in in_file:
        line = line.strip()
        if not line:
            continue  # ignore blank lines instead of failing the 3-way unpack
        pid, timestamp, event = line.split(sep)
        written_events.setdefault(event, set()).add(pid)

with open("event_relation1.tsv", "wb") as out_file:
    out_file.write("event\tpeoples\n")
    # Sorted events and members make the file reproducible for a given input
    # rather than following dict/set hash order.
    for event in sorted(written_events):
        out_file.write("{}{sep}{}\n".format(event, ",".join(sorted(written_events[event])), sep=sep))

In [79]:
def build_graph(g, people, community):
    """Add co-occurrence edges between every pair of `people` to graph `g`.

    Each unordered pair of entries in `people` whose two members are both in
    `community` gets an edge; the edge's ``weight`` attribute starts at 1 and
    is incremented by 1 every time the pair is seen again.

    Args:
        g: graph exposing ``has_edge``, ``get_edge_data`` and ``add_edge``
            (the notebook passes an ``nx.Graph``); it is modified in place.
        people: sequence of node identifiers that occurred together.
        community: container supporting ``in``; pairs with a member outside it
            are skipped. Note: inside this function the name hides the
            imported ``community`` module.

    Returns:
        The same graph object `g`, for call chaining.
    """
    # Filter once up front; relative order is kept, so the pairs are visited
    # in the same order as a filtered nested index loop over `people`.
    members = [person for person in people if person in community]

    for curr_idx, curr_person in enumerate(members):
        for next_person in members[curr_idx + 1:]:
            if g.has_edge(curr_person, next_person):
                data = g.get_edge_data(curr_person, next_person)
                # Only `weight` is updated. A `key=` keyword is not passed:
                # on an nx.Graph it would be stored as a stray edge attribute
                # present only on edges seen more than once.
                g.add_edge(curr_person, next_person, weight=data['weight'] + 1)
            else:
                g.add_edge(curr_person, next_person, weight=1)

    return g

# Map each person to the set of events they appear with in people_event.tsv.
# NOTE: this rebinds `events`, which earlier held the list of per-group event
# pools used by the generator cell; re-running that cell after this one needs
# the pools to be rebuilt first.
events = {}
with open("people_event.tsv", "rb") as in_file:
    next(in_file, None)  # skip the header row; an empty file is tolerated
    for line in in_file:
        line = line.strip()
        if not line:
            continue  # ignore blank lines instead of failing the 3-way unpack
        person, _, event = line.split(sep)
        events.setdefault(person, set()).add(event)

In [80]:
def community_detection(graph, level, name, modularity_lower=0.2, modularity_upper=0.8):
    """Recursively partition `graph` and print one indented line per community.

    The graph is partitioned with ``community.best_partition`` and scored with
    ``community.modularity``. When the score lies strictly between
    `modularity_lower` and `modularity_upper`, the split is reported and each
    resulting community's subgraph is processed recursively; otherwise only
    the node count and score are reported and recursion stops.

    Args:
        graph: graph exposing ``number_of_nodes`` and ``subgraph`` and
            accepted by the imported ``community`` module.
        level: recursion depth; used as the number of leading tabs.
        name: label of this community; children are named ``<name>-<cid>``.
        modularity_lower: exclusive lower bound for splitting further.
        modularity_upper: exclusive upper bound for splitting further.

    Returns:
        None. Results are printed to standard output.
    """
    partitions = community.best_partition(graph)
    modularity = community.modularity(partitions, graph)

    # Group nodes by the community id assigned to them.
    communities = {}
    for node, cid in partitions.items():
        communities.setdefault(cid, []).append(node)

    indent = "\t" * level
    if modularity_lower < modularity < modularity_upper:
        print("{}There are {} people in this community({}), and modularity is {:4f}, split {} communities".format(
            indent, graph.number_of_nodes(), name, modularity, len(communities)))

        for cid, people in communities.items():
            community_name = "{}-{}".format(name, cid)
            # The recursive call partitions and scores the subgraph itself, so
            # nothing is pre-computed here (doing so would repeat the
            # partitioning for every sub-community and discard the result).
            community_detection(graph.subgraph(people), level + 1, community_name,
                                modularity_lower, modularity_upper)
    else:
        print("{}There are {} people in this community({}), and modularity is {:4f}".format(
            indent, graph.number_of_nodes(), name, modularity))

In [81]:
# Build the person co-occurrence graph: every event contributes one weighted
# edge increment for each pair of people listed on its row.
g = nx.Graph()
with open("event_relation1.tsv", "rb") as in_file:
    next(in_file, None)  # skip the header row; an empty file is tolerated
    for line in in_file:
        line = line.strip()
        if not line:
            continue  # ignore blank lines instead of failing the 2-way unpack
        event, people = line.split(sep)
        people = people.split(",")

        # Everyone on the row is passed as the allowed set, so no pair is filtered out.
        g = build_graph(g, people, set(people))

# Print the recursive community breakdown, starting from the root label "0".
community_detection(g, 0, "0")

There are 287 people in this community(0), and modularity is 0.479939, split 7 communities
	There are 81 people in this community(0-0), and modularity is 0.253558, split 2 communities
		There are 39 people in this community(0-0-0), and modularity is 0.004402
		There are 42 people in this community(0-0-1), and modularity is 0.018939
	There are 63 people in this community(0-1), and modularity is 0.003093
	There are 64 people in this community(0-2), and modularity is 0.005168
	There are 30 people in this community(0-3), and modularity is 0.251648, split 2 communities
		There are 16 people in this community(0-3-0), and modularity is 0.023209
		There are 14 people in this community(0-3-1), and modularity is 0.040799
	There are 44 people in this community(0-4), and modularity is 0.263446, split 2 communities
		There are 22 people in this community(0-4-0), and modularity is 0.027067
		There are 22 people in this community(0-4-1), and modularity is 0.008593
	There are 3 people in this communit

In [84]:
# Partition the full graph once more and persist the node -> community labels.
partitions = community.best_partition(g)

with open("people_label.txt", "wb") as out_file:
    # One "<person>\t<community id>" row per node, ordered by person name
    # (keys are unique, so sorting the (key, value) pairs orders by key).
    label_rows = ["{}\t{}\n".format(p, c) for p, c in sorted(partitions.items())]
    out_file.writelines(label_rows)

In [1]:
# Shell out (IPython `!` escape) to copy GraphFeatureRepresentaion.ipynb to
# GraphFeatureRepresentaion_part1.ipynb.
!cp GraphFeatureRepresentaion.ipynb GraphFeatureRepresentaion_part1.ipynb