In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import pytz
import matplotlib.pyplot as plt
from collections import Counter
from pandas import Series, DataFrame
import seaborn as sns

import hatching as ht
from scipy import stats

import igraph as ig
import networkx as nx
import graph_tool as gt

In [25]:
# GraphML file that the NetworkX, igraph and graph-tool loaders below all read.
# NOTE(review): relative, hardcoded file name — it is resolved against the kernel's working directory.
f = 'T1_10h_new_95conf_212dist_3ilen_2gap_600minutes_2016-08-14 08:00:00+00:00.graphml'

In [26]:
# Echo the file name so the notebook records which graph was analysed.
f

'T1_10h_new_95conf_212dist_3ilen_2gap_600minutes_2016-08-14 08:00:00+00:00.graphml'

In [27]:
# Load the GraphML file as a NetworkX graph; `nxg` is the input of the Louvain cell at the end.
nxg = nx.read_graphml(f)

In [28]:
# Summarise the NetworkX graph without `nx.info`, which was removed in NetworkX 3.0.
# Only methods that exist in both old and new NetworkX releases are used, and the
# printed layout mirrors what `nx.info` produced for an undirected graph.
node_count = nxg.number_of_nodes()
edge_count = nxg.number_of_edges()
print("Name: {}".format(nxg.name))
print("Type: {}".format(type(nxg).__name__))
print("Number of nodes: {}".format(node_count))
print("Number of edges: {}".format(edge_count))
if node_count:  # an empty graph has no average degree (avoids dividing by zero)
    # dict(...) works whether degree() returns a dict or a degree view.
    degree_total = sum(dict(nxg.degree()).values())
    print("Average degree: {:8.4f}".format(degree_total / float(node_count)))

Name: 
Type: Graph
Number of nodes: 963
Number of edges: 45235
Average degree:  93.9460


In [29]:
# Load the same GraphML file with python-igraph; `igg` feeds every igraph community cell below.
igg = ig.Graph.Read_GraphML(f)

In [30]:
# Sanity check: vertex/edge counts and attribute names should agree with the NetworkX load above.
igg.summary()

'IGRAPH U--- 963 45235 -- \n+ attr: id (v), frequency (e), totalduration (e)'

In [31]:
# Load the same GraphML file with graph-tool (used in the graph-tool section further down).
gtg = gt.load_graph(f)

In [32]:
# Display the graph-tool graph's repr to compare its size with the other two loads.
gtg

<Graph object, undirected, with 963 vertices and 45235 edges at 0x7f8e843e0668>

# igraph community-detection algorithms

## :) Fastgreedy

In [33]:
# Fast-greedy community detection, using the `frequency` edge attribute as edge weight.
# The result is cut into a flat clustering in the next cell.
fg = igg.community_fastgreedy(weights="frequency")

In [34]:
# Convert the fast-greedy result into a flat clustering with `fg.optimal_count` clusters
# and print its one-line summary.
vc = fg.as_clustering(fg.optimal_count)
print(vc.summary())

Clustering with 963 elements and 4 clusters


In [35]:
# Pair every igraph vertex's GraphML `id` with the community index that the
# fast-greedy clustering assigned to it, then count vertices per community.
# The loop variables get their own names: reusing `membership` as the loop
# target would leave it bound to the last vertex's community index instead of
# the full membership list once the cell has run.
membership = vc.membership
ids = DataFrame(
    [(int(vertex['id']), community) for vertex, community in zip(igg.vs, membership)],
    columns=["id", "community"],
)
ids.groupby(by="community").size()

community
0    501
1    168
2    292
3      2
dtype: int64

## Infomap

In [None]:
# Infomap community detection weighted by the `frequency` edge attribute.
# This cell has no execution count in the saved notebook, i.e. it was not run.
im = igg.community_infomap(edge_weights="frequency")

In [None]:
# One-line summary of the Infomap result (requires the cell above to have been run).
im.summary()

## :) leading_eigenvector

In [37]:
# Leading-eigenvector community detection, using the `frequency` edge attribute as edge weight.
lev = igg.community_leading_eigenvector(weights="frequency")

In [38]:
# One-line summary: number of vertices and number of clusters found.
lev.summary()

'Clustering with 963 elements and 12 clusters'

In [39]:
def getMembershipDF(stuff, g):
    """Print the size of every community in a clustering, then the total vertex count.

    Parameters
    ----------
    stuff : object with a ``membership`` attribute
        ``membership`` is a sequence holding one community index per vertex
        (the notebook passes igraph clustering results).
    g : object with a ``vs`` attribute
        ``vs`` is an iterable of vertices, each subscriptable with ``'id'``.
        Vertices are paired positionally with ``stuff.membership``; the ``id``
        value must be convertible with ``int()``.

    Returns
    -------
    None
        The per-community counts and their sum are printed, not returned.
    """
    # Distinct loop names: the original reused `membership` as the loop target,
    # shadowing the very list it was iterating over.
    records = [
        (int(vertex['id']), community)
        for vertex, community in zip(g.vs, stuff.membership)
    ]
    ids = DataFrame(records, columns=["id", "community"])

    # Group once and reuse the result for both printed values.
    community_sizes = ids.groupby(by="community").size()
    print(community_sizes)
    print(community_sizes.sum())

In [40]:
# Print the community sizes of the leading-eigenvector clustering and the total vertex count.
getMembershipDF(lev, igg)

community
0     424
1     288
2     238
3       1
4       1
5       1
6       1
7       1
8       5
9       1
10      1
11      1
dtype: int64
963


## Label Propagation

In [None]:
# Label-propagation community detection weighted by the `frequency` edge attribute.
# This cell has no execution count in the saved notebook, i.e. it was not run.
lpg = igg.community_label_propagation(weights="frequency")

In [None]:
# One-line summary of the label-propagation result (requires the cell above to have been run).
lpg.summary()

## :) Multilevel

In [56]:
# Multilevel community detection, using the `frequency` edge attribute as edge weight.
# NOTE(review): no random seed is set anywhere in the notebook — if this algorithm is
# randomised, the cluster sizes shown below may not reproduce exactly; confirm.
ml = igg.community_multilevel(weights="frequency")

In [57]:
# One-line summary: number of vertices and number of clusters found.
ml.summary()

'Clustering with 963 elements and 3 clusters'

In [58]:
# Print the community sizes of the multilevel clustering and the total vertex count.
getMembershipDF(ml, igg)

community
0    227
1    429
2    307
dtype: int64
963


## Spinglass

In [None]:
# NOTE(review): the spinglass run is disabled and the reason is not recorded here —
# either document why (e.g. runtime or graph requirements; confirm) or delete these cells.
#sg = igg.community_spinglass(weights="frequency")

In [None]:
#sg.summary()

In [None]:
#getMembershipDF(sg, igg)

## :) Walktrap

In [53]:
# Walktrap community detection with 10-step random walks, using the `frequency`
# edge attribute as edge weight. The result is cut into a flat clustering in the next cell.
wt = igg.community_walktrap(weights="frequency", steps=10)

In [54]:
# Convert the walktrap result into a flat clustering with `wt.optimal_count` clusters
# and print its one-line summary.
wtvc = wt.as_clustering(wt.optimal_count)
print(wtvc.summary())

Clustering with 963 elements and 119 clusters


In [55]:
# Pair every igraph vertex's GraphML `id` with the community index that the
# walktrap clustering assigned to it, then count vertices per community.
# The loop variables get their own names: reusing `membership` as the loop
# target would leave it bound to the last vertex's community index instead of
# the full membership list once the cell has run.
membership = wtvc.membership
ids = DataFrame(
    [(int(vertex['id']), community) for vertex, community in zip(igg.vs, membership)],
    columns=["id", "community"],
)
ids.groupby(by="community").size()

community
0        1
1        1
2      195
3      443
4      209
5        1
6        1
7        1
8        1
9        1
10       1
11       1
12       1
13       1
14       1
15       1
16       1
17       1
18       1
19       1
20       1
21       1
22       1
23       1
24       1
25       1
26       1
27       1
28       1
29       1
      ... 
89       1
90       1
91       1
92       1
93       1
94       1
95       1
96       1
97       1
98       1
99       1
100      1
101      1
102      1
103      1
104      1
105      1
106      1
107      1
108      1
109      1
110      1
111      1
112      1
113      1
114      1
115      1
116      1
117      1
118      1
dtype: int64

# graph-tool

In [None]:
# Display the graph-tool graph loaded near the top of the notebook.
gtg

In [None]:
# NOTE(review): this star import copies every public name exported by graph_tool.all
# into the notebook namespace, and it sits far from the import cell at the top.
# The next cell only goes through `gt.inference`; presumably a plain
# `import graph_tool.all` in the first cell would be enough — confirm nothing
# relies on the bare names before changing it.
from graph_tool.all import *

In [None]:
# Fit a stochastic block model to the graph-tool graph by minimising its
# description length. The graph is a required argument; calling the function
# with no arguments (as before) raises a TypeError.
gt.inference.minimize_blockmodel_dl(gtg)

# NetworkX – python-louvain `community` package

In [60]:
import pythonlouvain.community.community_louvain as community

In [61]:
# Louvain partition of the NetworkX graph, using the `frequency` edge attribute as weight.
# `partition` is consumed as a dict (node id -> community index) in the next cell.
# NOTE(review): no random seed is set — if the installed version is randomised,
# the community sizes shown below may vary between runs; confirm.
partition = community.best_partition(nxg, weight="frequency")

In [62]:
# One row per node: the node id (dict key) and its Louvain community index (dict value).
df = DataFrame(list(partition.items()), columns=["id", "comm"])

In [63]:
# Number of nodes in each Louvain community.
df.groupby(by="comm").size()

comm
0    231
1    428
2    304
dtype: int64