In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import pytz
import matplotlib.pyplot as plt
from collections import Counter
from pandas import Series, DataFrame
import seaborn as sns

import hatching as ht
from scipy import stats

import igraph as ig
import networkx as nx
import graph_tool as gt

In [76]:
# Path (relative to the working directory) of the GraphML file that the cells
# below load with NetworkX, igraph and graph-tool.
f = 'TT1_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-14 08:00:00+00:00.graphml'

In [77]:
# Echo the path as the cell output.
f

'TT1_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-14 08:00:00+00:00.graphml'

In [78]:
# Read the GraphML file into a NetworkX graph.
nxg = nx.read_graphml(f)

In [79]:
# Print NetworkX's text summary of the loaded graph.
print(nx.info(nxg))

Name: 
Type: Graph
Number of nodes: 1193
Number of edges: 406023
Average degree: 680.6756


In [80]:
# Read the same GraphML file into an igraph Graph.
igg = ig.Graph.Read_GraphML(f)

In [81]:
# Show igraph's summary string for the graph (vertex/edge counts and attributes).
igg.summary()

'IGRAPH U--- 1193 406023 -- \n+ attr: id (v), frequency (e), totalduration (e)'

In [82]:
# Read the same GraphML file into a graph-tool graph.
gtg = gt.load_graph(f)

In [83]:
# Display the graph-tool graph's repr as the cell output.
gtg

<Graph object, undirected, with 1193 vertices and 406023 edges at 0x7f8e843bb438>

# iGraph Algos

## :) Fastgreedy

In [33]:
# Fast-greedy community detection on the igraph graph, using the 'frequency'
# edge attribute as weights. The result is cut into a flat clustering below.
fg = igg.community_fastgreedy(weights="frequency")

In [34]:
# Cut the fast-greedy result into a flat clustering at the cluster count it
# reports as optimal, then print the clustering's summary line.
vc = fg.as_clustering(fg.optimal_count)
print(vc.summary())

Clustering with 963 elements and 4 clusters


In [35]:
# Pair every vertex's integer 'id' attribute with its fast-greedy community.
# A comprehension is used so that `membership` keeps referring to the full
# list instead of being rebound to its last element by a loop variable.
membership = vc.membership
ids = DataFrame(
    [(int(vertex['id']), comm) for vertex, comm in zip(igg.vs, membership)],
    columns=["id", "community"],
)

# Number of vertices in each community.
ids.groupby(by="community").size()

community
0    501
1    168
2    292
3      2
dtype: int64

## Infomap

In [None]:
# Infomap community detection, passing the 'frequency' edge attribute as the
# edge weights.
im = igg.community_infomap(edge_weights="frequency")

In [None]:
# Show the summary of the Infomap result.
im.summary()

## :) leading_eigenvector

In [37]:
# Leading-eigenvector community detection, weighted by the 'frequency' edge
# attribute.
lev = igg.community_leading_eigenvector(weights="frequency")

In [38]:
# Show the summary of the leading-eigenvector clustering.
lev.summary()

'Clustering with 963 elements and 12 clusters'

In [39]:
def getMembershipDF(stuff, g):
    """Print the size of every community in a clustering, then the total.

    Parameters
    ----------
    stuff
        Clustering result exposing a ``membership`` sequence with one
        community label per vertex.
    g
        Graph whose ``vs`` iterates over vertices; each vertex must provide an
        ``'id'`` attribute that can be converted with ``int``.

    Returns
    -------
    None
        The per-community sizes and their sum are printed, not returned.
    """
    # Distinct names for the loop variables keep the membership list from
    # being shadowed while it is being iterated.
    pairs = [(int(vertex['id']), comm) for vertex, comm in zip(g.vs, stuff.membership)]
    ids = DataFrame(pairs, columns=["id", "community"])

    # Group once and reuse the result for both printed lines.
    sizes = ids.groupby(by="community").size()
    print(sizes)
    print(sizes.sum())

In [40]:
# Print community sizes (and their total) for the leading-eigenvector result.
getMembershipDF(lev, igg)

community
0     424
1     288
2     238
3       1
4       1
5       1
6       1
7       1
8       5
9       1
10      1
11      1
dtype: int64
963


## Label Propagation

In [None]:
# Label-propagation community detection, weighted by the 'frequency' edge
# attribute.
lpg = igg.community_label_propagation(weights="frequency")

In [None]:
# Show the summary of the label-propagation result.
lpg.summary()

## :) Multilevel

In [56]:
# Multilevel community detection, weighted by the 'frequency' edge attribute.
ml = igg.community_multilevel(weights="frequency")

In [57]:
# Show the summary of the multilevel clustering.
ml.summary()

'Clustering with 963 elements and 3 clusters'

In [58]:
# Print community sizes (and their total) for the multilevel result.
getMembershipDF(ml, igg)

community
0    227
1    429
2    307
dtype: int64
963


## Spinglass

In [None]:
#sg = igg.community_spinglass(weights="frequency")

In [None]:
#sg.summary()

In [None]:
#getMembershipDF(sg, igg)

## :) Walktrap

In [53]:
# Walktrap community detection with steps=10, weighted by the 'frequency' edge
# attribute. The result is cut into a flat clustering below.
wt = igg.community_walktrap(weights="frequency", steps=10)

In [54]:
# Cut the walktrap result into a flat clustering at the cluster count it
# reports as optimal, then print the clustering's summary line.
wtvc = wt.as_clustering(wt.optimal_count)
print(wtvc.summary())

Clustering with 963 elements and 119 clusters


In [55]:
# Pair every vertex's integer 'id' attribute with its walktrap community.
# A comprehension is used so that `membership` keeps referring to the full
# list instead of being rebound to its last element by a loop variable.
membership = wtvc.membership
ids = DataFrame(
    [(int(vertex['id']), comm) for vertex, comm in zip(igg.vs, membership)],
    columns=["id", "community"],
)

# Number of vertices in each community.
ids.groupby(by="community").size()

community
0        1
1        1
2      195
3      443
4      209
5        1
6        1
7        1
8        1
9        1
10       1
11       1
12       1
13       1
14       1
15       1
16       1
17       1
18       1
19       1
20       1
21       1
22       1
23       1
24       1
25       1
26       1
27       1
28       1
29       1
      ... 
89       1
90       1
91       1
92       1
93       1
94       1
95       1
96       1
97       1
98       1
99       1
100      1
101      1
102      1
103      1
104      1
105      1
106      1
107      1
108      1
109      1
110      1
111      1
112      1
113      1
114      1
115      1
116      1
117      1
118      1
dtype: int64

# graph-tool

In [None]:
# Display the graph-tool graph's repr as the cell output.
gtg

In [None]:
from graph_tool.all import *

In [None]:
# Fit a block model to the graph-tool graph. The graph must be passed
# explicitly: calling minimize_blockmodel_dl() with no arguments raises a
# TypeError. The fitted state is kept so later cells can inspect it.
gt_block_state = gt.inference.minimize_blockmodel_dl(gtg)
gt_block_state

# networkX - community package

In [60]:
import pythonlouvain.community.community_louvain as community

In [61]:
# Louvain best partition of the NetworkX graph, using 'frequency' as the edge
# weight. The result is used below as a mapping (keys -> "id", values -> "comm").
partition = community.best_partition(nxg, weight="frequency")

In [62]:
# One row per (key, value) pair of the Louvain partition mapping.
df = DataFrame(list(partition.items()), columns=["id", "comm"])

In [63]:
# Number of rows per Louvain community.
df.groupby(by="comm").size()

comm
0    231
1    428
2    304
dtype: int64

# Compare Community Outcome

In [85]:
def compareCommunities(g, weights="frequency", walktrap_steps=10):
    """Run four igraph community detections and tabulate them per vertex.

    Parameters
    ----------
    g
        igraph-style graph providing ``vs`` (vertices with an ``'id'``
        attribute convertible to int) and the ``community_fastgreedy``,
        ``community_leading_eigenvector``, ``community_multilevel`` and
        ``community_walktrap`` methods.
    weights
        Edge attribute forwarded as ``weights`` to every algorithm.
        Defaults to ``"frequency"``, the value previously hard-coded.
    walktrap_steps
        ``steps`` argument forwarded to ``community_walktrap``. Defaults to
        10, the value previously hard-coded.

    Returns
    -------
    DataFrame
        One row per vertex with columns ``id``, ``community-fg``,
        ``community-le``, ``community-ml`` and ``community-wt``.
    """
    # Fast-greedy and walktrap are cut into flat clusterings at the cluster
    # count they report as optimal.
    fg_dendrogram = g.community_fastgreedy(weights=weights)
    fg_clustering = fg_dendrogram.as_clustering(fg_dendrogram.optimal_count)

    # Distinct loop names avoid shadowing the membership list being iterated.
    ids = DataFrame(
        [(int(vertex['id']), comm) for vertex, comm in zip(g.vs, fg_clustering.membership)],
        columns=["id", "community-fg"],
    )

    # The remaining memberships are attached positionally, i.e. in vertex order.
    ids['community-le'] = g.community_leading_eigenvector(weights=weights).membership
    ids['community-ml'] = g.community_multilevel(weights=weights).membership

    wt_dendrogram = g.community_walktrap(weights=weights, steps=walktrap_steps)
    ids['community-wt'] = wt_dendrogram.as_clustering(wt_dendrogram.optimal_count).membership

    return ids

In [86]:
# Per-vertex table of the four igraph community assignments.
dfc = compareCommunities(igg)

In [87]:
def addAge(df, date_dt):
    """Attach an ``age`` column to ``df`` and return the same frame.

    Ages come from ``ht.get_all_bees_age(date_dt)``. Each value of ``df.id``
    is used as a positional (``iloc``) row index into that result.
    NOTE(review): this presumes row position equals the id - confirm against
    ``ht.get_all_bees_age``.

    ``df`` is modified in place; the returned object is ``df`` itself.
    """
    all_ages = ht.get_all_bees_age(date_dt)

    def _age_for(row_pos):
        # Positional lookup of one row, then its 'age' field.
        return all_ages.iloc[row_pos].age

    df["age"] = df.id.apply(_age_for)
    return df

In [88]:
# Reference date for the age lookup: midnight of 2016-08-14, made
# timezone-aware by attaching UTC.
start = "2016-08-14"
start_dt = datetime.datetime.strptime(start, "%Y-%m-%d").replace(tzinfo=pytz.UTC)

In [89]:
# Add the 'age' column for the reference date (addAge also mutates dfc in place).
dfc = addAge(dfc, start_dt)

In [90]:
# Preview the first rows of the comparison table.
dfc.head()

Unnamed: 0,id,community-fg,community-le,community-ml,community-wt,age
0,2051,0,0,0,0,3
1,6,1,1,1,1,26
2,2055,1,2,2,2,23
3,8,1,2,2,2,20
4,2057,0,0,2,2,13


In [94]:
def commSize(df, col):
    """Print the number of rows for each distinct value of column ``col``.

    NOTE(review): the ``df`` argument is not used. The counts are taken from
    the notebook-level ``dfc`` frame, so ``dfc`` must exist before calling.
    """
    sizes = dfc.groupby(by=col).size()
    print(sizes)

In [99]:
def commMeanAge(df, col):
    """Print the mean of the ``age`` column for each distinct value of ``col``.

    NOTE(review): the ``df`` argument is not used. The means are taken from
    the notebook-level ``dfc`` frame, so ``dfc`` must exist before calling.
    """
    mean_age = dfc.groupby(by=col)["age"].mean()
    print(mean_age)

## Size and Number

In [101]:
# Pass the comparison table `dfc`: it is the frame that holds the community-*
# columns. `df` is the Louvain table, which only has "id" and "comm".
commSize(dfc, 'community-fg'), commSize(dfc, 'community-le'), commSize(dfc, 'community-ml'), commSize(dfc, 'community-wt')

community-fg
0    359
1    834
dtype: int64
community-le
0    354
1    502
2    337
dtype: int64
community-ml
0    313
1    541
2    339
dtype: int64
community-wt
0    241
1    482
2    469
3      1
dtype: int64


(None, None, None, None)

## Mean Age

In [102]:
# Pass the comparison table `dfc`: it is the frame that holds the community-*
# and age columns. `df` is the Louvain table, which only has "id" and "comm".
commMeanAge(dfc, 'community-fg'), commMeanAge(dfc, 'community-le'), commMeanAge(dfc, 'community-ml'), commMeanAge(dfc, 'community-wt')

community-fg
0     8.359331
1    21.575540
Name: age, dtype: float64
community-le
0     8.257062
1    24.205179
2    17.569733
Name: age, dtype: float64
community-ml
0     6.706070
1    24.048059
2    17.362832
Name: age, dtype: float64
community-wt
0     5.651452
1    24.460581
2    16.671642
3    24.000000
Name: age, dtype: float64


(None, None, None, None)

In [103]:
# Preview the first rows of the comparison table.
dfc.head()

Unnamed: 0,id,community-fg,community-le,community-ml,community-wt,age
0,2051,0,0,0,0,3
1,6,1,1,1,1,26
2,2055,1,2,2,2,23
3,8,1,2,2,2,20
4,2057,0,0,2,2,13


## Look at IDs

In [107]:
def getIdList(df, col):
    """Collect the set of ids belonging to each distinct value of ``col``.

    Parameters
    ----------
    df
        DataFrame with an ``id`` column and the grouping column ``col``.
    col
        Name of the column to group by.

    Returns
    -------
    Series
        Indexed by the values of ``col``; each entry is a ``set`` of ids.
    """
    # Group the frame that was passed in rather than a notebook-level global,
    # so the function works for any table with the expected columns.
    return df.groupby(by=col)["id"].apply(set)

In [108]:
# Sets of ids per fast-greedy community.
getIdList(dfc, 'community-fg')

community-fg
0    {2051, 2057, 11, 2065, 2066, 19, 2075, 2079, 2...
1    {6, 2055, 8, 2061, 2062, 16, 2067, 23, 2072, 2...
Name: id, dtype: object

In [127]:
def getMappingScore(df, col1, col2):
    """Print how the communities in ``col1`` overlap with those in ``col2``.

    The community sizes for both columns are printed first. Then, for every
    pair (community in ``col1``, community in ``col2``), one line is printed
    with both community labels, both column names and the number of ids the
    two communities share.

    Parameters
    ----------
    df
        Table handed to ``getIdList`` and ``commSize``.
    col1, col2
        Names of the two community columns to compare.

    Returns
    -------
    None
        Everything is printed.
    """
    id1 = getIdList(df, col1)
    id2 = getIdList(df, col2)

    # commSize prints and returns None, so it is called on its own rather
    # than inside print(), which would emit a stray "... None ..." line.
    commSize(df, col1)
    commSize(df, col2)

    # Iterate over (label, id-set) pairs so the printed numbers are the real
    # community labels, not just positions in the grouped result.
    for label1, members1 in id1.items():
        for label2, members2 in id2.items():
            print(label1, col1, label2, col2, len(members1.intersection(members2)))
        print('\n')

In [128]:
# Overlap between fast-greedy and leading-eigenvector communities.
getMappingScore(dfc, 'community-fg', 'community-le')

community-fg
0    359
1    834
dtype: int64
community-le
0    354
1    502
2    337
dtype: int64
community-fg None community-le None
0 community-fg 0 community-le 344
0 community-fg 1 community-le 1
0 community-fg 2 community-le 14


1 community-fg 0 community-le 10
1 community-fg 1 community-le 501
1 community-fg 2 community-le 323




In [129]:
# Overlap between leading-eigenvector and multilevel communities.
getMappingScore(dfc, 'community-le', 'community-ml')

community-le
0    354
1    502
2    337
dtype: int64
community-ml
0    313
1    541
2    339
dtype: int64
community-le None community-ml None
0 community-le 0 community-ml 310
0 community-le 1 community-ml 1
0 community-le 2 community-ml 43


1 community-le 0 community-ml 0
1 community-le 1 community-ml 501
1 community-le 2 community-ml 1


2 community-le 0 community-ml 3
2 community-le 1 community-ml 39
2 community-le 2 community-ml 295




In [130]:
# Overlap between leading-eigenvector and walktrap communities.
getMappingScore(dfc, 'community-le', 'community-wt')

community-le
0    354
1    502
2    337
dtype: int64
community-wt
0    241
1    482
2    469
3      1
dtype: int64
community-le None community-wt None
0 community-le 0 community-wt 241
0 community-le 1 community-wt 0
0 community-le 2 community-wt 112
0 community-le 3 community-wt 1


1 community-le 0 community-wt 0
1 community-le 1 community-wt 453
1 community-le 2 community-wt 49
1 community-le 3 community-wt 0


2 community-le 0 community-wt 0
2 community-le 1 community-wt 29
2 community-le 2 community-wt 308
2 community-le 3 community-wt 0


