In [95]:
import networkx as nx
import os
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
import datetime
import pytz
import matplotlib.pyplot as plt
import igraph as ig
import hatching as ht

In [4]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [5]:
def getAllNetworks(folder):
    """Load every ``.graphml`` file found directly inside ``folder``.

    The file names are sorted and printed before loading.

    Parameters
    ----------
    folder : str
        Directory that is listed for ``.graphml`` files.

    Returns
    -------
    list of (str, igraph.Graph)
        One ``(file name, graph)`` tuple per file, in sorted file-name order.
    """
    graph_files = sorted(
        name
        for name in os.listdir("{}/".format(folder))
        if name.endswith('.graphml')
    )

    print(graph_files)
    return [
        (name, ig.Graph.Read_GraphML("{}/{}".format(folder, name)))
        for name in graph_files
    ]

# Communities over time

In [139]:
# Load all networks from the 'networks/' folder as (file name, graph) tuples.
# NOTE(review): getAllNetworks appends its own '/' separator, so the trailing
# slash here is redundant (paths become 'networks//<file>').
g_list = getAllNetworks('networks/')

['0813_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-13_08:00:00+00:00.graphml', '0814_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-14_08:00:00+00:00.graphml', '0816_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-16_08:00:00+00:00.graphml', '0817_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-17_08:00:00+00:00.graphml', '0820_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-20_08:00:00+00:00.graphml', '0822_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-22_08:00:00+00:00.graphml', '0824_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-24_08:00:00+00:00.graphml', '0825_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-25_08:00:00+00:00.graphml', '0902_10h_95conf_212dist_3ilen_2gap_600minutes_2016-09-02_08:00:00+00:00.graphml']


In [140]:
# Keep only the networks at positions 4, 5 and 6 of the sorted file list
# (the 0820, 0822 and 0824 files in the listing printed above).
# NOTE(review): this overwrites g_list, so the cell is not idempotent --
# re-running it without re-running the loading cell indexes into the
# already-reduced list and raises IndexError.
g_list = [g_list[4], g_list[5], g_list[6]]

In [217]:
def getCommunities(g, datee):
    """Detect walktrap communities in ``g`` and attach an age to every vertex.

    Parameters
    ----------
    g : igraph.Graph
        Graph whose vertices carry an ``id`` attribute and whose edges carry
        a ``frequency`` attribute (used as the walktrap edge weight).
    datee : str
        Day in ``%Y-%m-%d`` format; it is converted to a UTC-aware datetime
        and passed to ``ht.get_all_bees_age``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (int), ``community`` (cluster label at the
        dendrogram's optimal count) and ``age``.
    """
    dendrogram = g.community_walktrap(weights="frequency")
    clustering = dendrogram.as_clustering(dendrogram.optimal_count)

    # Pair every vertex id with the label of the community it was assigned to.
    rows = [
        (int(vertex['id']), label)
        for vertex, label in zip(g.vs, clustering.membership)
    ]

    day = datetime.datetime.strptime(datee, "%Y-%m-%d").replace(tzinfo=pytz.UTC)
    ages = ht.get_all_bees_age(day)

    communities = DataFrame(rows, columns=["id", "community"])
    # NOTE(review): the id is used as a *positional* index into the age table;
    # presumably row i of that table belongs to the bee with id i -- confirm.
    communities['age'] = communities.id.apply(lambda bee_id: ages.iloc[bee_id].age)

    return communities

In [218]:
# One community table per selected network. Every entry of g_list is a
# (file name, graph) tuple; the second-to-last '_'-separated token of the file
# name is the day (e.g. '2016-08-20'). The list is built directly from g_list
# so that it always has exactly one entry per network, instead of relying on a
# hard-coded three-slot placeholder list.
c_list = [getCommunities(graph, fname.split('_')[-2]) for fname, graph in g_list]

In [219]:
# Number of vertices per community in the first selected network.
c_list[0].groupby(by='community').size()

community
0    431
1    490
2      1
dtype: int64

In [220]:
# Number of vertices per community in the second selected network.
c_list[1].groupby(by='community').size()

community
0    311
1    372
2    294
3      1
dtype: int64

In [221]:
# Number of vertices per community in the third selected network.
c_list[2].groupby(by='community').size()

community
0    301
1    390
2    231
dtype: int64

# Relative Overlap

In [222]:
def overlap1(s0, s1):
    """Return the Jaccard index |s0 ∩ s1| / |s0 ∪ s1| of two collections.

    Both arguments may be any iterable of hashable items; duplicates are
    ignored because the inputs are converted to sets.

    Returns 0.0 when both collections are empty (the ratio is undefined
    there and would otherwise raise ZeroDivisionError).
    """
    set0, set1 = set(s0), set(s1)
    union_size = len(set0 | set1)
    if union_size == 0:
        return 0.0
    return len(set0 & set1) / union_size

In [223]:
def overlap2(s0,s1):
    """Return the shared fraction relative to the larger of two collections.

    Computes ``min(|s0 ∩ s1| / |s0|, |s0 ∩ s1| / |s1|)`` on the *distinct*
    elements of both inputs, so duplicated entries cannot distort the
    ratio (the intersection is set-based, so the denominators must be too).

    Returns 0.0 if either collection is empty, instead of raising
    ZeroDivisionError.
    """
    set0, set1 = set(s0), set(s1)
    if not set0 or not set1:
        return 0.0
    shared = len(set0 & set1)
    return min(shared / len(set0), shared / len(set1))

In [224]:
# Per-community lists of vertex ids for the first selected network.
a = c_list[0].groupby(by='community').id.apply(list)

In [225]:
# Per-community lists of vertex ids for the second selected network.
b = c_list[1].groupby(by='community').id.apply(list)

In [226]:
# Per-community lists of vertex ids for the third selected network.
c = c_list[2].groupby(by='community').id.apply(list)

In [227]:
# All vertex ids present in the first selected network.
aa = list(c_list[0].id)

In [228]:
# All vertex ids present in the second selected network.
bb = list(c_list[1].id)

In [229]:
# All vertex ids present in the third selected network.
cc = list(c_list[2].id)

In [242]:
# Fraction of shared ids between the first and second network (overlap2).
overlap2(aa,bb)

0.8517382413087935

In [243]:
# Number of ids present in both the first and the second network.
len(set(aa).intersection(set(bb)))

833

In [244]:
# Number of ids present in both the second and the third network.
len(set(bb).intersection(set(cc)))

823

In [233]:
# Number of ids present in both the first and the third network.
len(set(aa).intersection(set(cc)))

716

In [234]:
# Fraction of shared ids between the second and third network (overlap2).
overlap2(bb,cc)

0.8415132924335378

In [235]:
# Fraction of shared ids between the first and third network (overlap2).
overlap2(aa,cc)

0.7765726681127982

In [240]:
# Pairwise comparison of every community of the second network (b) with every
# community of the third network (c). Each row is
# (index in b, index in c, overlap1, size in b, size in c, number of shared ids).
overlap = [
    (
        idx_b,
        idx_c,
        overlap1(ids_b, ids_c),
        len(ids_b),
        len(ids_c),
        len(set(ids_b).intersection(set(ids_c))),
    )
    for idx_b, ids_b in enumerate(b)
    for idx_c, ids_c in enumerate(c)
]
        

In [241]:
# Show the pairwise overlap table built in the previous cell.
overlap

[(0, 0, 0.21188118811881188, 311, 301, 107),
 (0, 1, 0.300556586270872, 311, 390, 162),
 (0, 2, 0.003703703703703704, 311, 231, 2),
 (1, 0, 0.3300395256916996, 372, 301, 167),
 (1, 1, 0.02557200538358008, 372, 390, 19),
 (1, 2, 0.36425339366515835, 372, 231, 161),
 (2, 0, 0.024096385542168676, 294, 301, 14),
 (2, 1, 0.3790322580645161, 294, 390, 188),
 (2, 2, 0.0038240917782026767, 294, 231, 2),
 (3, 0, 0.0, 1, 301, 0),
 (3, 1, 0.0, 1, 390, 0),
 (3, 2, 0.004329004329004329, 1, 231, 1)]

## For daily networks

In [125]:
def getMatches(g0,g1):
    """Compare every community of ``g0`` with every community of ``g1``.

    Returns a list of tuples
    ``(index in g0, index in g1, size in g0, size in g1, overlap1 score)``,
    ordered by the g0 index first and the g1 index second.
    """
    # NOTE(review): getCommunitiesFastGreedy is not defined anywhere in the
    # visible part of this notebook -- presumably it returns an iterable of
    # vertex-id collections per graph; confirm it is defined before this cell.
    communities0 = getCommunitiesFastGreedy(g0)
    communities1 = getCommunitiesFastGreedy(g1)

    return [
        (idx0, idx1, len(members0), len(members1), overlap1(members0, members1))
        for idx0, members0 in enumerate(communities0)
        for idx1, members1 in enumerate(communities1)
    ]

In [126]:
# Daily networks for 2015-08-21 and 2015-08-22.
gd0, gd1 = (
    ig.Graph.Read_GraphML(path)
    for path in (
        "networks-days/2015-08-21T00:00:00Z_1d_allCams_0.99conf_160dist_6ilen.graphml",
        "networks-days/2015-08-22T00:00:00Z_1d_allCams_0.99conf_160dist_6ilen.graphml",
    )
)

In [127]:
# Daily networks for 2015-08-23, 2015-08-24 and 2015-08-25.
gd2, gd3, gd4 = (
    ig.Graph.Read_GraphML(path)
    for path in (
        "networks-days/2015-08-23T00:00:00Z_1d_allCams_0.99conf_160dist_6ilen.graphml",
        "networks-days/2015-08-24T00:00:00Z_1d_allCams_0.99conf_160dist_6ilen.graphml",
        "networks-days/2015-08-25T00:00:00Z_1d_allCams_0.99conf_160dist_6ilen.graphml",
    )
)

In [128]:
# Community overlap between the 2015-08-21 and 2015-08-22 daily networks.
getMatches(gd0,gd1)

[(0, 0, 509, 526, 0.5356083086053413),
 (0, 1, 509, 654, 0.028293545534924844),
 (0, 2, 509, 2, 0.0),
 (1, 0, 661, 526, 0.0448943661971831),
 (1, 1, 661, 654, 0.7234600262123198),
 (1, 2, 661, 2, 0.0015105740181268882),
 (2, 0, 3, 526, 0.003795066413662239),
 (2, 1, 3, 654, 0.0),
 (2, 2, 3, 2, 0.0),
 (3, 0, 12, 526, 0.011278195488721804),
 (3, 1, 12, 654, 0.0030120481927710845),
 (3, 2, 12, 2, 0.0),
 (4, 0, 8, 526, 0.007547169811320755),
 (4, 1, 8, 654, 0.0060790273556231),
 (4, 2, 8, 2, 0.0),
 (5, 0, 9, 526, 0.011342155009451797),
 (5, 1, 9, 654, 0.0030257186081694403),
 (5, 2, 9, 2, 0.0)]

In [129]:
# Community overlap between the 2015-08-22 and 2015-08-23 daily networks.
getMatches(gd1,gd2)

[(0, 0, 526, 633, 0.02475685234305924),
 (0, 1, 526, 510, 0.553223388305847),
 (0, 2, 526, 8, 0.0037593984962406013),
 (0, 3, 526, 4, 0.003787878787878788),
 (1, 0, 654, 633, 0.716),
 (1, 1, 654, 510, 0.043010752688172046),
 (1, 2, 654, 8, 0.004552352048558422),
 (1, 3, 654, 4, 0.0015220700152207),
 (2, 0, 2, 633, 0.0015772870662460567),
 (2, 1, 2, 510, 0.0),
 (2, 2, 2, 8, 0.0),
 (2, 3, 2, 4, 0.0)]

In [130]:
# Community overlap between the 2015-08-23 and 2015-08-24 daily networks.
getMatches(gd2,gd3)

[(0, 0, 633, 584, 0.6740027510316369),
 (0, 1, 633, 541, 0.05008944543828265),
 (0, 2, 633, 9, 0.0015600624024961),
 (1, 0, 510, 584, 0.016728624535315983),
 (1, 1, 510, 541, 0.525399129172714),
 (1, 2, 510, 9, 0.007766990291262136),
 (2, 0, 8, 584, 0.003389830508474576),
 (2, 1, 8, 541, 0.003656307129798903),
 (2, 2, 8, 9, 0.0625),
 (3, 0, 4, 584, 0.0),
 (3, 1, 4, 541, 0.003683241252302026),
 (3, 2, 4, 9, 0.0)]

In [131]:
# Community overlap between the 2015-08-24 and 2015-08-25 daily networks.
getMatches(gd3,gd4)

[(0, 0, 584, 637, 0.5436156763590392),
 (0, 1, 584, 502, 0.04725168756027001),
 (0, 2, 584, 12, 0.006756756756756757),
 (1, 0, 541, 637, 0.03788546255506608),
 (1, 1, 541, 502, 0.5094066570188133),
 (1, 2, 541, 12, 0.009124087591240875),
 (2, 0, 9, 637, 0.004665629860031105),
 (2, 1, 9, 502, 0.007889546351084813),
 (2, 2, 9, 12, 0.0)]

## The same analysis with k-clique communities

In [97]:
def getMatchesKCC(g0,g1,k):
    """Compare the k-clique communities of two networkx graphs pairwise.

    Parameters
    ----------
    g0, g1 : networkx graphs
        Graphs passed to ``nx.community.k_clique_communities``.
    k : int
        Clique size passed to ``nx.community.k_clique_communities``.

    Returns
    -------
    list of (int, int, float)
        ``(index in g0, index in g1, overlap1 score)`` for every pair of
        communities, ordered by the g0 index first.
    """
    communities0 = list(nx.community.k_clique_communities(g0,k))
    communities1 = list(nx.community.k_clique_communities(g1,k))

    return [
        (idx0, idx1, overlap1(members0, members1))
        for idx0, members0 in enumerate(communities0)
        for idx1, members1 in enumerate(communities1)
    ]

In [None]:
# Hourly networks for 2015-08-21 10:00 and 11:00 (read with networkx).
# NOTE(review): the same two files are read again into gh0/gh1 in the
# "KCC Matching" section further down.
gh0 = nx.read_graphml("networks-hours/2015-08-21T10:00:00Z_1h_allCams_0.99conf_160dist_6ilen.graphml")
gh1 = nx.read_graphml("networks-hours/2015-08-21T11:00:00Z_1h_allCams_0.99conf_160dist_6ilen.graphml")

In [117]:
# Hourly network for 2015-08-21 12:00 (read with networkx).
gh2 = nx.read_graphml("networks-hours/2015-08-21T12:00:00Z_1h_allCams_0.99conf_160dist_6ilen.graphml")

In [115]:
# Pairwise overlap of the 3-clique communities of the 10:00 and 11:00 networks.
result = getMatchesKCC(gh0, gh1, 3)

In [116]:
# Keep only the community pairs whose overlap score exceeds 0.2.
[(idx0, idx1, score) for (idx0, idx1, score) in result if score > 0.2]

[(0, 0, 0.8276209677419355)]

In [120]:
# Pairwise overlap of the 4-clique communities of the 11:00 and 12:00 networks.
# NOTE(review): this uses k=4 and a different pair of graphs than the previous
# call (k=3, gh0/gh1), and it overwrites `result`.
result = getMatchesKCC(gh1, gh2, 4)

In [121]:
# Keep only the community pairs whose overlap score exceeds 0.2.
[(idx0, idx1, score) for (idx0, idx1, score) in result if score > 0.2]

[(0, 0, 0.5977011494252874),
 (14, 49, 0.2222222222222222),
 (19, 45, 0.2857142857142857),
 (47, 57, 0.3333333333333333),
 (84, 48, 0.2857142857142857),
 (85, 95, 0.2857142857142857),
 (108, 40, 0.2222222222222222)]

# k-clique community (KCC) matching following Palla et al.

In [147]:
# Clique size used for all k-clique community detection in this section.
k=4

In [148]:
# Hourly networks for 2015-08-21 10:00 (gh0) and 11:00 (gh1).
gh0, gh1 = (
    nx.read_graphml(path)
    for path in (
        "networks-hours/2015-08-21T10:00:00Z_1h_allCams_0.99conf_160dist_6ilen.graphml",
        "networks-hours/2015-08-21T11:00:00Z_1h_allCams_0.99conf_160dist_6ilen.graphml",
    )
)

In [149]:
# k-clique communities of each hourly graph: list0 for gh0, list1 for gh1.
list0, list1 = (
    list(nx.community.k_clique_communities(graph, k))
    for graph in (gh0, gh1)
)

In [154]:
# Number of k-clique communities found in gh0.
print(len(list0))

127


In [156]:
# Number of k-clique communities found in gh1.
print(len(list1))

145


In [150]:
# Summary (node/edge counts, average degree) of gh0.
# NOTE(review): nx.info was deprecated and then removed in NetworkX 3.0, so
# this cell needs an older NetworkX -- confirm the pinned version.
print(nx.info(gh0))

Name: 
Type: Graph
Number of nodes: 993
Number of edges: 14991
Average degree:  30.1934


In [151]:
# Summary (node/edge counts, average degree) of gh1.
# NOTE(review): nx.info was deprecated and then removed in NetworkX 3.0, so
# this cell needs an older NetworkX -- confirm the pinned version.
print(nx.info(gh1))

Name: 
Type: Graph
Number of nodes: 989
Number of edges: 13255
Average degree:  26.8049


In [157]:
# Joint graph of both hours, built with nx.compose from gh0 and gh1.
gh01 = nx.compose(gh0,gh1)

In [158]:
# Summary (node/edge counts, average degree) of the joint graph.
# NOTE(review): nx.info was deprecated and then removed in NetworkX 3.0, so
# this cell needs an older NetworkX -- confirm the pinned version.
print(nx.info(gh01))

Name: compose( ,  )
Type: Graph
Number of nodes: 1044
Number of edges: 27171
Average degree:  52.0517


In [159]:
# k-clique communities of the joint graph gh01 (same clique size k as above).
list01 = list(nx.community.k_clique_communities(gh01,k))

In [160]:
# Number of k-clique communities found in the joint graph.
len(list01)

72

In [161]:
# For each community v_k in the joint graph:
#   - collect the communities d_ik of the first graph (list0) that are contained in v_k
#   - collect the communities e_ik of the second graph (list1) that are contained in v_k

In [206]:
# One empty bucket per joint-graph community: d collects communities from
# list0, e collects communities from list1.
d = [[] for _ in list01]
e = [[] for _ in list01]

In [207]:
# For every joint-graph community l01, collect the communities of list0 (into
# d) and of list1 (into e) that are fully contained in it. Both lists are
# rebuilt from scratch here, so re-running this cell cannot append the same
# communities a second time.
d = [[l0 for l0 in list0 if set(l0).issubset(l01)] for l01 in list01]
e = [[l1 for l1 in list1 if set(l1).issubset(l01)] for l01 in list01]

In [213]:
# Relative overlap (overlap1) for every pair (d_ik, e_jk) inside the same
# joint-graph community. o[i] holds tuples (index in d[i], index in e[i], score).
o = [
    [
        (ed, ee, overlap1(di, ei))
        for ed, di in enumerate(contained_d)
        for ee, ei in enumerate(contained_e)
    ]
    for contained_d, contained_e in zip(d, e)
]
    
    

In [240]:
# Per joint-graph community, print the pairs whose overlap score exceeds 0.2.
for pairs in o:
    print([(ed, ee, score) for (ed, ee, score) in pairs if score > 0.2])

[(0, 0, 0.6519721577726219), (10, 97, 0.3333333333333333), (14, 115, 0.3333333333333333), (14, 141, 0.3333333333333333), (33, 20, 0.25), (54, 45, 0.3333333333333333), (59, 62, 0.2857142857142857), (88, 9, 0.3333333333333333), (88, 41, 0.3333333333333333)]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
