In [11]:
import numpy as np
import pandas as pd
import os
import datetime
import pytz
import matplotlib.pyplot as plt
from collections import Counter
from pandas import Series, DataFrame
import seaborn as sns

import hatching as ht
from scipy import stats

import igraph as ig
import networkx as nx
import graph_tool as gt

In [None]:
f = 'TT1_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-14 08:00:00+00:00.graphml'
f = 'TT2_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-17 08:00:00+00:00.graphml'
nxg = nx.read_graphml(f)

In [None]:
print(nx.info(nxg))

In [None]:
igg = ig.Graph.Read_GraphML(f)

In [None]:
igg.summary()

In [None]:
gtg = gt.load_graph(f)

In [None]:
gtg

# iGraph Algos

## :) Fastgreedy

In [None]:
fg = igg.community_fastgreedy(weights="frequency")

In [None]:
vc = fg.as_clustering(fg.optimal_count)
print(vc.summary())

In [None]:
ids = []
membership = vc.membership
for name, membership in zip(igg.vs, membership):
    ids.append((int(name['id']), membership))

ids = DataFrame(ids, columns=["id", "community"])
ids.groupby(by="community").size()

## Infomap

In [None]:
im = igg.community_infomap(edge_weights="frequency")

In [None]:
im.summary()

## :) leading_eigenvector

In [None]:
lev = igg.community_leading_eigenvector(weights="frequency")

In [None]:
lev.summary()

In [None]:
def getMembershipDF(stuff, g):
    ids = []
    membership = stuff.membership
    for name, membership in zip(g.vs, membership):
        ids.append((int(name['id']), membership))

    ids = DataFrame(ids, columns=["id", "community"])
    print(ids.groupby(by="community").size())
    print(ids.groupby(by="community").size().sum())

In [None]:
getMembershipDF(lev, igg)

## Label Propagation

In [None]:
lpg = igg.community_label_propagation(weights="frequency")

In [None]:
lpg.summary()

## :) Multilevel

In [None]:
ml = igg.community_multilevel(weights="frequency")

In [None]:
ml.summary()

In [None]:
getMembershipDF(ml, igg)

## Spinglass

In [None]:
#sg = igg.community_spinglass(weights="frequency")

In [None]:
#sg.summary()

In [None]:
#getMembershipDF(sg, igg)

## :) Walktrap

In [None]:
wt = igg.community_walktrap(weights="frequency", 0)

In [None]:
wtvc = wt.as_clustering(wt.optimal_count)
print(wtvc.summary())

In [None]:
ids = []
membership = wtvc.membership
for name, membership in zip(igg.vs, membership):
    ids.append((int(name['id']), membership))

ids = DataFrame(ids, columns=["id", "community"])
ids.groupby(by="community").size()

# Graph-tools

In [None]:
gtg

In [None]:
from graph_tool.all import *

In [None]:
gt.inference.minimize_blockmodel_dl()

# networkX - community package

In [None]:
import pythonlouvain.community.community_louvain as community

In [None]:
partition = community.best_partition(nxg, weight="frequency")

In [None]:
df = DataFrame(list(zip(list(partition.keys()),list(partition.values()))), columns=["id", "comm"])

In [None]:
df.groupby(by="comm").size()

# Compare Comunity Outcome

In [1]:
def addAge(df, date_dt):
    aa = ht.get_all_bees_age(date_dt)
    df["age"] = df.id.apply(lambda x: aa.iloc[x].age)
    return df

In [42]:
def compareCommunities(g, datestr):
    fg = g.community_fastgreedy(weights="frequency")
    fgvc = fg.as_clustering(fg.optimal_count)

    ids = []
    membership = fgvc.membership
    for name, membership in zip(g.vs, membership):
        ids.append((int(name['id']), membership))

    ids = DataFrame(ids, columns=["id", "community-fg"])
    
    lev = g.community_leading_eigenvector(weights="frequency")
    ids['community-le'] = lev.membership
    
    ml = g.community_multilevel(weights="frequency")
    ids['community-ml'] = ml.membership
    
    wt = g.community_walktrap(weights="frequency", steps=3)
    wtvc = wt.as_clustering(wt.optimal_count)
    ids['community-wt'] = wtvc.membership
    
    start_dt = datetime.datetime.strptime(datestr, "%Y-%m-%d").replace(tzinfo=pytz.UTC)
    ids = addAge(ids, start_dt)

    return ids

In [None]:
dfc = compareCommunities(igg)

In [None]:
start = "2016-08-14"
start_dt = datetime.datetime.strptime(start, "%Y-%m-%d").replace(tzinfo=pytz.UTC)

In [None]:
dfc = addAge(dfc, start_dt)

In [None]:
dfc.head()

In [23]:
def commSize(df, col):
    print(df.groupby(by=col).size())

In [24]:
def commMeanAge(df, col):
    print(df.groupby(by=col).age.mean())

## Size and Number

In [None]:
commSize(df, 'community-fg'), commSize(df, 'community-le'), commSize(df, 'community-ml'),commSize(df, 'community-wt')

## Mean Age

In [None]:
commMeanAge(df, 'community-fg'), commMeanAge(df, 'community-le'), commMeanAge(df, 'community-ml'),commMeanAge(df, 'community-wt')

## Look at IDs

In [26]:
def getIdList(df, col):
    return df.groupby(by=col).id.apply(set)

In [None]:
getIdList(dfc, 'community-fg')

In [6]:
def overlap1(s0, s1):
    x = len(set(s0).intersection(s1))
    y = len(set(s0).union(s1))
    return x/y

In [7]:
def getMappingScore(df, col1, col2):
    id1 = getIdList(df, col1)
    id2 = getIdList(df, col2)
    
    print(col1, commSize(df, col1), col2, commSize(df,col2))
    
    for e1, i in enumerate(id1):
        for e2, j in enumerate(id2):
            print(e1, col1, e2, col2, overlap1(i,j))
        print('\n')

In [None]:
getMappingScore(dfc, 'community-fg', 'community-le')

In [None]:
getMappingScore(dfc, 'community-le', 'community-ml')

In [None]:
getMappingScore(dfc, 'community-le', 'community-wt')

In [None]:
getMappingScore(dfc, 'community-ml', 'community-wt')

In [28]:
def alles(df):
    print(commSize(df, 'community-fg'), commSize(df, 'community-le'), commSize(df, 'community-ml'),commSize(df, 'community-wt'))
    print(commMeanAge(df, 'community-fg'), commMeanAge(df, 'community-le'), commMeanAge(df, 'community-ml'),commMeanAge(df, 'community-wt'))
    print(getMappingScore(df, 'community-fg', 'community-le'))
    print(getMappingScore(df, 'community-fg', 'community-ml'))
    print(getMappingScore(df, 'community-fg', 'community-wt'))
    print(getMappingScore(df, 'community-le', 'community-ml'))
    print(getMappingScore(df, 'community-le', 'community-wt'))
    print(getMappingScore(df, 'community-ml', 'community-wt'))

In [None]:
alles(dfc)

# Alle Testen Stat

In [9]:
f1 = 'TT1_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-14 08:00:00+00:00.graphml'
f2 = 'TT2_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-17 08:00:00+00:00.graphml'
f3 = 'TT3_10h_95conf_212dist_3ilen_2gap_600minutes_2016-08-20 08:00:00+00:00.graphml'
f4 = 'TT4_10h_95conf_212dist_3ilen_2gap_600minutes_2016-09-02 08:00:00+00:00.graphml'

In [12]:
g1 = ig.Graph.Read_GraphML(f1)
g2 = ig.Graph.Read_GraphML(f2)
g3 = ig.Graph.Read_GraphML(f3)
g4 = ig.Graph.Read_GraphML(f4)

In [63]:
g1 = thresholdedGraph(f1, 1)
g2 = thresholdedGraph(f2, 1)
g3 = thresholdedGraph(f3, 1)
g4 = thresholdedGraph(f4, 1)

Name: 
Type: Graph
Number of nodes: 1193
Number of edges: 406023
Average degree: 680.6756
anzahl removed edges 87667
Name: 
Type: Graph
Number of nodes: 1193
Number of edges: 318356
Average degree: 533.7066
Number of components: 3
Counter({1: 2, 1191: 1})
Name: 
Type: Graph
Number of nodes: 1095
Number of edges: 381548
Average degree: 696.8913
anzahl removed edges 72518
Name: 
Type: Graph
Number of nodes: 1095
Number of edges: 309030
Average degree: 564.4384
Number of components: 1
Counter({1095: 1})
Name: 
Type: Graph
Number of nodes: 922
Number of edges: 291179
Average degree: 631.6247
anzahl removed edges 48326
Name: 
Type: Graph
Number of nodes: 922
Number of edges: 242853
Average degree: 526.7961
Number of components: 2
Counter({921: 1, 1: 1})
Name: 
Type: Graph
Number of nodes: 379
Number of edges: 43922
Average degree: 231.7784
anzahl removed edges 6368
Name: 
Type: Graph
Number of nodes: 379
Number of edges: 37554
Average degree: 198.1741
Number of components: 2
Counter({1: 1, 

In [65]:
g1.summary(), g2.summary(), g3.summary(), g4.summary()

('IGRAPH U--- 1191 318356 -- \n+ attr: id (v), frequency (e), totalduration (e)',
 'IGRAPH U--- 1095 309030 -- \n+ attr: id (v), frequency (e), totalduration (e)',
 'IGRAPH U--- 921 242853 -- \n+ attr: id (v), frequency (e), totalduration (e)',
 'IGRAPH U--- 378 37554 -- \n+ attr: id (v), frequency (e), totalduration (e)')

In [66]:
t1 = compareCommunities(g1, "2016-08-14")

In [67]:
t2 = compareCommunities(g2, "2016-08-17")

In [68]:
t3 = compareCommunities(g3, "2016-08-20")

In [69]:
t4 = compareCommunities(g4, "2016-09-02")

In [74]:
file1 =open('./ilen3WithT1', 'w+')
file1.write(alles(t1), alles(t2), alles(t3), alles(t4))

community-fg
0    830
1    361
dtype: int64
community-le
0    335
1    355
2    501
dtype: int64
community-ml
0    539
1    339
2    313
dtype: int64
community-wt
0    506
1    439
2    246
dtype: int64
None None None None
community-fg
0    21.543373
1     8.526316
Name: age, dtype: float64
community-le
0    17.668657
1     8.211268
2    24.201597
Name: age, dtype: float64
community-ml
0    23.975881
1    17.466077
2     6.757188
Name: age, dtype: float64
community-wt
0    17.069170
1    24.920273
2     5.617886
Name: age, dtype: float64
None None None None
community-fg
0    830
1    361
dtype: int64
community-le
0    335
1    355
2    501
dtype: int64
community-fg None community-le None
0 community-fg 0 community-le 0.3819691577698695
0 community-fg 1 community-le 0.009369676320272573
0 community-fg 2 community-le 0.5959232613908872


1 community-fg 0 community-le 0.01903367496339678
1 community-fg 1 community-le 0.9247311827956989
1 community-fg 2 community-le 0.004662004662004662




TypeError: write() takes exactly one argument (4 given)

In [76]:
alles(t1), alles(t2)

community-fg
0    830
1    361
dtype: int64
community-le
0    335
1    355
2    501
dtype: int64
community-ml
0    539
1    339
2    313
dtype: int64
community-wt
0    506
1    439
2    246
dtype: int64
None None None None
community-fg
0    21.543373
1     8.526316
Name: age, dtype: float64
community-le
0    17.668657
1     8.211268
2    24.201597
Name: age, dtype: float64
community-ml
0    23.975881
1    17.466077
2     6.757188
Name: age, dtype: float64
community-wt
0    17.069170
1    24.920273
2     5.617886
Name: age, dtype: float64
None None None None
community-fg
0    830
1    361
dtype: int64
community-le
0    335
1    355
2    501
dtype: int64
community-fg None community-le None
0 community-fg 0 community-le 0.3819691577698695
0 community-fg 1 community-le 0.009369676320272573
0 community-fg 2 community-le 0.5959232613908872


1 community-fg 0 community-le 0.01903367496339678
1 community-fg 1 community-le 0.9247311827956989
1 community-fg 2 community-le 0.004662004662004662




(None, None)

In [78]:
alles(t3)

community-fg
0    465
1     18
2    438
dtype: int64
community-le
0    486
1    435
dtype: int64
community-ml
0    476
1    445
dtype: int64
community-wt
0    494
1    426
2      1
dtype: int64
None None None None
community-fg
0    25.118280
1    15.277778
2    17.335616
Name: age, dtype: float64
community-le
0    24.870370
1    17.151724
Name: age, dtype: float64
community-ml
0    17.638655
1    25.060674
Name: age, dtype: float64
community-wt
0     25.04251
1     17.08216
2   -100.00000
Name: age, dtype: float64
None None None None
community-fg
0    465
1     18
2    438
dtype: int64
community-le
0    486
1    435
dtype: int64
community-fg None community-le None
0 community-fg 0 community-le 0.9487704918032787
0 community-fg 1 community-le 0.0022271714922048997


1 community-fg 0 community-le 0.03278688524590164
1 community-fg 1 community-le 0.004434589800443459


2 community-fg 0 community-le 0.007633587786259542
2 community-fg 1 community-le 0.9751131221719457


None
community-fg
0

In [59]:
def thresholdedGraph(file, border):

    G = nx.read_graphml(file)
    print(nx.info(G))
    
    lowedges = []
    for a,b in list(G.edges()):
        data = G.get_edge_data(a,b)
        f = data.get('frequency')
        if f <= border:
            lowedges.append((a,b))

    print("anzahl removed edges", len(lowedges))
    for e in lowedges:
        G.remove_edge(e[0],e[1])

    print(nx.info(G))

    Gcc = sorted(nx.connected_component_subgraphs(G), key = len, reverse=True)
    print("Number of components: {}".format(len(Gcc)))

    size_components = []
    for comp in Gcc:
        size_components.append(nx.number_of_nodes(comp))
    
    print(Counter(list(size_components)))

    G = Gcc[0]

    nx.write_graphml(G, 'ttt.graphml')
    
    return ig.Graph.Read_GraphML('ttt.graphml')

In [60]:
gtest = thresholdedGraph(f1, 1)

Name: 
Type: Graph
Number of nodes: 1193
Number of edges: 406023
Average degree: 680.6756
anzahl removed edges 87667
Name: 
Type: Graph
Number of nodes: 1193
Number of edges: 318356
Average degree: 533.7066
Number of components: 3
Counter({1: 2, 1191: 1})


In [62]:
gtest.summary()

'IGRAPH U--- 1191 318356 -- \n+ attr: id (v), frequency (e), totalduration (e)'