In [1]:
import pandas as pd 
import numpy as np 
import networkx as nx 
import json 
import matplotlib.pyplot as plt 
import seaborn as sns
from datetime import datetime
import tqdm

In [2]:

import cdlib
from cdlib import algorithms as cd
from cdlib import readwrite, viz, evaluation



In [3]:
# Load the GraphML file, parsing node ids as integers.
G = nx.read_graphml('../data/clean/byuser.graphml', node_type=int)

# Report "<number of nodes> <number of edges>".
node_count, edge_count = len(G), len(G.edges)
print(node_count, edge_count)

9107 111741


In [4]:
# Peek at the first three nodes together with their attribute dicts.
for node_id in list(G.nodes)[:3]:
    print(node_id, G.nodes[node_id])

# Peek at the first three edges together with their attribute dicts.
# NOTE(review): edge attributes carry a 'user' field, so the rendered output
# exposes user handles -- consider clearing it before sharing the notebook.
for edge_key in list(G.edges)[:3]:
    print(edge_key, G.edges[edge_key])

0 {'province': 'PI', 'covidMeasure': 0, 'latitude': 43.85180773789867, 'longitude': 10.506207}
15775 {'province': 'LU', 'covidMeasure': 0, 'latitude': 43.85135, 'longitude': 10.506207}
1979 {'province': 'FI', 'covidMeasure': 0, 'latitude': 43.76973, 'longitude': 11.26562}
(0, 15775, 0) {'start': 414, 'end': 414, 'user': 'RobertaC2372', 'days': 0, 'geodistance': 0.0004577378986709846}
(0, 15775, 1) {'start': 414, 'end': 414, 'user': 'ClaudioC138', 'days': 0, 'geodistance': 0.0004577378986709846}
(0, 15775, 2) {'start': 414, 'end': 414, 'user': 'juan pablo A 116887066', 'days': 0, 'geodistance': 0.0004577378986709846}


In [5]:
# Province codes used as community labels. The order matters: the fitness
# loop further down pairs results with these tags by position.
provinceTags = [
    'PI', 'PO', 'LI', 'FI', 'AR',
    'PT', 'GR', 'MS', 'SI', 'LU',
]

In [6]:
# One (initially empty) bucket of node ids per known province tag.
communities = dict((tag, list()) for tag in provinceTags)

# Assign every node to the bucket named by its 'province' attribute.
# A province missing from provinceTags raises KeyError here.
for node_id, node_attrs in G.nodes.items():
    communities[node_attrs['province']].append(node_id)

In [7]:
# JSON-serializable description of the province partition. This is the
# object that gets dumped to provinceCommunity.json and read back through
# cdlib's readwrite.read_community_json.
communityObj = {
    "communities": list(communities.values()),  # one node-id list per province
    "overlap": False,
    "coverage": 1.0,
    "algorithm": "provinces",
    "params": {
        "initial_membership": None,
        "weights": None,
    },
}


In [8]:
# Persist the province partition as JSON (any existing file is truncated).
with open('../data/clean/provinceCommunity.json', 'w+') as out_file:
    json.dump(obj=communityObj, fp=out_file)

In [9]:
# Reload the partition through cdlib's JSON reader.
# NOTE(review): this rebinds `communities`, which until now was the
# province -> node-list dict. The cell that builds communityObj calls
# communities.values(), so it expects that dict; re-run the bucketing cell
# first if you need to rebuild communityObj after this point.
communities = readwrite.read_community_json(
    '../data/clean/provinceCommunity.json'
)

## Fitness

In [17]:
# cdlib fitness functions to try on the province partition.
# NOTE(review): not every entry ends up in the results table -- the loop
# output shows some raising on this graph (e.g. avg_distance,
# avg_transitivity); confirm against the cdlib docs which ones are expected
# to return one value per community.
all_evalution_functions = [
    evaluation.avg_distance,
    evaluation.avg_embeddedness,
    evaluation.average_internal_degree,
    evaluation.avg_transitivity,
    evaluation.conductance,
    evaluation.cut_ratio,
    evaluation.edges_inside,
    evaluation.expansion,
    evaluation.fraction_over_median_degree,
    evaluation.hub_dominance,
    evaluation.internal_edge_density,
    evaluation.normalized_cut,
    evaluation.max_odf,
    evaluation.avg_odf,
    evaluation.flake_odf,
    evaluation.scaled_density,
    evaluation.significance,
    evaluation.size,
    evaluation.surprise,
    evaluation.triangle_participation_ratio,
    evaluation.purity,
]

In [20]:
fitness = dict()

# Label every community from the graph itself instead of relying on its
# position matching provinceTags. The SIZE column this loop produces is
# strictly descending, which suggests the clustering object orders its
# communities by size (TODO confirm against cdlib); a positional pairing
# with the hand-written tag list would silently mislabel the results as soon
# as the two orders disagree.
community_labels = []
for members in communities.communities:
    provinces = {G.nodes[n]['province'] for n in members}
    # Each community was built from exactly one province. Anything else means
    # the clustering on disk no longer matches the graph's node attributes.
    if len(provinces) != 1:
        raise ValueError(
            f"expected exactly one province per community, got {sorted(provinces)}"
        )
    community_labels.append(provinces.pop())

for evaluation_function in all_evalution_functions:
    # Use the function's own name; parsing str(function) only works for
    # objects whose repr looks like "<function NAME at 0x...>".
    f_name = evaluation_function.__name__.upper()
    print(f_name)
    try:
        ev = evaluation_function(G, communities, summary=False)
        print(ev)
    except Exception as e:
        # Best effort: some metrics are not available for this graph or do
        # not accept these arguments. Report the failure and carry on.
        print(f"{type(e).__name__}: {e}")
    else:
        # Only per-community results (a list with one value per community)
        # belong in the table. Single aggregate results are skipped
        # explicitly rather than being indexed as if they were a list.
        if isinstance(ev, list) and len(ev) == len(community_labels):
            fitness[f_name] = dict(zip(community_labels, ev))
        else:
            print(f"{f_name}: no per-community values returned, skipped")

    print('\n')

AVG_DISTANCE
Graph is not weakly connected.


AVG_EMBEDDEDNESS
[0.7008817105636693, 0.7430236148767924, 0.6343410423651683, 0.748056841634982, 0.5208650675279324, 0.6409079046004966, 0.5949691544719298, 0.4927136921690475, 0.6242103479683465, 0.3867069061354015]


AVERAGE_INTERNAL_DEGREE
[20.79191919191919, 26.45205479452055, 14.1326352530541, 21.280510018214937, 8.846242774566473, 23.59766081871345, 8.264014466546111, 6.064516129032258, 7.293388429752066, 4.32280701754386]


AVG_TRANSITIVITY
not implemented for multigraph type


CONDUCTANCE
[0.12799983054796552, 0.12000607625702567, 0.1906046976511744, 0.12197504885014279, 0.3251609489372961, 0.1809028905488795, 0.23807935978659553, 0.3055193394176445, 0.24134966688158177, 0.4169427354472314]


CUT_RATIO
[0.0004282335066676304, 0.00046289053459169246, 0.00041805021118441056, 0.0003691202588344943, 0.0005171593964650254, 0.0006315676086321799, 0.0003018796447290926, 0.00031094774927792004, 0.00026907664778897105, 0.0003503999172722102]

In [26]:
# Number of metrics for which a per-province result was stored in `fitness`.
len(fitness)

15

In [22]:
# Tabulate the stored metrics: one column per metric name, one row per
# province tag (the keys of the inner dicts).
pd.DataFrame(fitness)

Unnamed: 0,AVG_EMBEDDEDNESS,AVERAGE_INTERNAL_DEGREE,CONDUCTANCE,CUT_RATIO,EDGES_INSIDE,EXPANSION,FRACTION_OVER_MEDIAN_DEGREE,HUB_DOMINANCE,INTERNAL_EDGE_DENSITY,NORMALIZED_CUT,MAX_ODF,AVG_ODF,FLAKE_ODF,SCALED_DENSITY,SIZE
PI,0.700882,20.791919,0.128,0.000428,20584,3.05202,0.493434,0.258211,0.010506,0.160083,172,6.785354,0.191919,3.898605,1980
PO,0.743024,26.452055,0.120006,0.000463,17379,3.607306,0.499239,0.336634,0.020146,0.144507,172,7.581431,0.123288,7.475756,1314
LI,0.634341,14.132635,0.190605,0.000418,8098,3.328098,0.487784,0.253275,0.012343,0.208672,193,7.386562,0.247818,4.580133,1146
FI,0.748057,21.28051,0.121975,0.000369,11683,2.956284,0.468124,0.381951,0.019399,0.137937,111,6.321494,0.118397,7.198398,1098
AR,0.520865,8.846243,0.325161,0.000517,3826,4.262428,0.447399,0.24537,0.010239,0.341957,352,9.278613,0.42659,3.799317,865
PT,0.640908,23.597661,0.180903,0.000632,10088,5.211696,0.478363,0.442623,0.027632,0.202351,225,12.238596,0.233918,10.253485,855
GR,0.594969,8.264014,0.238079,0.000302,2285,2.582278,0.435805,0.199275,0.014971,0.24456,87,5.556962,0.327306,5.555363,553
MS,0.492714,6.064516,0.305519,0.000311,1598,2.667932,0.440228,0.148289,0.011529,0.311861,130,5.73814,0.466793,4.278296,527
SI,0.62421,7.293388,0.24135,0.000269,1765,2.320248,0.47314,0.165631,0.0151,0.246429,68,4.942149,0.278926,5.603284,484
LU,0.386707,4.322807,0.416943,0.00035,616,3.091228,0.494737,0.116197,0.015221,0.420891,44,6.512281,0.638596,5.648172,285


In [15]:
# Debug peek at the individual items of `ev`.
# NOTE(review): `ev` is not defined in this cell -- it is whatever the most
# recently executed evaluation loop left bound, so this output depends on
# execution order. Consider removing the cell from the final notebook.
for item in ev:
    print(item)

None
None
96860.51052010029
None


## Modularities

In [27]:
# cdlib modularity variants to compute for the province partition.
all_modularities = [
    evaluation.erdos_renyi_modularity,
    evaluation.link_modularity,
    evaluation.modularity_density,
    evaluation.newman_girvan_modularity,
    evaluation.z_modularity,
]

In [35]:
modularities = dict()

for modularity in all_modularities:
    # Use the function's own name; parsing str(function) only works for
    # objects whose repr looks like "<function NAME at 0x...>".
    f_name = modularity.__name__.upper()
    print(f_name)
    try:
        ev = modularity(G, communities, summary=False)
        print(ev)
        # Keep only the scalar score of the returned result object.
        modularities[f_name] = ev.score
        print(ev.score)

    except Exception as e:
        # Best effort: report which kind of error occurred and move on to
        # the next modularity function.
        print(f"{type(e).__name__}: {e}")

    print('\n')

ERDOS_RENYI_MODULARITY
FitnessResult(min=None, max=None, score=0.5703064235302439, std=None)
0.5703064235302439


LINK_MODULARITY
FitnessResult(min=None, max=None, score=0.03616247289364656, std=None)
0.03616247289364656


MODULARITY_DENSITY
FitnessResult(min=None, max=None, score=68.70416792044826, std=None)
68.70416792044826


NEWMAN_GIRVAN_MODULARITY
FitnessResult(min=None, max=None, score=0.10942757495034788, std=None)
0.10942757495034788


Z_MODULARITY
FitnessResult(min=None, max=None, score=1.4861854381310924, std=None)
1.4861854381310924




In [37]:
# Display the collected modularity scores, keyed by upper-cased function name.
modularities

{'ERDOS_RENYI_MODULARITY': 0.5703064235302439,
 'LINK_MODULARITY': 0.03616247289364656,
 'MODULARITY_DENSITY': 68.70416792044826,
 'NEWMAN_GIRVAN_MODULARITY': 0.10942757495034788,
 'Z_MODULARITY': 1.4861854381310924}

In [39]:
# Print the modularity scores alone, one per line, in insertion order.
for score in modularities.values():
    print(score)

0.5703064235302439
0.03616247289364656
68.70416792044826
0.10942757495034788
1.4861854381310924
