In [1]:
import pandas as pd 
import numpy as np 
import networkx as nx 
import json 
import matplotlib.pyplot as plt 
import seaborn as sns
from datetime import datetime
import tqdm

In [13]:

import cdlib
from cdlib import algorithms as cd
from cdlib import readwrite, viz, evaluation

In [16]:
# Load the cleaned per-user graph; node_type=int converts the GraphML node ids to integers.
G = nx.read_graphml('../data/clean/byuser.graphml', node_type=int)
# Sanity check: node count followed by edge count.
print(G.number_of_nodes(), G.number_of_edges())

9107 111741


In [17]:
# Peek at the first three nodes and the first three edges together with their attribute dicts.
# NOTE(review): the edge attributes include a 'user' field, so the output of this cell
# exposes user names; consider clearing it before sharing the notebook.
for node in list(G.nodes)[:3]:
    print(node, G.nodes[node])
for edge in list(G.edges)[:3]:
    print(edge, G.edges[edge])

0 {'province': 'PI', 'covidMeasure': 0, 'latitude': 43.85180773789867, 'longitude': 10.506207}
15775 {'province': 'LU', 'covidMeasure': 0, 'latitude': 43.85135, 'longitude': 10.506207}
1979 {'province': 'FI', 'covidMeasure': 0, 'latitude': 43.76973, 'longitude': 11.26562}
(0, 15775, 0) {'start': 414, 'end': 414, 'user': 'RobertaC2372', 'days': 0, 'geodistance': 0.0004577378986709846}
(0, 15775, 1) {'start': 414, 'end': 414, 'user': 'ClaudioC138', 'days': 0, 'geodistance': 0.0004577378986709846}
(0, 15775, 2) {'start': 414, 'end': 414, 'user': 'juan pablo A 116887066', 'days': 0, 'geodistance': 0.0004577378986709846}


In [18]:
# Province codes used as community labels; they are matched against the
# 'province' node attribute. NOTE(review): these look like Italian province
# abbreviations (Tuscany) — confirm against the data source.
provinceTags = [
    'PI', 'PO', 'LI', 'FI', 'AR',
    'PT', 'GR', 'MS', 'SI', 'LU',
]

In [19]:
# Partition the nodes by their 'province' attribute: one (initially empty) list per
# known province tag. A node whose province is not in provinceTags raises KeyError.
communities = {tag: [] for tag in provinceTags}

for node_id, node_attrs in G.nodes(data=True):
    communities[node_attrs['province']].append(node_id)

In [24]:
# Dictionary that is dumped to provinceCommunity.json and read back with
# readwrite.read_community_json: one node list per province plus metadata fields.
# NOTE(review): "coverage": 1.0 is hard-coded — it assumes every node landed in a province list.
communityObj = {
    "communities": list(communities.values()),
    "overlap": False,
    "coverage": 1.0,
    "algorithm": "provinces",
    "params": {"initial_membership": None, "weights": None},
}


In [26]:
# Persist the province partition as JSON. The handle is only written to, so plain
# 'w' is used instead of 'w+' (which additionally requests read access that was
# never used). json.dump emits ASCII by default, so the explicit encoding does not
# change the bytes on disk; it only removes the dependency on the platform default.
with open('../data/clean/provinceCommunity.json', 'w', encoding='utf-8') as jf:
    json.dump(communityObj, jf)

In [28]:
# Reload the partition through cdlib's JSON reader so it can be passed to the
# evaluation functions. Note that this rebinds `communities`: it was the
# province -> node-list dict, and from here on it is whatever
# read_community_json returns (presumably a cdlib clustering object — confirm).
province_community_path = '../data/clean/provinceCommunity.json'
communities = readwrite.read_community_json(province_community_path)

## Fitness

In [32]:
# cdlib fitness functions to run against the province partition, in execution order.
# The (misspelt) variable name is kept because the scoring loop refers to it.
all_evalution_functions = [
    evaluation.avg_distance,
    evaluation.avg_embeddedness,
    evaluation.average_internal_degree,
    evaluation.avg_transitivity,
    evaluation.conductance,
    evaluation.cut_ratio,
    evaluation.edges_inside,
    evaluation.expansion,
    evaluation.fraction_over_median_degree,
    evaluation.hub_dominance,
    evaluation.internal_edge_density,
    evaluation.normalized_cut,
    evaluation.max_odf,
    evaluation.avg_odf,
    evaluation.flake_odf,
    evaluation.scaled_density,
    evaluation.significance,
    evaluation.size,
    evaluation.surprise,
    evaluation.triangle_participation_ratio,
    evaluation.purity,
]

In [36]:
# Run every function in all_evalution_functions on (G, communities) and keep the
# aggregate `.score` of each one that succeeds, keyed by the upper-cased function name.
fitness = dict()

for evaluation_function in all_evalution_functions:
    # Label the result with the function's own name. Parsing the repr
    # ("<function avg_distance at 0x...>") only works for plain functions;
    # fall back to the repr for callables that carry no __name__.
    f_name = getattr(evaluation_function, '__name__', repr(evaluation_function)).upper()
    print(f_name)
    try:
        ev = evaluation_function(G, communities)
        print(ev)
        fitness[f_name] = ev.score
        print(ev.score)

    except Exception as e:
        # Deliberate best effort: some measures reject this graph (e.g. it is a
        # multigraph and is not weakly connected), so report the reason and move
        # on; such measures are simply absent from `fitness`.
        print(e)

    print('\n')

AVG_DISTANCE
Graph is not weakly connected.


AVG_EMBEDDEDNESS
FitnessResult(min=0.3867069061354015, max=0.748056841634982, score=0.6086676282313765, std=0.1088729865406125)
0.6086676282313765


AVERAGE_INTERNAL_DEGREE
FitnessResult(min=4.32280701754386, max=26.45205479452055, score=14.1045748893863, std=7.792780876465862)
14.1045748893863


AVG_TRANSITIVITY
not implemented for multigraph type


CONDUCTANCE
FitnessResult(min=0.12000607625702567, max=0.4169427354472314, score=0.2268540594325537, std=0.09377424414023365)
0.2268540594325537


CUT_RATIO
FitnessResult(min=0.00026907664778897105, max=0.0006315676086321799, score=0.0004059325475443626, std=0.00010497078115369946)
0.0004059325475443626


EDGES_INSIDE
FitnessResult(min=616, max=20584, score=7792.2, std=6695.464969664168)
7792.2


EXPANSION
FitnessResult(min=2.3202479338842976, max=5.211695906432748, score=3.307951784831225, std=0.8229952271174137)
3.307951784831225


FRACTION_OVER_MEDIAN_DEGREE
FitnessResult(min=0.4358047016274

## Modularities

In [37]:
# cdlib modularity variants to compute for the province partition, in execution order.
all_modularities = [
    evaluation.erdos_renyi_modularity,
    evaluation.link_modularity,
    evaluation.modularity_density,
    evaluation.newman_girvan_modularity,
    evaluation.z_modularity,
]

In [38]:
# Run every function in all_modularities on (G, communities) and keep the `.score`
# of each one that succeeds, keyed by the upper-cased function name.
modularities = dict()

for modularity in all_modularities:
    # Label the result with the function's own name. Parsing the repr
    # ("<function z_modularity at 0x...>") only works for plain functions;
    # fall back to the repr for callables that carry no __name__.
    f_name = getattr(modularity, '__name__', repr(modularity)).upper()
    print(f_name)
    try:
        ev = modularity(G, communities)
        print(ev)
        modularities[f_name] = ev.score
        print(ev.score)

    except Exception as e:
        # Best effort: report why a measure could not be computed and continue
        # with the next one; failed measures are absent from `modularities`.
        print(e)

    print('\n')

ERDOS_RENYI_MODULARITY
FitnessResult(min=None, max=None, score=0.5703064235302439, std=None)
0.5703064235302439


LINK_MODULARITY
FitnessResult(min=None, max=None, score=0.03616247289364656, std=None)
0.03616247289364656


MODULARITY_DENSITY
FitnessResult(min=None, max=None, score=68.70416792044826, std=None)
68.70416792044826


NEWMAN_GIRVAN_MODULARITY
FitnessResult(min=None, max=None, score=0.10942757495034788, std=None)
0.10942757495034788


Z_MODULARITY
FitnessResult(min=None, max=None, score=1.4861854381310924, std=None)
1.4861854381310924


