## Инициализация и загрузка данных

In [16]:
import networkx as nx
import community_metrics as cm
from community import generate_dendogram
from statistics import mean

GRAPH_FILE = 'data/graph.gpickle'
OUTPUT = 'lab6sclustering.json'

G = nx.read_gpickle(GRAPH_FILE)

RESULT = {}

def uniq(lst):
    return len(set(lst))

## Вычисление базовых характеристик

In [17]:
RESULT["max_core"] = max(nx.core_number(G).values())
RESULT["num_4-cores"] = len(list(nx.connected_component_subgraphs(nx.k_core(G,k=4))))

In [13]:
print(RESULT)

{'num_4-cores': 3, 'max_core': 34}


## Вычисление метрик

In [18]:
def modularity(kx):
  m0,m = 0.,G.number_of_edges()
  for gr in list(nx.connected_component_subgraphs(nx.k_core(G,k=kx))):
      n = 0
      for node in nx.nodes(gr):
          n += len(list(nx.all_neighbors(gr, node)))
      m0 += (nx.number_of_edges(gr)*1./m)-(n/(2*m))**2
  return m0

def clique_tr(G,node):
   nb = set(nx.all_neighbors(G, node))
   tr,nodes = set(),set()
   for n in nb:
       nodes.add(n)
       for n2 in nx.all_neighbors(G, n):
           if n2 in nb:
               tr.add(tuple(sorted([n,n2])))
               nodes.add(n2)
   return (len(tr),len(nodes),len(nb))

In [19]:
# Modularity
RESULT["modularity_max-cores"] = modularity(RESULT["max_core"])
RESULT["modularity_4-cores"] = modularity(RESULT["num_4-cores"])

In [16]:
def wcc(kx):
   wcc_all = 0.
   num_nodes_all = 0#len(nx.nodes(G))
   grs = list(nx.connected_component_subgraphs(nx.k_core(G,k=kx)))
   for gr in grs:
       nodes = nx.nodes(gr)
       num_nodes_gr = len(nodes)
       num_nodes_all += num_nodes_gr
       wcc0 = 0.
       for node in nodes:
           tV,vtV,nbV = clique_tr(G,node)
           tS,vtS,nbS = clique_tr(gr,node)
           if tV != 0:
               wcc0 += (tS*1./tV) * (vtV/(num_nodes_gr-1+(nbV-nbS)))
       wcc_all += wcc0
   return wcc_all/num_nodes_all

In [4]:
max_core_community = list(nx.connected_component_subgraphs(nx.k_core(G, k=RESULT['max_core'])))[0]

In [5]:
wcc_max_core = cm.wcc(max_core_community, G)
print(wcc_max_core)

0.2296878108830443


In [10]:
four_core_communities = list(nx.connected_component_subgraphs(nx.k_core(G, k=4)))

In [13]:
max(four_core_communities, key=lambda c: c.number_of_nodes())

25724

In [7]:
wcc_four_core = cm.wcc(four_core_community, G)
print(wcc_four_core)

0.0007715195805451026


In [20]:
# WCC
RESULT["wcc_max-cores"] = wcc_max_core
RESULT["wcc_4-cores"] = wcc_four_core
RESULT

{'max_core': 34,
 'modularity_4-cores': 0.08463815529658236,
 'modularity_max-cores': 0.015516874392815372,
 'num_4-cores': 3,
 'wcc_4-cores': 0.0007715195805451026,
 'wcc_max-cores': 0.2296878108830443}

## Louvain 

In [21]:
dendo = generate_dendogram(G)

In [22]:
RESULT["louvain_steps"] = [uniq(prt.values()) for prt in dendo]

## Сохранение данных

In [23]:
import json
RESULT

{'louvain_steps': [53614, 49239, 48983, 48978],
 'max_core': 34,
 'modularity_4-cores': 0.08463815529658236,
 'modularity_max-cores': 0.015516874392815372,
 'num_4-cores': 3,
 'wcc_4-cores': 0.0007715195805451026,
 'wcc_max-cores': 0.2296878108830443}

In [24]:
with open(OUTPUT, 'w') as out:
    out.write(json.dumps(RESULT))