In [2]:
import networkx as nx
import pandas as pd

In [3]:
# Path templates for the per-cluster GEXF files; the single "%s" placeholder
# in each template is filled with a cluster number.

# Input graphs, one per cluster.
raw_graphs_file_path_mask = (
    "../Data/ResultData/graphs/raw_graphs/"
    "cluster_%s_social_graph.gexf"
)

# Graphs saved after node measurements have been attached.
graphs_with_measurements_file_path_mask = (
    "../Data/ResultData/graphs/process_graphs/"
    "cluster_%s_social_graph_with_measure.gexf"
)

# Biggest connected component extracted from each measured graph.
biggest_component_graph_path_mask = (
    "../Data/ResultData/graphs/biggest_component_graphs/"
    "cluster_%s_biggest_component_graph.gexf"
)

In [12]:
# Cluster numbers to process; each one is substituted into the path masks.
cluster_list = [0, 136, 45, 53, 137, 123, 105, 129]

In [5]:
# Read the raw GEXF graph of every cluster into a dict keyed by cluster number.
graphs = {
    cluster_num: nx.read_gexf(raw_graphs_file_path_mask % (cluster_num))
    for cluster_num in cluster_list
}

In [51]:
# Per cluster: annotate nodes with their degree, drop nodes whose degree is
# below 1, attach centrality measures to the remaining nodes, and save the
# annotated graph.
# NOTE(review): nodes_iter() and set_node_attributes(G, name, values) look like
# the NetworkX 1.x API - confirm the pinned version before upgrading.

# (node attribute name, function computing it), applied in this order.
centrality_measures = (
    ('betweenness_centrality', nx.betweenness_centrality),
    ('degree_centrality', nx.degree_centrality),
    ('closeness_centrality', nx.closeness_centrality),
)

for cluster_num in cluster_list:
    raw_graph = graphs[cluster_num]

    # Store each node's degree as a node attribute; the filter below reads it back.
    nx.set_node_attributes(raw_graph, 'degree', nx.degree(raw_graph))

    # Keep only nodes whose degree is at least 1.
    connected_nodes = []
    for node, data in raw_graph.nodes_iter(data=True):
        if data['degree'] >= 1:
            connected_nodes.append(node)
    filtered_graph = raw_graph.subgraph(connected_nodes)

    # Centralities are computed on the filtered graph, not on the raw one.
    for attribute_name, measure in centrality_measures:
        nx.set_node_attributes(filtered_graph, attribute_name, measure(filtered_graph))

    nx.write_gexf(filtered_graph, graphs_with_measurements_file_path_mask % (cluster_num))

In [6]:
# Reload the graphs that were saved with node measurements, keyed by cluster number.
filtered_graphs = {
    cluster_num: nx.read_gexf(graphs_with_measurements_file_path_mask % (cluster_num))
    for cluster_num in cluster_list
}

In [54]:
# For each cluster, pick the biggest connected component of the filtered graph
# (the component subgraph with the greatest len()), keep it in memory and save
# it as GEXF.
biggest_component_grpahs = {}

for cluster_num in cluster_list:
    components = nx.connected_component_subgraphs(filtered_graphs[cluster_num])
    largest_component = max(components, key=len)

    biggest_component_grpahs[cluster_num] = largest_component
    nx.write_gexf(largest_component, biggest_component_graph_path_mask % (cluster_num))

In [7]:
# Reload the saved biggest-component graphs from disk, keyed by cluster number.
biggest_component_grpahs = {
    cluster_num: nx.read_gexf(biggest_component_graph_path_mask % (cluster_num))
    for cluster_num in cluster_list
}

In [18]:
# Network-level metrics: one record per cluster, combining sizes of the raw
# graph with size, density and average clustering of its biggest component
# (the "GSK_" keys). The records are collected into a DataFrame and saved as CSV.
network_metrics = []

for cluster_num in cluster_list:
    raw_graph = graphs[cluster_num]
    biggest_component = biggest_component_grpahs[cluster_num]

    network_metrics.append({
        'cluster_n': cluster_num,
        'node_count': raw_graph.number_of_nodes(),
        'edge_count': raw_graph.number_of_edges(),
        'GSK_node_count': biggest_component.number_of_nodes(),
        'GSK_edge_count': biggest_component.number_of_edges(),
        # Disabled in the original cell:
        # 'GSK_connectivity': nx.average_node_connectivity(biggest_component),
        'GSK_density': nx.density(biggest_component),
        'GSK_average_clustering_coefficient': nx.average_clustering(biggest_component),
    })

network_metrics_df = pd.DataFrame(network_metrics)
network_metrics_df.to_csv('../Data/ResultData/networks_metrics.csv')
    

In [17]:
# Display the per-cluster metric records collected in `network_metrics`.
# NOTE(review): the recorded output under this cell uses the key 'cluster_num',
# while the metrics cell stores the cluster id under 'cluster_n'; the output
# was probably produced before that key was renamed - re-run to confirm.
network_metrics

[{'GSK_average_clustering_coefficient': 0.06105863546482094,
  'GSK_density': 0.0006382303309775617,
  'GSK_edge_count': 14516,
  'GSK_node_count': 6745,
  'cluster_num': 0,
  'edge_count': 15182,
  'node_count': 14776},
 {'GSK_average_clustering_coefficient': 0.08192199291448017,
  'GSK_density': 0.001857708778297968,
  'GSK_edge_count': 3647,
  'GSK_node_count': 1982,
  'cluster_num': 136,
  'edge_count': 4020,
  'node_count': 5617},
 {'GSK_average_clustering_coefficient': 0.06308837313115122,
  'GSK_density': 0.0006987192767255136,
  'GSK_edge_count': 11909,
  'GSK_node_count': 5839,
  'cluster_num': 45,
  'edge_count': 12540,
  'node_count': 13076},
 {'GSK_average_clustering_coefficient': 0.0,
  'GSK_density': 0.5,
  'GSK_edge_count': 3,
  'GSK_node_count': 4,
  'cluster_num': 53,
  'edge_count': 37,
  'node_count': 439},
 {'GSK_average_clustering_coefficient': 0.08076193887882198,
  'GSK_density': 0.01155655938264634,
  'GSK_edge_count': 307,
  'GSK_node_count': 231,
  'cluster_nu