In [11]:
 def read_json(filename):
    with open(filename) as f:
        js_graph = json.load(f) #, default={'sender': 'source'})
        _attrs = dict(source='sender', target='receiver', name='guid',
              key='guid', link='links')
    #return nx.readwrite.node_link_graph(js_graph, {'link': 'links', 'source': 'sender', 'target': 'receiver', 'key': 'guid'})
    return nx.readwrite.node_link_graph(js_graph, directed=True, multigraph=False, attrs={'link': 'links', 'source': 'sender', 'target': 'receiver', 'key': 'guid', 'name': 'guid'} )
 
# get the largest connected component
def read_json_file(filename):
    graph = read_json(filename)
    return graph.subgraph(max(nx.weakly_connected_components(graph), key=len))  


In [24]:
import networkx as nx 
import json
import time
import datetime
import numpy as np
from collections import OrderedDict
from math import log10
  
def remove_edges(G_reduced, items, edges_max_goal):
    current_time = time.time()
    removed_edges = []
    # sort edges by betweeness centrality lowest-to-highest
    sorted_bet_cent_edges = sorted(items,reverse=False, key=lambda x: x[1])
     
    print("sorting took:", time.time()-current_time)
    to_remove = G_reduced.number_of_edges() - edges_max_goal
    for bet_cent in sorted_bet_cent_edges:  
        if (len(removed_edges) >= to_remove):
            break
        if G_reduced.degree(bet_cent[0]) > 2 and G_reduced.degree(bet_cent[1]) > 2:
            G_reduced.remove_edge(bet_cent[0], bet_cent[1]) 
            removed_edges.append(bet_cent)

    time_spent = time.time()-current_time
    print("for loop took : ", time_spent)
    G_reduced.remove_edges_from(removed_edges)

    return G_reduced, removed_edges
 
def postprocess(G_reduced, items):
    number_wcc = nx.number_weakly_connected_components(G_reduced) 
    print(number_wcc)
    if number_wcc == 1:
        #print("****** already 1 component ")
        return G_reduced

    current_time = time.time() 
    _components = [c for c in nx.weakly_connected_components(G_reduced)]
    
    for edge in reversed(items): 
        if number_wcc == 1: 
            break
            
        for c in _components:
            if edge[0] in c and edge[1] in c :
                # edge is within one component
                break
            elif edge[0] in c or edge[1] in c : 
                # edge is connecting two components
                G_reduced.add_edge(*edge)
                _components = [c for c in nx.weakly_connected_components(G_reduced)]
                number_wcc = len(_components) 
                break
     
    time_spent = time.time()-current_time
    print("remove_edges took : ", time_spent)
    return G_reduced  

In [18]:
def edge_reduce(G, edges_max_goal, weight_attr):
    bet_cent_edges = nx.edge_betweenness_centrality(G, weight=weight_attr)
 
    G_reduced, removed_edges = remove_edges(G, bet_cent_edges, edges_max_goal) 
    G_reduced = postprocess(G_reduced, removed_edges)

def edge_reduce_test(G, edge_cuts, weight_attr):
    bet_cent_edges = nx.edge_betweenness_centrality(G, weight=weight_attr)
    
    total_weight = []

    for edge_cut in edge_cuts:
        edges_max_goal = G.number_of_edges() * edge_cut
        G_reduced, removed_edges = remove_edges(G.copy(), bet_cent_edges, edges_max_goal) 
        G_reduced = postprocess(G_reduced, removed_edges)
        
        total_weight.append(G_reduced.size(weight=weight_attr)) 

    return edge_cuts, total_weight

def edge_reduce_approximate(G, edges_max_goal, weight_attr): 
    c = 10
    take_count = int(c * log10(nx.number_of_nodes(G)))
    print("take_count",take_count)

    bet_cent_edges = nx.edge_betweenness_centrality(G, k=take_count, weight=weight_attr) 
 
    G_reduced, removed_edges = remove_edges(G, bet_cent_edges, edges_max_goal) 
    G_reduced = postprocess(G_reduced, removed_edges)
 

In [19]:
def edge_reduce_approximate_test(filename, G, edge_cuts, weight_attr='transferred'):
    c = 10
    take_count = int(c * log10(nx.number_of_nodes(G)))
    print("edge_betweenness_centrality")

    bet_cent_edges = nx.edge_betweenness_centrality(G, k=take_count, weight=weight_attr) 
 
    print("edge_betweenness_centrality done")
    total_weight = []
    in_degree = []
    out_degree = []
    running_time = []
    average_clustering = []
    nn = []
    ne = []
    wcc = []

    for edge_cut in edge_cuts:  
        current_time = time.time()
        edges_max_goal = G.number_of_edges() * edge_cut
        print("copying:")
        graph = nx.DiGraph(G)
        print("original number of edges:", graph.number_of_edges())
        
        G_reduced, removed_edges = remove_edges(graph, bet_cent_edges, edges_max_goal)
        G_reduced = postprocess(G_reduced, removed_edges) 
        time_spent = time.time()-current_time
        
        total_weight.append(G_reduced.size(weight=weight_attr))  
        running_time.append(time_spent)
        #average_clustering.append(nx.average_clustering(G_reduced.to_undirected(as_view=True)))

        nn.append(G_reduced.number_of_nodes())
        ne.append(G_reduced.number_of_edges())
        wcc.append(nx.number_weakly_connected_components(G_reduced))

        print("weight: ", G_reduced.size())
        print("weight: ", G_reduced.size(weight=weight_attr)) 

    return edge_cuts, total_weight, average_clustering, nn, ne, wcc




In [22]:
def run_test_for_file(file_name, weight_attr):
    graph = read_json_file(file_name) 
    print(nx.info(graph))
    print("")
    edge_percentages = [0.03, 0.5, 1] 
    edge_cuts_2, total_weight_2, average_clustering2, nn2, ne2, wcc2  = edge_reduce_approximate_test(file_name, graph.copy(), edge_percentages, weight_attr)

    print("edge_cuts_BC", edge_cuts_2)
    print("total_weight_BC", total_weight_2) 
    print("wcc_BC", wcc2)     
    print("average_clustering_BC", average_clustering2)
    print("nn_BC", nn2)
    print("ne_BC", ne2)


    

In [25]:
run_test_for_file("test_data/test_caveman_8_50.json", "lastTs") 

Name: 
Type: DiGraph
Number of nodes: 400
Number of edges: 9800
Average in degree:  24.5000
Average out degree:  24.5000

edge_betweenness_centrality
edge_betweenness_centrality done
copying:
original number of edges: 9800
sorting took: 0.001827239990234375
for loop took :  0.04661726951599121
1
weight:  503
weight:  260020.0
copying:
original number of edges: 9800
sorting took: 0.0018291473388671875
for loop took :  0.05661511421203613
1
weight:  503
weight:  260020.0
copying:
original number of edges: 9800
sorting took: 0.0018897056579589844
for loop took :  0.028668880462646484
1
weight:  4900
weight:  2454422.0
copying:
original number of edges: 9800
sorting took: 0.0027310848236083984
for loop took :  0.0034351348876953125
1
weight:  9800
weight:  4913264.0
edge_cuts_BC [0.01, 0.03, 0.5, 1]
total_weight_BC [260020.0, 260020.0, 2454422.0, 4913264.0]
wcc_BC [1, 1, 1, 1]
average_clustering_BC []
nn_BC [400, 400, 400, 400]
ne_BC [503, 503, 4900, 9800]
