In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import json
import time
import datetime
import random
import bisect
import itertools

In [2]:
def read_json(filename):
    with open(filename) as f:
        js_graph = json.load(f) #, default={'sender': 'source'})
        _attrs = dict(source='sender', target='receiver', name='guid',
              key='guid', link='links')
    #return nx.readwrite.node_link_graph(js_graph, {'link': 'links', 'source': 'sender', 'target': 'receiver', 'key': 'guid'})
    return nx.readwrite.node_link_graph(js_graph, directed=True, multigraph=False, attrs={'link': 'links', 'source': 'sender', 'target': 'receiver', 'key': 'guid', 'name': 'guid'} )
 
def read_json_file(filename):
    graph = read_json(filename)
    return graph.subgraph(max(nx.weakly_connected_components(graph), key=len))  


In [3]:
def selectRoot(G, weight_attr):
    cum_weights = [0]*G.number_of_nodes()
    nodes = [0]*G.number_of_nodes()
    for i,v in enumerate(G.nodes):  
        cum_weights[i] = (v, G.degree(v, weight=weight_attr))
        nodes[i] = v 
    
    items = sorted(cum_weights,reverse=True, key=lambda x: x[1])
    root = items[:1] 

    print("root", root)
    root2 = cum_weights[np.argmax( np.array([x[1] for x in cum_weights]))]

    return root[0][0]

In [21]:
def FBF(G, weight_attr, root, threshold): 
    G_number_of_nodes = G.number_of_nodes()
    tree = nx.DiGraph() 
    tree.add_node(root)  
 
    tree_nodes = [root]
    tree_edges = []
    neighbours = list() 
    top = root
    while len(tree_nodes) != G_number_of_nodes:
        edge, neighbours, top = FBF_add_nodes(G, tree_nodes, weight_attr, neighbours, top)
        tree_nodes.append(top)
        tree_edges.append(edge)
        
    tree.add_nodes_from(tree_nodes)
    tree.add_weighted_edges_from(tree_edges)
    print("before dense_component_extraction")
    print(nx.info(tree)) 
    tree = dense_component_extraction(G, tree, threshold, weight_attr) 
    print("after dense_component_extraction")
    print(nx.info(tree))
    
    return tree


In [27]:
def FBF_add_nodes(G, tree_nodes, weight_attr, neighbours = None, lastAdded = None):  
    # pick a neightbour Vn+1 with a highest degree
    if neighbours == None:
        neighbours = []  

    neighbours.extend(G.degree(G.successors(lastAdded), weight=weight_attr)) 
    neighbours.extend(G.degree(G.predecessors(lastAdded), weight=weight_attr)) 
            
    # neighbours of nodes in tree
    neighbours = [x for x in neighbours if x[0] not in tree_nodes]  
    neighbours = list(set(neighbours))  
     
    # neighbours with the highest degree 
    top = max(neighbours, key=lambda x: x[1]) 
    
    # edges between Vn+1 and tree nodes  
    edges = [e for e in (G.edges(top[0])) if e[1] in tree_nodes] 
    if (len(edges) == 0):
        edges = [e for e in list(G.in_edges(top[0])) if e[1] in tree_nodes or e[0] in tree_nodes] 
   
    nodes = set(list(sum(edges, ()))) - set([top[0]]) 
    
    vn_weight = (G.degree(nodes, weight=weight_attr))  
    other_end = max(vn_weight, key=lambda x: x[1])
 
    # edge = [item for item in edges if other_end[0] in item][0]
    edge = [e for e in edges if e[0] == other_end[0] or e[1] == other_end[0]][0]
    edge = (edge[0], edge[1], G[edge[0]][edge[1]][weight_attr])

    return edge, neighbours, top[0] 


In [28]:
def dense_component_extraction(G, tree, threshold, weight_attr):
    if tree.number_of_edges() >= threshold:
        return tree 
    # for each edge not in tree
    skipped_edges = set(G.edges.data(weight_attr, default=0)) - set(tree.edges.data(weight_attr, default=0))  
     
    current_time = time.time()
    tree_ud = tree.to_undirected(as_view=True)
    print("to_undirected:", time.time()-current_time) 
      
    # compute shortest path, keep adding the ones with the longest path
    items = [] 
    #lengths = dict(nx.all_pairs_shortest_path_length(tree_ud))
    lengths = dict(nx.all_pairs_shortest_path_length(tree_ud)) 

    for edge in skipped_edges:
        #length = nx.shortest_path_length(tree_ud, source=edge[0], target=edge[1], weight = weight_attr)
        items.append((edge[0], edge[1], edge[2], lengths[edge[1]][edge[0]])) 
        #items.append((edge[0], edge[1], edge[2], length)) 
    
    items = sorted(items, reverse=True, key=lambda x: x[3])  
    #print("items", items)
    
    items = [(item[0], item[1], item[2]) for item in items]
    number_to_add = int(threshold - tree.number_of_edges()) 
    tree.add_weighted_edges_from(items[:number_to_add])
    return tree 


In [29]:
def run_focus_test(G, edge_cuts, weight_attr):
    total_weight = [] 
    in_degree = []
    out_degree = []
    average_clustering = []
    nn = []
    ne = []
    wcc = []

    root = selectRoot(G, weight_attr)
    
    print("root: ", root)
    for edge_cut in edge_cuts: 
        print("G.number_of_edges():", G.number_of_edges())
        threshold = G.number_of_edges() * edge_cut
        print("edge_cut:", edge_cut)
        print("threshold:", threshold)   
         
        G_reduced = FBF(G, weight_attr, root, threshold)
        
        print("weight: ", G_reduced.size()) 
        print("weight: ", G_reduced.size(weight="weight"))  
        total_weight.append(G_reduced.size(weight="weight"))  

        nn.append(G_reduced.number_of_nodes())
        ne.append(G_reduced.number_of_edges())
        wcc.append(nx.number_weakly_connected_components(G_reduced))

    return edge_cuts, total_weight, average_clustering, nn, ne, wcc



In [30]:
def run_test_for_file(file_name, weight_attr):
    graph = read_json_file(file_name) 
    print(nx.info(graph))
    print("")
    edge_percentages = [0.03, 0.5, 1] 
    edge_cuts_2, total_weight_2, average_clustering2, nn2, ne2, wcc2  = run_focus_test(graph.copy(), edge_percentages, weight_attr)

    print("edge_cuts_BC", edge_cuts_2)
    print("total_weight_BC", total_weight_2) 
    print("wcc_BC", wcc2)     
    print("average_clustering_BC", average_clustering2)
    print("nn_BC", nn2)
    print("ne_BC", ne2)



In [31]:
run_test_for_file("test_data/test_caveman_8_50.json", "lastTs") 

Name: 
Type: DiGraph
Number of nodes: 400
Number of edges: 9800
Average in degree:  24.5000
Average out degree:  24.5000

root [(113, 31566)]
root:  113
G.number_of_edges(): 9800
edge_cut: 0.03
threshold: 294.0
[(102, 113, 945)]
[(102, 113, 945), (46, 113, 758)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976), (100, 113, 541), (132, 137, 726), (306, 336, 851), (306, 337, 566), (167

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976), (100, 113, 541), (132, 137, 726), (306, 336, 851), (306, 337, 566), (167

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976), (100, 113, 541), (132, 137, 726), (306, 336, 851), (306, 337, 566), (167

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28,

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976), (100, 113, 541), (132, 137, 726), (306, 336, 851), (306, 337, 566), (167

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976), (100, 113, 541), (132, 137, 726), (306, 336, 851), (306, 337, 566), (167

[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245), (220, 226, 884), (68, 220, 330), (68, 306, 664), (306, 335, 304), (64, 68, 727), (320, 335, 550), (331, 335, 545), (60, 64, 820), (309, 335, 857), (61, 306, 783), (61, 176, 1000), (176, 258, 546), (258, 353, 582), (212, 226, 569), (258, 265, 248), (352, 353, 100), (224, 226, 418), (135, 137, 797), (353, 364, 540), (3, 28, 687), (71, 309, 835), (85, 102, 867), (350, 353, 401), (11, 28, 476), (6, 28, 950), (211, 226, 684), (64, 86, 983), (226, 235, 823), (265, 365, 460), (39, 226, 810), (65, 68, 283), (110, 113, 530), (258, 282, 543), (252, 258, 254), (123, 137, 790), (75, 85, 726), (54, 306, 613), (353, 374, 167), (35, 60, 717), (186, 320, 599), (66, 68, 892), (112, 113, 932), (353, 386, 427), (70, 71, 775), (362, 365, 243), (69, 113, 637), (312, 335, 397), (226, 247, 976), (100, 113, 541), (132, 137, 726), (306, 336, 851), (306, 337, 566), (167

after dense_component_extraction
Name: 
Type: DiGraph
Number of nodes: 400
Number of edges: 4900
Average in degree:  12.2500
Average out degree:  12.2500
weight:  4900
weight:  2455918.0
G.number_of_edges(): 9800
edge_cut: 1
threshold: 9800
[(102, 113, 945)]
[(102, 113, 945), (46, 113, 758)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688)]
[(102, 113, 945), (46, 113, 758), (14, 46, 981), (14, 28, 740), (21, 28, 551), (1, 28, 792), (113, 137, 838), (113, 227, 688), (226, 227, 245)]
[(102, 113, 945), (46, 113, 758), (14, 46

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



after dense_component_extraction
Name: 
Type: DiGraph
Number of nodes: 400
Number of edges: 9798
Average in degree:  24.4950
Average out degree:  24.4950
weight:  9798
weight:  4911970.0
edge_cuts_BC [0.03, 0.5, 1]
total_weight_BC [227690.0, 2455918.0, 4911970.0]
wcc_BC [1, 1, 1]
average_clustering_BC []
nn_BC [400, 400, 400]
ne_BC [399, 4900, 9798]
