In [1]:
# Run `jupyter notebook` from the folder that contains this file: the paths below are resolved relative to os.getcwd().
%reload_ext autoreload
%autoreload 2

import networkx as nx
import numpy as np
from networkx.algorithms.components import strongly_connected_component_subgraphs
from networkx.algorithms.components import connected_component_subgraphs
import timeit
import os

# Locate the sibling ``utils`` directory relative to the directory the notebook
# server was started from, and make it importable.
workdir = os.getcwd()
# Normalise the path so the stored value has no literal ".." component.
utildir = os.path.normpath(os.path.join(workdir, os.path.pardir, "utils"))

# The previous guard compared os.curdir (the constant ".") against the absolute
# working directory, so it was always true and merely chdir'ed into the
# directory the process was already in. That no-op has been dropped.

import sys
# Keep exactly one entry at the front of sys.path, so re-running this cell does
# not accumulate duplicates while utildir still takes priority on import.
if utildir in sys.path:
    sys.path.remove(utildir)
sys.path.insert(0, utildir)

import utils
import pickle
import time
# Short dataset identifiers, in processing order. The experiment cells use
# them as edge-list file-name prefixes and in their progress messages.
graph_names = [
    "wikivote",
    "epinions",
    "gplus",
    "pokec",
]

In [2]:
# Sampling experiment on the '*_cc.edgelist' files, loaded as undirected graphs.
#
# For every graph, every entry of p_samples and every entry of n_repetitions,
# shortest-path statistics are computed on each sampled node set and then
# aggregated over the repetitions. Results are collected in stats_cc_g as
# stats_cc_g[graph][p index][n index].

# Seed NumPy's global RNG before sampling.
# (assumes utils.reservoir_sampling_nodes draws from it -- TODO confirm)
np.random.seed(132435)
p_samples = [5, 10, 15, 20]
n_repetitions = [3, 5, 7]

# Dataset folder for each entry of graph_names, in the same order.
dataset_dirs = ['wiki_vote', 'epinions', 'gplus', 'soc_pokec']
graph_files = [
    os.path.join(os.path.pardir, 'dataset', dataset_dir, graph_name + '_cc.edgelist')
    for dataset_dir, graph_name in zip(dataset_dirs, graph_names)
]

stats_cc_g = []
for graph_file, graph_name in zip(graph_files, graph_names):
    # NOTE(review): the files are named *.edgelist but are parsed with
    # read_adjlist. The two parsers agree only when every line holds exactly
    # two node tokens -- confirm the file format.
    G = nx.read_adjlist(graph_file, create_using=nx.Graph())
    print("graph " + graph_name + " loaded")
    # Not used in this cell; kept in case a later cell reads it.
    dim = G.number_of_nodes()
    # Indexed below as matrix_cc[p index][n index][repetition].
    # NOTE(review): the trailing False presumably means "undirected" -- confirm in utils.
    matrix_cc = utils.reservoir_sampling_nodes(G, n_repetitions, p_samples, False)

    stats_cc_p = []
    for i, p in enumerate(p_samples):
        stats_cc_aggr = []
        for j, n in enumerate(n_repetitions):
            stats_cc_n = []
            for h in range(n):
                selected_nodes_cc = matrix_cc[i][j][h]
                cc_mat = utils.all_pairs_shortest_path_bfs(G, selected_nodes_cc)
                stats_cc = utils.get_stats_bfs(cc_mat)
                stats_cc_n.append(stats_cc)
            stats_cc_n = np.array(stats_cc_n)

            # Collapse the n repetitions into a single aggregated record.
            aggr_stats_cc = utils.aggregate_stats(stats_cc_n)
            stats_cc_aggr.append(aggr_stats_cc)
        stats_cc_p.append(stats_cc_aggr)
        print(stats_cc_aggr)

    # Keep this graph's results; previously they were overwritten by the next
    # graph and stats_cc_g stayed empty.
    stats_cc_g.append(stats_cc_p)
    

graph gplusloaded
[[5.0, 2.8433799204549679, 3.0, 3.0], [5.0, 2.8433799204549679, 3.0, 3.0], [5.0, 2.8433799204549679, 3.0, 3.0]]
[[5.0, 2.8553547781562032, 3.0, 3.0], [5.0, 2.8553547781562036, 3.0, 3.0], [5.0, 2.8553547781562041, 3.0, 3.0]]
[[5.0, 2.8680500958735649, 3.0, 3.0], [5.0, 2.8680500958735644, 3.0, 3.0], [5.0, 2.868050095873564, 3.0, 3.0]]


KeyboardInterrupt: 

In [None]:
# Sampling experiment on the '*_scc.edgelist' files, loaded as directed graphs.
#
# For every graph, every entry of p_samples and every entry of n_repetitions,
# shortest-path statistics are computed on each sampled node set and then
# aggregated over the repetitions. Results are collected in stats_scc_g as
# stats_scc_g[graph][p index][n index].
#
# NOTE(review): this cell does not seed the RNG itself. If the sampling helper
# uses NumPy's global RNG, the samples depend on the state left behind by
# whatever ran before -- confirm whether a fixed seed is wanted here.

# Dataset folder for each entry of graph_names, in the same order.
dataset_dirs = ['wiki_vote', 'epinions', 'gplus', 'soc_pokec']
graph_files = [
    os.path.join(os.path.pardir, 'dataset', dataset_dir, graph_name + '_scc.edgelist')
    for dataset_dir, graph_name in zip(dataset_dirs, graph_names)
]

p_samples = [5, 10, 15, 20]
n_repetitions = [3, 5, 7]

stats_scc_g = []
for graph_file, graph_name in zip(graph_files, graph_names):
    # NOTE(review): the files are named *.edgelist but are parsed with
    # read_adjlist. The two parsers agree only when every line holds exactly
    # two node tokens -- confirm the file format.
    dG = nx.read_adjlist(graph_file, create_using=nx.DiGraph())
    print("graph " + graph_name + " loaded")
    # Indexed below as matrix_scc[p index][n index][repetition].
    # NOTE(review): the trailing True presumably means "directed" -- confirm in utils.
    matrix_scc = utils.reservoir_sampling_nodes(dG, n_repetitions, p_samples, True)

    stats_scc_p = []
    for i, p in enumerate(p_samples):
        stats_scc_aggr = []
        for j, n in enumerate(n_repetitions):
            stats_scc_n = []
            for h in range(n):
                selected_nodes_scc = matrix_scc[i][j][h]
                scc_mat = utils.all_pairs_shortest_path_bfs(dG, selected_nodes_scc)
                stats_scc = utils.get_stats_bfs(scc_mat)
                stats_scc_n.append(stats_scc)

            stats_scc_n = np.array(stats_scc_n)
            # Collapse the n repetitions into a single aggregated record.
            aggr_stats_scc = utils.aggregate_stats(stats_scc_n)
            stats_scc_aggr.append(aggr_stats_scc)

        stats_scc_p.append(stats_scc_aggr)
        print(stats_scc_aggr)

    # Keep this graph's results; previously they were overwritten by the next
    # graph and stats_scc_g stayed empty.
    stats_scc_g.append(stats_scc_p)