In [1]:
# Please start `jupyter notebook` from the folder that contains this file: paths are resolved relative to os.getcwd().
%reload_ext autoreload
%autoreload 2

import networkx as nx
import numpy as np
from networkx.algorithms.components import strongly_connected_component_subgraphs
from networkx.algorithms.components import connected_component_subgraphs
import timeit
import os

# Directory the notebook server was started from; every relative path in this
# notebook is resolved against it.
workdir = os.getcwd()
# Sibling "utils" folder (one level above workdir) that holds the local `utils` module.
utildir = os.path.join(workdir, os.path.pardir, "utils")

# The previous `if os.curdir != workdir: os.chdir(workdir)` was dropped on purpose:
# `os.curdir` is the constant string "." (never equal to an absolute path), and
# `workdir` is already the current directory, so the chdir could never do anything.

import sys

# Put utildir first on the import path, but only once, so re-running this cell
# does not keep stacking duplicate entries onto sys.path.
if utildir not in sys.path:
    sys.path.insert(0, utildir)

import utils
import pickle
import time
# Short dataset identifiers; the experiment cells index into this list to build
# file names and zip it with the corresponding graph files.
graph_names = [
    "wikivote",
    "epinions",
    "gplus",
    "pokec",
]

In [None]:
# Connected-component ("cc") experiment on the undirected graphs.
# For every dataset: load the graph, draw edge samples with
# utils.reservoir_sampling_edges, run utils.exact_computation_g on each sample
# and aggregate the per-repetition statistics with utils.aggregate_stats.
# Result layout: stats_cc_g[graph][index in p_samples][index in n_repetitions].
np.random.seed(132435)  # fixed seed for NumPy's global RNG
p_samples = [100, 200, 500, 1000]
n_repetitions = [3, 5, 7]

# Dataset folder names differ from the short graph names, so pair them explicitly.
graph_files = [
    os.path.join(os.path.pardir, 'dataset', dataset_dir, graph_name + '_cc.edgelist')
    for dataset_dir, graph_name in zip(('wiki_vote', 'epinions', 'gplus', 'soc_pokec'), graph_names)
]

stats_cc_g = []
for graph_file, graph_name in zip(graph_files, graph_names):
    gtype = "cc"
    # NOTE(review): the files are named *.edgelist but are parsed with
    # read_adjlist; the two agree only for plain two-column "u v" lines --
    # confirm the on-disk format.
    G = nx.read_adjlist(graph_file, create_using=nx.Graph())
    print("graph " + graph_name + " loaded")
    start_time = time.time()

    # Indexed below as matrix_cc[p index][n index][repetition]; last argument False = undirected run.
    matrix_cc = utils.reservoir_sampling_edges(G, n_repetitions, p_samples, False)

    stats_cc_p = []
    for i, p in enumerate(p_samples):
        stats_cc_aggr = []
        for j, n in enumerate(n_repetitions):
            stats_cc_n = []
            for h in range(n):
                # The label now carries gtype, matching the "scc" cell (gtype was
                # previously assigned here but never used).
                # NOTE(review): if exact_computation_g persists anything under this
                # name, earlier runs used the shorter "<graph>_<p>_<n>_<h>" form.
                run_label = "_".join([graph_name, gtype, str(p), str(n), str(h)])
                stats_cc, _ = utils.exact_computation_g(matrix_cc[i][j][h], run_label)
                stats_cc_n.append(stats_cc)

            # Stack the n repetitions into one array before aggregating them.
            stats_cc_n = np.array(stats_cc_n)
            aggr_stats_cc = utils.aggregate_stats(stats_cc_n)
            stats_cc_aggr.append(aggr_stats_cc)

        stats_cc_p.append(stats_cc_aggr)
    stats_cc_g.append(stats_cc_p)
    print(stats_cc_p)

    # Elapsed seconds for sampling + statistics (graph loading is excluded).
    end_time = time.time()
    print(end_time - start_time)

In [None]:
# Strongly-connected-component ("scc") experiment on the directed graphs.
# For every dataset: load the digraph, draw edge samples with
# utils.reservoir_sampling_edges, run utils.exact_computation_g on each sample
# and aggregate the per-repetition statistics with utils.aggregate_stats.
# Result layout: stats_scc_g[graph][index in p_samples][index in n_repetitions].
# NOTE(review): no seed is set in this cell; if the sampler draws from NumPy's
# global RNG, the results depend on the RNG state left by earlier cells.

# Dataset folder names differ from the short graph names, so pair them explicitly.
graph_files = [
    os.path.join(os.path.pardir, 'dataset', dataset_dir, graph_name + '_scc.edgelist')
    for dataset_dir, graph_name in zip(('wiki_vote', 'epinions', 'gplus', 'soc_pokec'), graph_names)
]

p_samples = [100, 200, 500, 1000]
n_repetitions = [3, 5, 7]

stats_scc_g = []
for graph_file, graph_name in zip(graph_files, graph_names):
    gtype = "scc"
    # NOTE(review): the files are named *.edgelist but are parsed with
    # read_adjlist; the two agree only for plain two-column "u v" lines --
    # confirm the on-disk format.
    dG = nx.read_adjlist(graph_file, create_using=nx.DiGraph())

    # Fixed: the message was missing the space before "loaded".
    print("graph " + graph_name + " loaded")
    start_time = time.time()

    # Indexed below as matrix_scc[p index][n index][repetition]; last argument True = directed run.
    matrix_scc = utils.reservoir_sampling_edges(dG, n_repetitions, p_samples, True)

    stats_scc_p = []
    for i, p in enumerate(p_samples):
        stats_scc_aggr = []
        for j, n in enumerate(n_repetitions):
            stats_scc_n = []
            for h in range(n):
                # Label format: <graph>_<gtype>_<p>_<n>_<repetition index>.
                run_label = graph_name + "_" + gtype + "_" + str(p) + "_" + str(n) + "_" + str(h)
                stats_scc, _ = utils.exact_computation_g(matrix_scc[i][j][h], run_label)
                stats_scc_n.append(stats_scc)
            # Stack the n repetitions into one array before aggregating them.
            stats_scc_n = np.array(stats_scc_n)
            aggr_stats_scc = utils.aggregate_stats(stats_scc_n)
            stats_scc_aggr.append(aggr_stats_scc)
        stats_scc_p.append(stats_scc_aggr)

    stats_scc_g.append(stats_scc_p)
    print(stats_scc_p)

    # Elapsed seconds for sampling + statistics (graph loading is excluded).
    end_time = time.time()
    print(end_time - start_time)
    

