In [1]:
# change the cwd
import os
os.chdir(os.getcwd()+"/HLP_Project/")

In [2]:
# use cython compiled modules for faster execution
from libs.cython.pyx.edgelistParser import edgelistParser
from libs.cython.pyx.utils import list2matrix, getInOutDegree
from libs.cython.cpp.pageRankCpp import cppLocalPageRank as localPageRank
from libs.cython.cpp.pageRankCpp import cppLocalPageRank_weight as localPageRank_weight

In [3]:
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
import datetime

In [4]:
_ = """
Dataset Types
../datasets/facebook/*.edges -> raw_list
../datasets/facebook_combined.txt -> list
../datasets/p2p-Gnutella31.txt -> list
../datasets/Wiki-Vote.txt -> list
../datasets/CA-HepPh.txt -> raw_list
../datasets/com-dblp.ungraph.txt -> list
../datasets/Email-Enron.txt -> raw_list
../datasets/Email-EuAll.txt -> list
"""

In [5]:
_ = """
../datasets/facebook/0.edges -> N = 333
../datasets/facebook/107.edges -> N = 1034
../datasets/facebook/348.edges -> N = 224
../datasets/facebook/414.edges -> N = 150
../datasets/facebook/686.edges -> N = 168
../datasets/facebook/698.edges -> N = 61
../datasets/facebook/1684.edges -> N = 786
../datasets/facebook/1912.edges -> N = 747
../datasets/facebook/3437.edges -> N = 534
../datasets/facebook/3980.edges -> N = 52
../datasets/facebook_combined.txt -> N = 4039
../datasets/p2p-Gnutella31.txt -> N = 62586
../datasets/Wiki-Vote.txt -> N = 7115
../datasets/CA-HepPh.txt -> N = 12008
../datasets/com-dblp.ungraph.txt -> N = 317080
../datasets/Email-Enron.txt -> N = 36692
../datasets/Email-EuAll.txt -> N = 265214
"""

In [6]:
datasets = ["../datasets/facebook/3980.edges",
            "../datasets/facebook/698.edges",
            "../datasets/facebook/414.edges",
            "../datasets/facebook/686.edges",
            "../datasets/facebook/348.edges",
            "../datasets/facebook/0.edges",
            "../datasets/facebook/3437.edges",
            "../datasets/facebook/1912.edges",
            "../datasets/facebook/1684.edges",
            "../datasets/facebook/107.edges",
            "../datasets/facebook_combined.txt",
            "../datasets/Wiki-Vote.txt",
            "../datasets/CA-HepPh.txt",
            "../datasets/Email-Enron.txt",
            "../datasets/p2p-Gnutella31.txt",
            "../datasets/Email-EuAll.txt",
            "../datasets/com-dblp.ungraph.txt"]
list_types = ["raw_list"]*10 + \
             ["list",
              "list",
              "raw_list",
              "raw_list",
              "list",
              "list",
              "list"]

In [7]:
print("Num Datasets:", len(datasets))

Num Datasets: 17


In [8]:
Ns = [0]*len(datasets)
avgDegs = [0.]*len(datasets)
unweigted_times = [None]*len(datasets)
weigted_times = [None]*len(datasets)

In [9]:
idx = 3
dataset = datasets[idx]
list_type = list_types[idx]

In [10]:
# load a dataset, in both weighted and unweighted forms
A_weighted, N, original_idxs = edgelistParser(dataset, list_type, True)
A, _, _ = edgelistParser(dataset, list_type, False)

In [11]:
inDeg, _ = getInOutDegree(A_weighted)
Ns[idx] = N
avgDegs[idx] = np.array(inDeg).mean()
print("Nodes:", N, "\b, AvgDeg:", avgDegs[idx])

Nodes: 168 , AvgDeg: 39.42857142857143


In [13]:
unweigted_times[idx] = %timeit -o -q localPageRank(A, c=0.85, return_only_neighbours=True, epsilon=1e-5, max_iters=50)
weigted_times[idx] = %timeit -o -q localPageRank_weight(A_weighted, c=0.85, return_only_neighbours=True, epsilon=1e-5, max_iters=50)

print("Unweigted:", unweigted_times[idx])
print("Weigted:", weigted_times[idx])

Unweigted: 8.36 ms ± 5.88 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Weigted: 33.9 ms ± 106 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
print(unweigted_times)
print(weigted_times)

[None, None, None, <IPython.core.magics.execution.TimeitResult object at 0x7efbf8217ed0>, None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, None, None, <IPython.core.magics.execution.TimeitResult object at 0x7efbd02ebc90>, None, None, None, None, None, None, None, None, None, None, None, None, None]


In [15]:
for idx in range(len(datasets)):
    dataset = datasets[idx]
    list_type = list_types[idx]
    
    # load a dataset, in both weighted and unweighted forms
    A_weighted, N, original_idxs = edgelistParser(dataset, list_type, True)
    A, _, _ = edgelistParser(dataset, list_type, False)
    
    inDeg, _ = getInOutDegree(A_weighted)
    Ns[idx] = N
    avgDegs[idx] = np.array(inDeg).mean()
    print("Nodes:", N, "\b, AvgDeg:", avgDegs[idx])
    
    unweigted_times[idx] = %timeit -o -q localPageRank(A, c=0.85, return_only_neighbours=True, epsilon=1e-5, max_iters=50)
    weigted_times[idx] = %timeit -o -q localPageRank_weight(A_weighted, c=0.85, return_only_neighbours=True, epsilon=1e-5, max_iters=50)

    print("Unweigted:", unweigted_times[idx])
    print("Weigted:", weigted_times[idx])

Nodes: 52, AvgDeg: 11.23076923076923
Unweigted: 1.23 ms ± 1.67 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
Weigted: 2.19 ms ± 1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Nodes: 61, AvgDeg: 17.704918032786885
Unweigted: 1.99 ms ± 1.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
Weigted: 3.98 ms ± 1.65 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Nodes: 150, AvgDeg: 45.14666666666667
Unweigted: 9.44 ms ± 7.11 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Weigted: 36.5 ms ± 31.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Nodes: 168, AvgDeg: 39.42857142857143
Unweigted: 8.33 ms ± 1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Weigted: 33.7 ms ± 70.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Nodes: 224, AvgDeg: 57.0
Unweigted: 13.9 ms ± 13.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Weigted: 74.9 ms ± 79.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Nodes: 333,

In [17]:
np.savez("linux-tom.npz", unweigted_times, weigted_times)

In [5]:
test = np.load("../linux-tom.npz")

In [17]:
for file in test.files:
    print(file)

arr_0
arr_1
