# Unweighted, Non-Laplacian

## GEE pure, Numba Serial, Numba Parallel

Edit the `@jit` part of the code for the variant being measured (pure GEE, Numba serial, or Numba parallel), then rerun these benchmarks to get the timing results.

In [1]:
import os
import numpy as np
from src import DataPreprocess
from numba import set_num_threads, get_num_threads
import timeit
import multiprocessing

print(multiprocessing.cpu_count())

print(get_num_threads())

48

In [3]:
def setup_gee(graph_name, graphs_dir=None):
    """Load an edge list from disk and prepare it for a GEE benchmark run.

    The array is reloaded from disk on every call so that no prepared data is
    reused between repetitions.

    Parameters
    ----------
    graph_name : str
        File name of the ``.npy`` edge list inside ``graphs_dir``.
    graphs_dir : str, optional
        Directory that holds the edge-list files. Defaults to the notebook
        global ``erdos_10_degree_graphs_npy_path`` (the previous behaviour).

    Returns
    -------
    (G_edgelist, n) : tuple
        ``G_edgelist`` is the loaded array sorted by its first column with a
        column of ones (unit edge weights) appended; appending float ones makes
        the result a float array rather than an integer one.
        ``n`` is the number of vertices, i.e. the largest vertex id found in
        either endpoint column plus one.
    """
    if graphs_dir is None:
        # Fall back to the notebook-level path so existing calls keep working.
        graphs_dir = erdos_10_degree_graphs_npy_path

    edges = np.load(os.path.join(graphs_dir, graph_name))

    # Sort by first column
    edges = edges[edges[:, 0].argsort()]

    # Add column of ones - weights
    G_edgelist = np.hstack((edges, np.ones((edges.shape[0], 1))))

    # Nr. vertices: look at BOTH endpoint columns. Using only column 1
    # under-counts whenever the largest id appears only in column 0.
    n = int(np.max(G_edgelist[:, :2]) + 1)

    return G_edgelist, n


def run_gee(erdos_labels_path , graph_name, G_edgelist, n, Y_path=None):
    """Load the label vector and run the stock (non-Numba) GEE embedding once.

    This is the function that gets timed. The labels are loaded inside it on
    purpose: for fairness against Ligra, which cannot preload them.

    Parameters
    ----------
    erdos_labels_path : str
        Directory holding the label files; used only when ``Y_path`` is None.
    graph_name : str
        File name of the label file inside ``erdos_labels_path``; used only
        when ``Y_path`` is None.
    G_edgelist : numpy.ndarray
        Prepared edge list, as returned by ``setup_gee``.
    n : int
        Number of vertices, as returned by ``setup_gee``.
    Y_path : str, optional
        Explicit path to a label file. When given it takes precedence over
        ``erdos_labels_path`` / ``graph_name``. Previously, passing it left
        ``Y`` unassigned and the call failed.

    Returns
    -------
    None
        The embedding result is discarded; only the runtime matters here.
    """
    if Y_path is None:
        Y_path = os.path.join(erdos_labels_path, graph_name)

    Y = np.load(Y_path)  # For Ligra fairness - ligra cannot preload this

    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

## Erdos-Renyi $2^{13} \to 2^{31}$

All graphs have an average degree of 10.

In [2]:
base_dir = "/home/ubuntu/prog/erdos-new"

erdos_10_degree_graphs_npy_path = os.path.join(base_dir, "NPY")
erdos_labels_path = os.path.join(base_dir, "Ys")

#### Note: manually timing the whole cell will not match the reported results, because `setup_gee()` is rerun before every measurement (to avoid cached results) and its runtime is deliberately excluded from the timing

In [None]:
# Discover the saved Erdos-Renyi edge lists (.npy) and order them by the
# leading integer of each file name (the node count), smallest first.
graph_files = sorted(
    (entry for entry in os.listdir(erdos_10_degree_graphs_npy_path) if entry.endswith('.npy')),
    key=lambda entry: int(entry.split('-')[0].split('_')[0]),
)

for graph_name in graph_files:
    # Open a new section for this graph in the results log.
    with open("runtime_results.txt", "a") as result_file:
        result_file.write(f"\n\n{graph_name}\n\n")
    print(f"\n\nRunning experiments for {graph_name}\n")

    # Seven independent repetitions per graph.
    for i in range(7):
        # Reload and prepare the graph; intentionally excluded from the timing.
        G_edgelist, n = setup_gee(graph_name)

        # Measure exactly one invocation of run_gee (label load + embedding).
        runtime = timeit.timeit(
            lambda: run_gee(erdos_labels_path, graph_name, G_edgelist, n),
            number=1,
        )
        result_string = f"Experiment {i+1} for {graph_name}: {runtime} seconds"

        # Echo to the notebook and append the raw number to the results log.
        print(result_string)
        with open("runtime_results.txt", "a") as result_file:
            result_file.write(str(runtime)+'\n')



Running experiments for 214748365-nodes_1073796575-edges.npy

Experiment 1 for 214748365-nodes_1073796575-edges.npy: 2198.0943059069996 seconds


# Named Graphs
## Adjacency

In [34]:
def setup_gee(graph_path):
    """Load a named graph's edge list from ``graph_path`` and prepare it for GEE.

    The array is reloaded from disk on every call so that no prepared data is
    reused between repetitions.

    Parameters
    ----------
    graph_path : str
        Path to the ``.npy`` edge-list file.

    Returns
    -------
    (G_edgelist, n) : tuple
        ``G_edgelist`` is the loaded array sorted by its first column with a
        column of ones (unit edge weights) appended; appending float ones makes
        the result a float array rather than an integer one.
        ``n`` is the number of vertices, i.e. the largest vertex id found in
        either endpoint column plus one.
    """
    # Report the file actually being loaded (the parameter, not a notebook
    # global that may be stale or undefined).
    print("Loading", graph_path)

    edges = np.load(graph_path)

    # Sort by first column
    edges = edges[edges[:, 0].argsort()]

    # Add column of ones - weights
    G_edgelist = np.hstack((edges, np.ones((edges.shape[0], 1))))

    # Nr. vertices: restrict the max to the two endpoint columns so the
    # appended weight column (all ones) can never influence the count.
    n = int(np.max(G_edgelist[:, :2]) + 1)

    return G_edgelist, n


def run_gee(graph_path, G_edgelist, n, Y_path):
    """Timed unit for stock GEE: read the labels at ``Y_path``, then embed once.

    ``graph_path`` is accepted but not used by the body. The labels are read
    here, inside the timed call, for fairness against Ligra, which cannot
    preload them. The embedding result is discarded and nothing is returned.
    """
    labels = np.load(Y_path)
    DataPreprocess.graph_encoder_embed(
        G_edgelist,
        labels,
        n,
        Correlation=False,
        Laplacian=False,
    )


def run_numba(graph_path, G_edgelist, n, Y_path):
    """Timed unit for Numba GEE: read the labels at ``Y_path``, then embed once.

    ``graph_path`` is accepted but not used by the body. The labels are read
    here, inside the timed call, for fairness against Ligra, which cannot
    preload them. The embedding result is discarded and nothing is returned.
    """
    labels = np.load(Y_path)
    DataPreprocess.numba_graph_encoder_embed(
        G_edgelist,
        labels,
        n,
        Correlation=False,
        Laplacian=False,
    )

### Twitch

In [27]:
graph_path = "../../../Thesis-Graph-Data/NPY-graphs/twitch.npy"
Y_path = "../../../Thesis-Graph-Data/Ys/twitch-Y50.npy"

In [35]:
# Benchmark stock GEE on the graph at `graph_path`: 7 repetitions, each timed
# as a single call.
for i in range(7):
    # Setup GEE (outside of timing). Pass `graph_path` defined for this section;
    # `graph_name` is a leftover from the Erdos-Renyi loop and would load the
    # wrong file (or raise NameError on a fresh kernel).
    G_edgelist, n = setup_gee(graph_path)

    # Time the run_gee function (label loading + embedding only)
    runtime = timeit.timeit(lambda: run_gee(graph_path, G_edgelist, n, Y_path), number=1)
    result_string = f"Experiment {i+1} for {graph_path}: {runtime} seconds"

    # Print the result; writing to runtime_results.txt is currently disabled
    print(result_string)
    # with open("runtime_results.txt", "a") as result_file:
        # result_file.write(str(runtime)+'\n')

Loading ../../../Thesis-Graph-Data/NPY-graphs/twitch.npy
Running GEE without numba


IndexError: index 168113 is out of bounds for axis 0 with size 168113

In [4]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

31.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [30]:
# Load the Twitch edge list and derive the inputs (G_edgelist, n) consumed by
# the timed cell that follows.
print("Loading Twitch graph")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))#.astype(np.int32)
# Make sure G_edgelist isn't restricted to int-s

# NOTE(review): n is taken from column 1 only; this assumes the largest vertex
# id also appears in column 1 (e.g. every edge stored in both directions) - confirm.
n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Twitch graph


In [33]:
n

168114

In [32]:
%%timeit -n 1 -r 1


# Timed statements: load the Twitch Y50 labels, then run stock GEE once.
# NOTE(review): the recorded output of this cell is
# "IndexError: index 168113 is out of bounds for axis 0 with size 168113" while
# n was 168114, so the Y50 label file presumably holds one entry fewer than n -
# verify the label file against the graph before trusting this measurement.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running GEE without numba


IndexError: index 168113 is out of bounds for axis 0 with size 168113

In [21]:
print("Loading Twitch graph")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))#.astype(np.int32)
# Make sure G_edgelist isn't restricted to int-s

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Twitch graph


In [22]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

125 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [23]:
print("Loading Twitch graph")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))#.astype(np.int32)
# Make sure G_edgelist isn't restricted to int-s

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Twitch graph


In [24]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

102 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [25]:
print("Loading Twitch graph")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))#.astype(np.int32)
# Make sure G_edgelist isn't restricted to int-s

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Twitch graph


In [26]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

102 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [27]:
print("Loading Twitch graph")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))#.astype(np.int32)
# Make sure G_edgelist isn't restricted to int-s

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Twitch graph


In [28]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

103 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [29]:
print("Loading Twitch graph")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))#.astype(np.int32)
# Make sure G_edgelist isn't restricted to int-s

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Twitch graph


In [30]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

104 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Pokec

In [7]:
# Load the Pokec edge list and derive the inputs (G_edgelist, n) consumed by
# the timed cell that follows.
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights (the appended float column makes the array float-typed)
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

# NOTE(review): n is taken from column 1 only; this assumes the largest vertex
# id also appears in column 1 - confirm.
n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [8]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2min 22s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [33]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [34]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

758 ms ± 443 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [36]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

757 ms ± 270 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [37]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [38]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

758 ms ± 580 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [40]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

757 ms ± 358 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [42]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

758 ms ± 381 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [43]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading Pokec graph - 400MB


In [44]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

758 ms ± 847 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


### LiveJournal

In [9]:
# Load the LiveJournal edge list and derive the inputs (G_edgelist, n) consumed
# by the timed cell that follows. Unlike the Twitch and Pokec cells, no sort by
# the first column is applied here.
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights (the appended float column makes the array float-typed)
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

# NOTE(review): n is taken from column 1 only; this assumes the largest vertex
# id also appears in column 1 - confirm.
n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [10]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

5min 22s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [47]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [48]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

1.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [49]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [50]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

1.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [51]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [52]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

1.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [53]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [54]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

1.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [55]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [56]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

1.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [57]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:,1]) + 1) # Nr. vertices

Loading LiveJournal graph - 1GB


In [58]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

1.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Orkut

In [59]:
# Load the Orkut edge list and derive the inputs (G_edgelist, n) consumed by
# the timed cell that follows. No sort by the first column is applied here.
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights (the appended float column makes the array float-typed)
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

# NOTE(review): n is taken from column 1 only; this assumes the largest vertex
# id also appears in column 1 - confirm.
n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [60]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [61]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [62]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [63]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [64]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [65]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [66]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [67]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [68]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [69]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [70]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [71]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut graph - 1.8GB


In [72]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

2.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Orkut-Groups 5.1GB

In [16]:
# Load the Orkut-Groups edge list and derive the inputs (G_edgelist, n) consumed
# by the timed cell that follows. No sort by the first column is applied here.
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights (the appended float column makes the array float-typed)
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

# NOTE(review): n is taken from column 1 only; this assumes the largest vertex
# id also appears in column 1 - confirm.
n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [17]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [4]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [5]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [6]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [7]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [8]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [9]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [10]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [11]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [12]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [13]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [14]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groupsUnw.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Orkut Groups graph - 5.1GB


In [15]:
%%timeit -r 1 -n 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Running numba with parallel = False
11.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Friendster 30GB

In [2]:
os.environ['NUMBA'] = "Serial"

In [None]:
os.environ['NUMBA'] = "Parallel"

In [39]:
set_num_threads(24)

In [40]:
get_num_threads()

24

In [41]:
# Load the Friendster edge list and derive the inputs (G_edgelist, n) consumed
# by the timed cell that follows. No sort by the first column is applied here.
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights (the appended float column makes the array float-typed)
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

# NOTE(review): n is taken from column 1 only; this assumes the largest vertex
# id also appears in column 1 - confirm.
n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Friendster graph - 31GB


In [42]:
%%timeit -n 1 -r 1

# Timed statements: load the Friendster labels, then run one embedding with the
# implementation selected through the NUMBA environment variable.
Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

# This progress message is printed inside the timed region.
print("Done loading Y")

# NOTE(review): the comparison is against the *string* 'None'. If NUMBA is unset,
# os.environ.get returns the None object, which is not equal to 'None', so the
# Numba branch runs. Set os.environ['NUMBA'] = "None" to select stock GEE.
if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Done loading Y
Running numba with parallel = True
1min 19s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [43]:
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Friendster graph - 31GB


In [44]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

print("Done loading Y")

if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Done loading Y
Running numba with parallel = True
1min 19s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [45]:
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Friendster graph - 31GB


In [46]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

print("Done loading Y")

if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

Done loading Y
Running numba with parallel = True
1min 19s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [95]:
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Friendster graph - 31GB


In [96]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

56.3 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [97]:
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

Loading Friendster graph - 31GB


In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

In [None]:
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

In [None]:
print("Loading Friendster graph - 31GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1))))

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this

if os.environ.get('NUMBA') == 'None':
    _ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False) # Stock GEE
else:
    _ = DataPreprocess.numba_graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = False)

# Laplacian (also covers weighted)

<font color="red">G_edgelist is modified during a run, so repeated runs with timeit on the same array will result in a divide-by-zero error. Reload G_edgelist before every timed run.</font>

## GEE Pure & Numba Serial

Make sure to check out `Main`, not `numba-parallel` (this makes Numba run serially).

Make the corresponding changes to the code (`@jit`), then rerun all cells below to get the Laplacian results.

### Twitch

In [1]:
import numpy as np
import DataPreprocess

In [2]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [3]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

3.63 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [4]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [5]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

717 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [6]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [7]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

691 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [8]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [9]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

679 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [10]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [11]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

683 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [12]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [13]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

676 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [14]:
print("Loading Twitch graph")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")
# G_edgelist = np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Twitch graph


In [15]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

675 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Pokec

In [16]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [17]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

4.07 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [18]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [19]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

4.09 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [20]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [21]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

3.95 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [22]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [23]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

3.48 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [24]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [25]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

3.49 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [26]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [27]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

3.71 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [28]:
print("Loading Pokec graph - 400MB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

G_edgelist = G_edgelist[G_edgelist[:, 0].argsort()] # Sort by first column

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading Pokec graph - 400MB


In [29]:
%%timeit -n 1 -r 1


Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

3.67 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### LiveJournal

In [30]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [31]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

9.26 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [32]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [33]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

8.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [34]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [35]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

9.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [36]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [37]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

8.87 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [38]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [39]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

8.47 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [40]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [41]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

9.07 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [42]:
print("Loading LiveJournal graph - 1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1) # Nr. vertices (largest id in second column, plus one)

Loading LiveJournal graph - 1GB


In [43]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

8.52 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Orkut

In [44]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [45]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

13.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [46]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [47]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

16.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [48]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [49]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

13.3 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [61]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [62]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

12.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [52]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [53]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

12.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [54]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [55]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

13.6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [56]:
print("Loading Orkut graph - 1.8GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

Loading Orkut graph - 1.8GB


In [57]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

SystemError: CPUDispatcher(<function graph_encoder_embed at 0x127217ac0>) returned a result with an exception set

### Orkut-groups

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Orkut Groups graph - 5.1GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

### Friendster

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)

In [None]:
print("Loading Friendster graph - 31GB")

# Reload before every timed run: G_edgelist is edited by the run (see the section warning).
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/friendster-0indexed-colswapped.npy")

# Add column of ones - weights. hstack with the float64 ones column already
# promotes integer edge data to float64, so copy=False avoids astype()
# allocating a second full-size copy while still guaranteeing float64.
G_edgelist = np.hstack((G_edgelist, np.ones((G_edgelist.shape[0], 1)))).astype(np.float64, copy=False)

n = int(np.max(G_edgelist[:, 1]) + 1)  # Nr. vertices (largest id in second column, plus one)

In [None]:
%%timeit -n 1 -r 1

Y = np.load("../../../Thesis-Graph-Data/Ys/friendster-Y50.npy") # For Ligra fairness - ligra cannot preload this
_ = DataPreprocess.graph_encoder_embed(G_edgelist, Y, n, Correlation = False, Laplacian = True)