# Unweighted, Non-Laplacian

## GEE pure, Numba Serial, Numba Parallel

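Edit the `@jit` decorator in the code to select the variant being measured (pure GEE, Numba serial, or Numba parallel), then rerun these benchmarks to collect the timing results for that variant.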

In [1]:
import numpy as np
import DataPreprocess

### Twitch

In [2]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [3]:
%%timeit


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=False
)

876 ms ± 111 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Pokec

In [4]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [5]:
%%timeit


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=False
)

8.05 s ± 98.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### LiveJournal

In [6]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [7]:
%%timeit

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=False
)

23.2 s ± 177 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Orkut

In [8]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [9]:
%%timeit

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=False
)

15.1 s ± 86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Orkut-Groups 5.1GB

In [10]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [11]:
%%timeit

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=False
)

39.9 s ± 275 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Laplacian (also covers weighted)

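<font color="orange">G_edgelist is edited during a Laplacian run, so repeated timeit runs on the same array will result in a divide-by-0 error. Each measurement below therefore reloads the graph and times a single run (`-n 1 -r 1`).</font>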

## GEE Pure & Numba Serial

Make sure to check out Main, not numba-parallel (on Main, Numba runs serially)

Make corresponding changes to code (`@jit`) and then rerun all below to get Laplacian results

### Twitch

In [12]:
import numpy as np
import DataPreprocess

In [13]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [14]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

10.4 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [15]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [16]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

5.83 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [17]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [18]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

5.75 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [19]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [20]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

5.93 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [21]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [22]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

5.72 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [23]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [24]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

5.71 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [25]:
print("Loading Twitch graph")

# Binary edge list; the text-based alternative is kept for reference:
# np.loadtxt("../../../Thesis-Graph-Data/twitch-SNAP-bidir-manually", delimiter=" ", dtype=np.int32)
G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/twitch.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones. np.ones yields floats, so the stacked array
# is not restricted to integers - do not cast it back to int32.
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Twitch graph


In [26]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/twitch-Y20.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

5.84 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Pokec

In [27]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [28]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

31.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [29]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [30]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

31.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [31]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [32]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

31.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [33]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [34]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

31.6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [35]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [36]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

31.8 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [37]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [38]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

31.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [39]:
print("Loading Pokec graph - 400MB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/pokec.npy")

# Order the edges by the value in their first column.
G_edgelist = G_edgelist[np.argsort(G_edgelist[:, 0])]

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Pokec graph - 400MB


In [40]:
%%timeit -n 1 -r 1


# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/pokec-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

33 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### LiveJournal

In [41]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [42]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 17s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [43]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [44]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 17s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [45]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [46]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 16s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [47]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [48]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 16s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [49]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [50]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 18s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [51]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [52]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 15s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [53]:
print("Loading LiveJournal graph - 1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/liveJournal.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading LiveJournal graph - 1GB


In [54]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/liveJournal-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 16s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Orkut

In [55]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [56]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 43s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [57]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [58]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 47s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [59]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [60]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 44s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [61]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [62]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 44s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [63]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [64]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 44s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [65]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [66]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 44s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [67]:
print("Loading Orkut graph - 1.8GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut graph - 1.8GB


In [68]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-Y50.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

1min 46s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Orkut-groups

In [69]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [70]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 47s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [71]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [72]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 48s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [73]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [74]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 51s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [75]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [76]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 47s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [77]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [78]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 52s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [79]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [80]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 49s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [81]:
print("Loading Orkut Groups graph - 5.1GB")

G_edgelist = np.load("../../../Thesis-Graph-Data/NPY-graphs/orkut-groups.npy")

# Append a weight column of ones (one weight per edge).
G_edgelist = np.concatenate((G_edgelist, np.ones((len(G_edgelist), 1))), axis=1)

# Vertex count: largest value in the second column, plus one.
# NOTE(review): only column 1 is scanned - assumes the highest id occurs there; confirm.
n = int(G_edgelist[:, 1].max() + 1)

Loading Orkut Groups graph - 5.1GB


In [82]:
%%timeit -n 1 -r 1

# The labels are read inside the timed cell on purpose: Ligra cannot preload
# them, so the load is charged to this benchmark as well.
Y = np.load("../../../Thesis-Graph-Data/Ys/orkut-groups-Y40.npy")
_ = DataPreprocess.graph_encoder_embed(
    G_edgelist, Y, n, Correlation=False, Laplacian=True
)

4min 49s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
