In [1]:
# Run configuration (defaults) -- every tunable knob of this notebook lives here
tol = 1e-5         # convergence tolerance passed to pagerank
max_iter = 100     # iteration cap for pagerank; original author's note: the number of
                   # iterations needed to converge depends on chunksize
alpha = 0.85       # value for pagerank's `alpha` argument
chunksize = '1GB'  # CSV reader chunk size; tune to the available hardware

In [2]:
# Restore `data_path` from IPython's %store database. The variable must have
# been saved beforehand with `%store data_path` (e.g. by an earlier notebook);
# it is the path passed to the CSV reader further down.
# NOTE: keep comments on their own line -- text after a line magic is treated
# as arguments to the magic.
%store -r data_path

In [3]:
import time
from dask.distributed import Client, wait
import dask_cudf
import cugraph
from dask_cuda import LocalCUDACluster
import cugraph.dask as dcg
import cugraph.comms as Comms
from dask.utils import parse_bytes

In [4]:
# Start a local dask-cuda cluster with single-threaded workers and attach a
# Dask client to it; `client` and `cluster` are reused by later cells.
cluster = LocalCUDACluster(threads_per_worker=1)
client = Client(cluster)
# Initialise cugraph's comms layer (p2p enabled) once the client exists.
# Paired with Comms.destroy() in the final cell.
Comms.initialize(p2p=True)

In [5]:
# Start timer
t_start = time.time()

# Multi-GPU CSV reader.
# parse_bytes converts a size string such as '1GB' into a byte count; numeric
# input is returned as an int, so re-running this cell is safe.
chunksize = parse_bytes(chunksize)
e_list = None
while e_list is None:
    try:
        # Space-delimited edge list with two int32 columns: src, dst
        e_list = dask_cudf.read_csv(data_path,
                                    chunksize=chunksize,
                                    delimiter=' ',
                                    names=['src', 'dst'],
                                    dtype=['int32', 'int32'])
    except MemoryError:
        # Retry with a 10x smaller chunk, but never let the size reach zero:
        # without this floor the loop could keep retrying forever with a
        # meaningless chunk size. Once it cannot shrink further, surface the
        # original MemoryError instead.
        smaller_chunksize = chunksize // 10
        if smaller_chunksize < 1:
            raise
        print("MemoryError with chunksize", chunksize,
              "- retrying with", smaller_chunksize)
        chunksize = smaller_chunksize

# Build the directed graph from the (src, dst) edge list
G = cugraph.DiGraph()
G.from_dask_cudf_edgelist(e_list, source='src', destination='dst')

# Wait for the lazy reader: submit every partition of the edge list and block
# until they have finished, so the timing below includes the CSV load.
tmp = wait(client.compute(e_list.to_delayed()))

# Print time
print(time.time()-t_start, "s")

4.991689443588257 s


In [6]:
# Time the multi-GPU PageRank call
t_start = time.time()

# Solver settings come straight from the parameters cell
pagerank_kwargs = dict(alpha=alpha, max_iter=max_iter, tol=tol)

# Distributed PageRank over G; implementation reference:
# https://github.com/rapidsai/cugraph/blob/branch-21.12/python/cugraph/cugraph/dask/link_analysis/pagerank.py
pr_ddf = dcg.pagerank(G, **pagerank_kwargs)

# Report elapsed wall-clock time
elapsed = time.time() - t_start
print(elapsed, "s")

1.4094202518463135 s


In [7]:
# Time the gather + sort step
t_start = time.time()

# Materialise the Dask DataFrame of scores as a regular cuDF DataFrame
pr_df = pr_ddf.compute()

# Order rows from highest to lowest PageRank score
pr_sorted_df = pr_df.sort_values(by='pagerank', ascending=False)

# Report elapsed wall-clock time
elapsed = time.time() - t_start
print(elapsed, "s")

# Show the three highest-ranked vertices
top_three = pr_sorted_df.head(3)
print(top_three)

0.6952381134033203 s
         pagerank   vertex
96319    0.000112  3720356
6674853  0.000110  2369816
61642    0.000101  2483638


In [8]:
# Tear down in the reverse order of setup: cugraph comms first, then the Dask
# client, and finally the cluster itself.
Comms.destroy()
client.close()
cluster.close()