In [1]:
from katana import distributed, do_all, do_all_operator, remote
from katana.distributed import (
    DataCommMode,
    GluonSubstrate,
    Graph,
    ReadLocation,
    Reducer,
    WorkDetection,
    WriteLocation,
    native
)
from katana.local import DynamicBitset
from katana.timer import StatTimer
import katana.remote

                Environment variable MODIN_ENGINE is not set to python, if you run into issues please try setting it by doing:
                import os
                os.environ['MODIN_ENGINE']='python'
                


In [2]:
# Open a client session; every graph below is created through it.
client = katana.remote.Client()

In [3]:
from katana.remote import import_data

# Location of the RDG dataset that is imported into the new graph.
LDBC_RDG_URI = "gs://katana-demo-datasets/rdg-datasets/v6/ldbc_003_"

ldbc_graph = client.create_graph()
import_data.rdg(ldbc_graph, LDBC_RDG_URI)

# BFS below runs on a projection restricted to the "KNOWS" edge type.
graph = ldbc_graph.project(edge_types=["KNOWS"])

          0/? [?op/s]

In [4]:
# Show the identifiers of the projected graph and of the database it lives in.
print(f"graph id:  {graph.graph_id}")
print(f"database id:  {graph.database_id}")

graph id:  Fequ4eKQkwTAFiUVqek98nSH1qJfsviaUze3pqgx3DLE
database id:  DFVU4QakNqpvxLmfvQj4faJuB6EasnAaCihuWCVKzgjd


In [5]:
import numpy as np

# Sentinel distance meaning "not reached yet": the largest value a uint32 can
# hold, matching the dtype of the distance array allocated in bfs().
INFINITY = np.iinfo(np.uint32).max

@do_all_operator()
def bfs_operator(graph, distance: np.ndarray, distance_bitset: DynamicBitset, working: WorkDetection, node_id: int):
    """Relax all out-edges of ``node_id`` for one BFS round.

    For every out-edge destination, the stored distance is lowered to
    ``distance[node_id] + 1`` when that is smaller than the current value.
    Each time a destination's distance drops, ``working`` is signalled and
    the destination's bit in ``distance_bitset`` is set.

    Nodes whose own distance is still ``INFINITY`` are skipped.
    """
    current_distance = distance[node_id]
    # A node that is still at the sentinel has nothing to propagate.
    if current_distance >= INFINITY:
        return

    for edge in graph.out_edge_ids(node_id):
        neighbor = graph.out_edge_dst(edge)
        previous = distance[neighbor]
        candidate = distance[node_id] + 1
        distance[neighbor] = min(previous, candidate)
        # Compare against the stored value, which is re-read after the write.
        if distance[neighbor] < previous:
            working.signal_worked()
            distance_bitset[neighbor] = True

@katana.remote.remote
def bfs(graph, source: int, output_property_name: str):
    """Compute BFS distances from ``source`` and store them as a node property.

    Repeatedly applies ``bfs_operator`` to every local node and synchronizes
    the distance array through Gluon until a round finishes without any
    distance being lowered. The resulting array is attached to the graph
    under ``output_property_name`` and the graph is written out.

    Args:
        graph: The graph partition this invocation operates on.
        source: User-facing id of the BFS source node. It is translated with
            ``graph.user_to_local_id``; when that returns ``None`` no local
            node is initialized to distance 0.
        output_property_name: Name of the node property that receives the
            distance array.

    Returns:
        The value of ``timer.get()`` for the timed section, which covers
        everything except ``graph.write()``.
    """
    timer = StatTimer()
    with timer:
        gluon = GluonSubstrate(graph, partition_agnostic=False, enforced_data_mode=DataCommMode.NoData)

        # The node count sizes the arrays and bounds the do_all range, so it
        # is looked up once instead of on every round.
        num_nodes = len(graph.node_ids())

        # Every node starts at the "infinite" sentinel distance.
        distance = np.full((num_nodes,), INFINITY, dtype=np.uint32)

        # Set the distance of the source node to 0 (if this worker has the
        # source node in its partition).
        local_source = graph.user_to_local_id(source)
        if local_source is not None:
            distance[local_source] = 0

        distance_bitset = DynamicBitset(num_nodes)

        working = WorkDetection(asynchronous=False)
        # Signal once up front so the loop body runs at least one round.
        working.signal_worked()

        while working:
            working.reset()
            do_all(range(num_nodes), bfs_operator(graph, distance, distance_bitset, working))

            # Combine distances across partitions with a min reduction.
            # NOTE(review): the meaning of the leading positional False is not
            # visible here -- confirm against the GluonSubstrate.sync signature.
            gluon.sync(
                False,
                Reducer(distance, native.min),
                distance_bitset,
                ReadLocation.Destination,
                WriteLocation.Source,
            )

        graph.add_node_property({output_property_name: distance})

    # Writing the graph happens outside the timed section.
    graph.write()

    return timer.get()

In [6]:
# Run BFS from node 1 and store the distances in the "bfs" node property.
time = bfs(graph, 1, "bfs")
print(f"Algorithm time: {time}")

          0/? [?op/s]

Algorithm time: 857.073


In [7]:
# Read the freshly written "bfs" property back through a query, closest nodes first.
bfs_query = (
    "MATCH (a:Person) WHERE a.bfs < 100 "
    "RETURN * ORDER BY a.bfs limit 100"
)
result = ldbc_graph.query(bfs_query, contextualize=True)
result.head()

          0/? [?op/s]


    from distributed import Client

    client = Client()

2022-05-17 17:51:03,334 - distributed.diskutils - INFO - Found stale lock file and directory '/home/andrew/katana-enterprise/python/test/notebooks/docs/user-guides/apps/dask-worker-space/worker-hy365nq9', purging
2022-05-17 17:51:03,334 - distributed.diskutils - INFO - Found stale lock file and directory '/home/andrew/katana-enterprise/python/test/notebooks/docs/user-guides/apps/dask-worker-space/worker-5p278l2x', purging
2022-05-17 17:51:03,335 - distributed.diskutils - INFO - Found stale lock file and directory '/home/andrew/katana-enterprise/python/test/notebooks/docs/user-guides/apps/dask-worker-space/worker-947lmw2x', purging
2022-05-17 17:51:03,335 - distributed.diskutils - INFO - Found stale lock file and directory '/home/andrew/katana-enterprise/python/test/notebooks/docs/user-guides/apps/dask-worker-space/worker-ntl28n5n', purging
2022-05-17 17:51:03,335 - distributed.diskutils - INFO - Found stale lock file and direc

In [8]:
# Display the query result through its view() method (shown as a widget in the notebook).
result.view()

VBox(children=(HTML(value='\n                <style>\n                #jp-main-content-panel .widget-container…

In [9]:
# Display the schema of the full (unprojected) graph through its view() method.
ldbc_graph.schema().view()

          0/? [?op/s]

VBox(children=(HTML(value='\n                <style>\n                #jp-main-content-panel .widget-container…