In [None]:

#  Setting display options
#
#     .  The imports this cell needs are grouped first so its dependencies are visible together.
#

import pandas as pd
from IPython.display import display, HTML
from tabulate import tabulate

#  Let pandas use up to 480 characters per line before it wraps a wide frame
#
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs
#
#     .  An empty HTML payload applies no styling at all, so a real <style> rule is injected.
#        "white-space: pre" stops <pre> output from wrapping, which lets the output area scroll
#        sideways instead.
#
#     .  NOTE(review): the selector below is the commonly used one for classic Notebook /
#        JupyterLab text output -- confirm it matches the front end in use.
#
display(HTML("<style>pre { white-space: pre !important; }</style>"))

print("--")


#  Setup stuff: Connectivity

In [None]:

from katana import remote
#  from katana.remote import import_data

#  Create the client handle.  Client() is called with no arguments, so no connection settings
#  are given in this notebook.  Later cells reach the database and graph through my_client.
#
my_client = remote.Client()

#  Print the client object as a quick visual check that it was created
#
print(my_client)


In [None]:

#  Names used to locate the graph; DB_NAME and GRAPH_NAME are read by the connect cell below.
#
#     .  NOTE(review): NUM_PARTITIONS is not referenced anywhere else in the visible notebook;
#        presumably a leftover from a graph-creation step -- confirm before removing.
#
NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

print("--")


In [None]:

#  CONNECT TO GRAPH
#
#     .  The result of find_graphs_by_name() is unpacked with  my_graph, *_  so only the first
#        match is kept and any further matches are discarded.
#
#     .  If nothing matches, the unpack itself raises ValueError (not enough values to unpack).
#

my_graph, *_ = my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME)

print(my_graph)


# UDFs, Part 01: Mutating Nodes 

In [None]:

#  We will use this counter to generate a unique column name below.
#
#     .  The cells below do  l_cntr += 1  before building "newcol_NNNN", so the first generated
#        name is newcol_0021.  Re-running this cell resets the sequence back to 20.
#
l_cntr = 20

print("--")


In [None]:

#  MMM


#  Add a new column to each node, one that is derived from columns local to each node; approach 3
#
#     .  Here we see that if the new column value is None, then the property is not actually added
#        to the node. 
#

def my_func(i_graph, i_label, i_properties, i_new_colname):
   """
   Add node property i_new_colname, derived from the first property named in i_properties.

   For every key of that source column the new value is:

      .  the upper-cased text of the source value, when the node carries i_label AND that
         upper-cased text sorts before "M"
      .  None in every other case

   i_graph         Graph handle; only i_graph.nodes is used.
   i_label         Label a node must carry in order to receive a value.
   i_properties    List of property names.  All are fetched, only the first one is read.
   i_new_colname   Name given to the column that is passed to add_property().

   Returns None.  The effect is the add_property() call on i_graph.nodes.
   """

   from katana.distributed import KeyedColumnNode
   import numpy as np

   #  Fetch every requested property, then work from the first one
   #
   l_fetched = {l_name: i_graph.nodes.get_property(l_name) for l_name in i_properties}
   l_source  = l_fetched[i_properties[0]]
   l_keys    = l_source.keys()

   #  Object array with one slot per source entry; each slot is overwritten in the loop below
   #
   l_values  = np.zeros(len(l_source), dtype="object")

   for l_pos, l_node in enumerate(l_keys):
      l_value = None                                       #  Default: no value for this node
      if i_label in i_graph.nodes.labels(l_node):
         l_upper = str(l_source[l_node]).upper()
         if l_upper < "M":                                 #  Only the first part of the alphabet gets a value
            l_value = l_upper
      l_values[l_pos] = l_value

   #  The existing column is passed alongside the new values when building the keyed column
   #
   i_graph.nodes.add_property(KeyedColumnNode(l_values, l_source, i_new_colname))

#  Advance the counter so this run writes to a column name that has not been used yet.
#  Note that every re-run of this cell therefore adds one more column.
#
l_cntr  += 1
l_newcol = f"newcol_{l_cntr:04d}"

#  Run my_func against the graph; the graph handle arrives as the lambda's argument
#
l_result = my_graph.run(lambda i_graph: my_func(i_graph, "Airport", ["airport_name"], l_newcol))

print("--")


In [None]:

#  NNN

#  Add a new column to each node, one that is derived from columns local to each node; approach 2
#
#     .  The source properties are now fetched with a dict comprehension. Why?  The same code
#        keeps working when more than one property is needed.
#

def my_func(i_graph, i_label, i_properties, i_new_colname):
   """
   Add node property i_new_colname holding the upper-cased text of an existing property.

   Every key of the source column gets a value; there is no filtering by label.

   i_graph         Graph handle; only i_graph.nodes is used.
   i_label         Accepted so the call shape matches the other approaches; not read here.
   i_properties    List of property names.  All are fetched, only the first one is read.
   i_new_colname   Name given to the column that is passed to add_property().

   Gotcha: the source value goes through str() before upper(), so a source value of None ends
   up as the text "NONE" rather than as a missing value.

   Returns None.  The effect is the add_property() call on i_graph.nodes.
   """

   from katana.distributed import KeyedColumnNode
   import numpy as np

   #  Fetch every requested property, then work from the first one
   #
   l_fetched = {l_name: i_graph.nodes.get_property(l_name) for l_name in i_properties}
   l_source  = l_fetched[i_properties[0]]
   l_keys    = l_source.keys()

   #  Object array with one slot per source entry; each slot is overwritten in the loop below
   #
   l_values  = np.zeros(len(l_source), dtype="object")

   for l_pos, l_node in enumerate(l_keys):
      l_text          = str(l_source[l_node])
      l_values[l_pos] = l_text.upper()

   #  The existing column is passed alongside the new values when building the keyed column
   #
   i_graph.nodes.add_property(KeyedColumnNode(l_values, l_source, i_new_colname))

#  Advance the counter so this run writes to a column name that has not been used yet.
#  Note that every re-run of this cell therefore adds one more column.
#
l_cntr  += 1
l_newcol = f"newcol_{l_cntr:04d}"

#  Run my_func against the graph; the graph handle arrives as the lambda's argument
#
l_result = my_graph.run(lambda i_graph: my_func(i_graph, "Airport", ["airport_name"], l_newcol))


print("--")


In [None]:

#  PPP

#  Output a raw listing of all columns, for all nodes
#
#     .  The query returns one row per distinct (labels, property keys) combination.
#     .  The query text is a plain literal; nothing is substituted into it.
#
l_query = """
   MATCH (n)
   RETURN DISTINCT LABELS(n) AS label, KEYS(n)
   """

l_result = my_graph.query(l_query)

#  Render the result as a psql-style text table, using the result's own keys as headers
#
print(tabulate(l_result, headers="keys", tablefmt="psql"))

#  Sample output,
#
#     +----+----------------+------------------------------------------------------------------------------------------------------------+
#     |    | label          | KEYS(n)                                                                                                    |
#     |----+----------------+------------------------------------------------------------------------------------------------------------|
#     |  0 | ['Airport']    | ['LABEL', 'airport_name', 'id', 'newcol_0021', 'newcol_0022', 'newcol_0023', 'newcol_0024']                |
#     |  1 | ['Airport']    | ['LABEL', 'airport_name', 'id', 'newcol_0021', 'newcol_0022', 'newcol_0023', 'newcol_0024', 'newcol_0026'] |
#     |  2 | ['Restaurant'] | ['id', 'restaurant_name', 'newcol_0021', 'newcol_0022', 'newcol_0023']                                     |
#     +----+----------------+------------------------------------------------------------------------------------------------------------+


In [None]:

#  Error raised by the PPP listing cell above (shown as Cell In [11] in the traceback),
#
#     ---------------------------------------------------------------------------
#     OperationError                            Traceback (most recent call last)
#     Cell In [11], line 10
#           1 #  PPP
#           2 
#           3 #  Output a raw listing of all columns, for all nodes
#           4 #
#           5 l_query  = """
#           6    MATCH (n)
#           7    RETURN DISTINCT LABELS(n) AS label, KEYS(n)
#           8    """.format()
#     ---> 10 l_result = my_graph.query(l_query)
#          11    #
#          12 print(tabulate(l_result, headers='keys', tablefmt='psql'))
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/async_to_sync.py:249, in AsyncToSync.<locals>.do_wrap.<locals>.wrapper(self, *args, **kwargs)
#         246 @wraps(underlying_func)
#         247 def wrapper(self, *args, **kwargs):
#         248     return registry.async_to_sync(
#     --> 249         underlying_func(
#         250             get_self_func(self),
#         251             *(registry.sync_to_async(a) for a in args),
#         252             **{k: registry.sync_to_async(v) for k, v in kwargs.items()},
#         253         )
#         254     )
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/async_to_sync.py:176, in async_to_sync.<locals>.wrapper(timeout, *args, **kwargs)
#         166     registry = AsyncToSyncClassRegistry.get()
#         167     return registry.async_to_sync(
#         168         wait_for(
#         169             async_func(
#        (...)
#         174         )
#         175     )
#     --> 176 return wait_for(async_func(*args, **kwargs), timeout=timeout)
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/async_to_sync.py:147, in wait_for(coro, timeout)
#         145 try:
#         146     future = asyncio.run_coroutine_threadsafe(timeout_coro, loop=AsyncRunnerThread.get().loop)
#     --> 147     return future.result()
#         148 except KeyboardInterrupt:
#         149     inner_future.cancel()
#     
#     File /opt/conda/lib/python3.8/concurrent/futures/_base.py:444, in Future.result(self, timeout)
#         442     raise CancelledError()
#         443 elif self._state == FINISHED:
#     --> 444     return self.__get_result()
#         445 else:
#         446     raise TimeoutError()
#     
#     File /opt/conda/lib/python3.8/concurrent/futures/_base.py:389, in Future.__get_result(self)
#         387 if self._exception:
#         388     try:
#     --> 389         raise self._exception
#         390     finally:
#         391         # Break a reference cycle with the exception in self._exception
#         392         self = None
#     
#     File /opt/conda/lib/python3.8/asyncio/tasks.py:455, in wait_for(fut, timeout, loop)
#         450     warnings.warn("The loop argument is deprecated since Python 3.8, "
#         451                   "and scheduled for removal in Python 3.10.",
#         452                   DeprecationWarning, stacklevel=2)
#         454 if timeout is None:
#     --> 455     return await fut
#         457 if timeout <= 0:
#         458     fut = ensure_future(fut, loop=loop)
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/remote/aio/graph.py:299, in Graph.query(self, query, memory_usage_factor, contextualize, **parameters)
#         296 if memory_usage_factor:
#         297     parameters["__katana_internal_match_batch_limit_scale_factor"] = memory_usage_factor
#     --> 299 response = await self._run_query(query, parameters=parameters)
#         300 rows = response["result"]["rows"]
#         301 columns = response["result"]["columns"]
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/remote/aio/graph.py:277, in Graph._run_query(self, query, parameters, parquet)
#         273     data["cypher"]["output_type"] = "Parquet"
#         275 data = self._add_operation_metadata(data)
#     --> 277 res = await self._client._run_on_graph_and_wait(self, data)
#         279 return res
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/remote/aio/client.py:326, in Database._run_on_graph_and_wait(self, graph, data)
#         324 while attempt < max_attempts:
#         325     try:
#     --> 326         return await self._wait_op(op)
#         327     except errors.ServerCommunicationError:
#         328         logging.warning(f"error while waiting, retrying (attempt={attempt})")
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/remote/aio/client.py:263, in Database._wait_op(self, op)
#         261 async with _progress_bar() as pbar:
#         262     async with wait_fn(operation_id) as stream:
#     --> 263         async for update in stream:
#         264             status = update["status"]
#         266             for entry in update.get("progress", []):
#     
#     File /opt/conda/lib/python3.8/site-packages/asyncstdlib/builtins.py:445, in map(function, *iterable)
#         443 async for args in args_iter:
#         444     result = function(*args)
#     --> 445     yield await result
#     
#     File /opt/conda/lib/python3.8/site-packages/asyncstdlib/_core.py:134, in force_async.<locals>.async_wrapped(*args, **kwargs)
#         133 async def async_wrapped(*args: Any, **kwargs: Any) -> T:
#     --> 134     return call(*args, **kwargs)
#     
#     File /opt/conda/lib/python3.8/site-packages/katana_enterprise/rpc/client.py:169, in _OperationClient._event_stream.<locals>.parse_stream(line)
#         166 status = data["status"]
#         168 if status == "Failed":
#     --> 169     raise errors.OperationError(operation_id + " " + data["status_message"])
#         170 if status == "Canceled":
#         171     raise errors.CanceledError()
#     
#     OperationError: GspZ89FxgNe6rwMmkLGfHJhYeQPv3EiLWuHHQhixHZmt-mEfVtfAosjnqhy1C backtrace (QueryOperation.cpp:67): backtrace (Network.h:331): backtrace (QueryOperation.cpp:70): backtrace (OpGraph.cpp:560): backtrace (OpGraph.h:596): backtrace (OpGraph.cpp:571): backtrace (OpGraph.cpp:64): backtrace (ProjectOperator.cpp:184): backtrace (Evaluate.cpp:2008): backtrace (Evaluate.cpp:1444): backtrace (ListFunc.cpp:1098): backtrace (PropertiesFunc.cpp:109): backtrace (PropertiesFunc.cpp:57): backtrace (Evaluate.cpp:2008): backtrace (EvaluateSelection.cpp:168): backtrace (KeyedSelection.cpp:287): backtrace (KeyedSelection.cpp:124): backtrace (KeyedCommunication.h:422): backtrace (KeyedCommunication.h:393): (../../libgluon/include/katana/KeyedCommunication.h:316): backtrace: backtrace (ColumnDistOps.cpp:30): buffer_index out of range.: arrow error: arrow error: arrow error: arrow error



In [None]:

#  QQQ

#  Deleting a property from Nodes; approach 2
#
#  We'll show deleting a property just from one Node type; Airport
#


def my_func(i_graph, i_label, i_delcol):
   """
   Blank out property i_delcol on the nodes that carry i_label.

   A replacement column is built for i_delcol: nodes carrying i_label get None, every other
   node keeps its current value.  The column is then written back with upsert_property().

   i_graph     Graph handle; only i_graph.nodes is used.
   i_label     Label of the nodes whose value is set to None.
   i_delcol    Name of the existing property to rewrite.

   NOTE(review): the notebook treats a None value as "property removed from the node";
   that is the behaviour of upsert_property() and is not shown here -- confirm.

   Returns None.  The effect is the upsert_property() call on i_graph.nodes.
   """

   from katana.distributed import KeyedColumnNode
   import numpy as np

   l_current = i_graph.nodes.get_property(i_delcol)
   l_keys    = l_current.keys()

   #  Object array with one slot per existing entry; each slot is overwritten in the loop below
   #
   l_values  = np.zeros(len(l_current), dtype="object")

   for l_pos, l_node in enumerate(l_keys):
      l_has_label     = i_label in i_graph.nodes.labels(l_node)
      l_values[l_pos] = None if l_has_label else l_current[l_node]

   #  Same property name as the source column, so the upsert overwrites it
   #
   i_graph.nodes.upsert_property(KeyedColumnNode(l_values, l_current, i_delcol))


#  Column to blank out on Airport nodes.
#
#     .  With l_cntr starting at 20 and the MMM and NNN cells each run once, in order, the
#        generated names are newcol_0021 (MMM) and newcol_0022 (NNN) -- so this targets the
#        column written by the NNN cell.  Adjust the name if those cells were run a different
#        number of times.
#
l_newcol = "newcol_0022"
#  l_newcol = "newcol_0021"
   #
l_result = my_graph.run(lambda g: my_func(g, "Airport", l_newcol))

print("--")
