#  Display options:

In [None]:

#  References for this Notebook,
#
#     Lines 135-139
#     https://github.com/KatanaGraph/solutions/blob/main/fsi/src/katana_ai.py
#     Lines 31-37
#     https://github.com/KatanaGraph/test-datasets/blob/450232fe5739f327ed0795030a4b9d05731586ee/rdg_datasets/two_self_loops_ai/add_features.py
#
#     https://docs.k9h.dev/latest/recipes/udf-recipes.html?highlight=nodes%20get_property
#
#     https://github.com/KatanaGraph/katana-enterprise/blob/master/python/test/integration/remote/remote_operations_test.py
#
#     https://github.com/KatanaGraph/katana-enterprise/tree/master/lonestar/analytics/distributed/experimental



In [None]:

#  Setting display options 

import pandas as pd
   #
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs
#
from IPython.display import display, HTML
display(HTML(""))

from tabulate import tabulate


In [None]:

#  When using UDFs, these execute on another host.
#
#  As such, these methods will need to be copied and run locally also.
#
pd.set_option('display.max_columns', None)
pd.set_option("max_colwidth", None)


#  Setup stuff: Connectivity

In [None]:

from katana import remote
#  from katana.remote import import_data

my_client = remote.Client()

print(my_client)


In [None]:

NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

print("--")


In [None]:

#  CONNECT TO GRAPH

my_graph, *_ = my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME)

print(my_graph)


In [None]:

display(my_graph.num_nodes())
display(my_graph.num_edges())


# UDFs, Part 01: Mutating Nodes 

In [None]:

def my_func(i_graph):
    
   help(i_graph.nodes.labels)
      #
   return

l_result = my_graph.run(lambda g: my_func(g))


In [None]:

#  Our code will be getting larger; demonstrate using nested functions with Python

def my_func_HHH(i_graph, i_properties):

   def bbb():
      print("HHH")
    
   print("GGG")
   bbb()
        
   return


l_result = my_graph.run(lambda g: my_func_HHH(g, ["id", "fname", "lname"]))
   #
print(l_result)


#  Sample output,
#
#     Host 0 output:
#     GGG
#     HHH
#     
#     Host 1 output:
#     GGG
#     HHH
#     
#     Host 2 output:
#     GGG
#     HHH
#     None


In [None]:

display(my_graph.num_nodes())
display(my_graph.num_edges())


In [None]:

# my_graph2 = my_graph.project(node_types=["Airport"], edge_types=[None])
my_graph2 = my_graph.project(node_types=["Airport"], edge_types=["FLIES_TO"])

print("--")


In [None]:

display(my_graph2.num_nodes())
display(my_graph2.num_edges())

#  OperationError: 9AgHAAGCdiiUykiKth3cm2rJW4ruWxpLTaf5ofuQkVQx-2dhBp6fa5UWarBkC7 
#  Projected views are not supported for query operation yet. 
#  (QueryOperation.cpp:30): not implemented: not implemented


In [None]:
#  View the graph; verify results

l_result = my_graph2.query("""

   MATCH (n)
   RETURN n
   
   """, contextualize=True)

l_result.view()

#  OperationError: Projected views are not supported for query operation yet. 
#  (QueryOperation.cpp:28): not implemented: not implemented


In [None]:

#  Working with a property, after a projection

def my_func(i_graph, i_property):
    
   for l_node in i_graph.nodes.get_property(i_property):
      print(l_node)
            
   return

l_result = my_graph2.run(lambda g: my_func(g, "id"))

#  Projection not used
#
#     Host 0 output:
#     DEN
#     ORD
#     SJC
#     
#     Host 1 output:
#     MKE
#     ORD
#     PAP
#     
#     Host 2 output:
#     ORD
#     SJC
#     DEN
#     MKE
#     PAP


In [None]:

#  Put the projection inside the UDF


def my_func(i_graph, i_property):
    
   l_graph2 = i_graph.project(node_types=["Airport"], edge_types=["FLIES_TO"])

   for l_node in l_graph2.nodes.get_property(i_property):
      print(l_node)
            
   return
    
    
l_result = my_graph.run(lambda g: my_func(g, "id"))

#  Projection not used
#
#  (Same output as above)


In [None]:

#  Working with edge data; approach 1
#
#     .  Just get something working
#

def my_func(i_graph, i_label):
    
   l_node_props = i_graph.nodes.    get_property("id"      )
   l_edge_props = i_graph.out_edges.get_property("DISTANCE")
    
   for l_node in i_graph.nodes.masters():
      if (i_label in i_graph.nodes.labels(l_node)):
         for l_edge in i_graph.out_edges(l_node):
          
            l_node_id       = l_node_props[l_node]
               #
            l_edge_src      = i_graph.out_edges.src(l_edge)
            l_edge_dst      = i_graph.out_edges.dst(l_edge)
               #
            l_edge_distance = l_edge_props[l_edge]
               #
            print("Given Airport: %-3s   Edge Src: %-3s / %-18s   Edge Dst: %-3s / %-18s   Distance: %-8s" % (
               l_node_id, l_node_props[l_edge_src], l_edge_src, l_node_props[l_edge_dst], l_edge_dst, l_edge_distance))
    
   return

l_result = my_graph.run(lambda g: my_func(g, "Airport"))

#  Sample output,     (fyi:  results may look different, as partitioning may be different between loads)
#
#     Host 0 output:
#     Given Airport: MKE   Edge Src: MKE / <lnid 0>             Edge Dst: ORD / <lnid 4294967294>    Distance: 66      
#     
#     Host 1 output:
#     Given Airport: ORD   Edge Src: ORD / <lnid 0>             Edge Dst: MKE / <lnid 4294967293>    Distance: 66      
#     Given Airport: ORD   Edge Src: ORD / <lnid 0>             Edge Dst: DEN / <lnid 4294967294>    Distance: 886     
#     Given Airport: SJC   Edge Src: SJC / <lnid 1>             Edge Dst: ORD / <lnid 0>             Distance: 1829    
#     Given Airport: SJC   Edge Src: SJC / <lnid 1>             Edge Dst: DEN / <lnid 4294967294>    Distance: 948     
#     Given Airport: SJC   Edge Src: SJC / <lnid 1>             Edge Dst: PAP / <lnid 4294967292>    Distance: None    
#     
#     Host 2 output:
#     Given Airport: DEN   Edge Src: DEN / <lnid 0>             Edge Dst: SJC / <lnid 4294967293>    Distance: 948     
#     Given Airport: DEN   Edge Src: DEN / <lnid 0>             Edge Dst: ORD / <lnid 4294967294>    Distance: 886     

#  Above,
#
#     .  Minimally, we show the out edge from SJC Airport to a non-Airport node.
#        This is because we did filter nodes on the Airport label, but looked at [ all ] out edges.
#
#     .  We add additional node and edge properties; move to managing that with less code.


In [None]:

#  Working with edge data; approach 2
#
#     .  Cleaner handling of properties
#     .  Filter out Restaurants
#

def my_func(i_graph, i_label, i_type, i_weight):
    
    
   l_node_props = {each: i_graph.nodes    .get_property(each) for each in [ "id"             ]}
   l_edge_props = {each: i_graph.out_edges.get_property(each) for each in [ "TYPE", i_weight ]}
    
   for l_node in i_graph.nodes.masters():
      if (i_label in i_graph.nodes.labels(l_node)):
         for l_edge in i_graph.out_edges(l_node):
            if(l_edge_props["TYPE"][l_edge] == i_type):
          
               l_node_id       = l_node_props["id"][l_node]
                  #
               l_edge_src      = i_graph.out_edges.src(l_edge)
               l_edge_dst      = i_graph.out_edges.dst(l_edge)
                  #
               l_edge_distance = l_edge_props[i_weight][l_edge]
                  #
               print("Given Airport: %-3s   Edge Src: %-3s / %-18s   Edge Dst: %-3s / %-18s   Distance: %-8s" % (
                  l_node_id, l_node_props["id"][l_edge_src], l_edge_src, l_node_props["id"][l_edge_dst], l_edge_dst, l_edge_distance))
                  
   return

l_result = my_graph.run(lambda g: my_func(g, "Airport", "FLIES_TO", "DISTANCE"))

#  Sample output,
#
#     Host 0 output:
#     Given Airport: MKE   Edge Src: MKE / <lnid 0>             Edge Dst: ORD / <lnid 4294967294>    Distance: 66      
#     
#     Host 1 output:
#     Given Airport: ORD   Edge Src: ORD / <lnid 0>             Edge Dst: MKE / <lnid 4294967293>    Distance: 66      
#     Given Airport: ORD   Edge Src: ORD / <lnid 0>             Edge Dst: DEN / <lnid 4294967294>    Distance: 886     
#     Given Airport: SJC   Edge Src: SJC / <lnid 1>             Edge Dst: ORD / <lnid 0>             Distance: 1829    
#     Given Airport: SJC   Edge Src: SJC / <lnid 1>             Edge Dst: DEN / <lnid 4294967294>    Distance: 948     
#     
#     Host 2 output:
#     Given Airport: DEN   Edge Src: DEN / <lnid 0>             Edge Dst: SJC / <lnid 4294967293>    Distance: 948     
#     Given Airport: DEN   Edge Src: DEN / <lnid 0>             Edge Dst: ORD / <lnid 4294967294>    Distance: 886     


In [None]:

#  Write to a file that is local to the worker node
#
#     (Why: poor person's debugging maybe ?)
#

def my_func(i_graph, i_property, i_label):
    
   l_node_prop = i_graph.nodes.get_property(i_property)
      #
   with open("/tmp/my_debug.txt", "w") as f:
         #
      for l_node in i_graph.nodes.masters():
         if (i_label in i_graph.nodes.labels(l_node) ):
            print(l_node_prop[l_node])
            f.write("Node: " + l_node_prop[l_node] + "\n")
     
   return

l_result = my_graph.run(lambda g: my_func(g, "id", "Airport"))

print(l_result)

#  Sample output,
#
#     root@amusing-fawn-compute-0:~# cat /tmp/my*
#     Node: MKE
#     
#     root@amusing-fawn-compute-1:~# cat /tmp/my*
#     Node: ORD
#     Node: SJC
#     
#     root@amusing-fawn-compute-2:~# cat /tmp/my*
#     Node: DEN


In [None]:

#  Actually returning data
#

def my_func(i_graph, i_property, i_label):
    
   from katana_enterprise.distributed import single_host
      #
   l_result = []
    
   l_node_prop = i_graph.nodes.get_property(i_property)
      #
   for l_node in i_graph.nodes.masters():
      if (i_label in i_graph.nodes.labels(l_node) ):
         print(l_node_prop[l_node])
         l_result.append(l_node_prop[l_node])

   #  return l_result
   #
   #     ValueError: Hosts returned more than one value. Please have only one host return a value while other's return a None.
   #     The returned results (index is the host # in list):
   #     [['MKE'], ['ORD', 'SJC'], ['DEN']]
                      
   return single_host(host=0, result=l_result)

l_return = my_graph.run(lambda g: my_func(g, "id", "Airport"))
   #
print(l_return)


#  Sample output,
#
#     Host 0 output:
#     MKE
#     
#     Host 1 output:
#     ORD
#     SJC
#     
#     Host 2 output:
#     DEN
#     ['MKE']


In [None]:

#  Return a list from multiple hosts ..
#


In [None]:

#  We will use this counter to generate a unique column name below-
#
l_cntr = 20

print("--")


In [None]:

#  Add a new column to each node, one that is derived from columns local to each node; approach 1
#
#     .  This will fail if the named new column already exists in the graph.
#     .  Just get this working, then make it more functional.
#

def my_func(i_graph, i_label, i_properties, i_new_colname):
    
   from katana.distributed import KeyedColumnNode
   import numpy as np
    
    
   l_node_props = i_graph.nodes.get_property(i_properties[0])                     #  An existing column, we will upshift this existing value
      #
   l_node_keys  = l_node_props.keys()                                             #  keys() works against any column in the node.
      #
   l_new_cols   = np.zeros(len(l_node_props), dtype="object")                     #  An empty NumPy array, same length as l_node_props


   for l_index, l_key in enumerate(l_node_keys):                                  #  Loop over the keys that are already in the graph
      l_new_cols[l_index] = str(l_node_props[l_key]).upper()                      #  Derive a new column property value

    
   l_keyed_cols = KeyedColumnNode(l_new_cols, l_node_props, i_new_colname)        #  Building what we need to send to add_property()
   i_graph.nodes.add_property(l_keyed_cols)


   return

l_cntr   += 1
l_newcol =  "newcol_" + str(l_cntr).zfill(4)

   #
l_result = my_graph.run(lambda g: my_func(g, "Airport", ["airport_name"], l_newcol))


print("--")


In [109]:


#  Add a new column to each node, one that is derived from columns local to each node; approach 2
#
#     .  This will fail if the named new column already exists in the graph.
#     .  We moved properties to our comprehension technique. Why ?  When dealing with multiple
#        properties, this is handy.
#

def my_func(i_graph, i_label, i_properties, i_new_colname):
    
   from katana.distributed import KeyedColumnNode
   import numpy as np
    
    
   l_node_props = {each: i_graph.nodes.get_property(each) for each in i_properties}                 #  An existing column, we will upshift this existing value
      #
   l_node_keys  = l_node_props[i_properties[0]].keys()                                              #  keys() works against any column in the node.
      #
   l_new_cols   = np.zeros(len(l_node_props[i_properties[0]]), dtype="object")                      #  An empty NumPy array, same length as l_node_props


   for l_index, l_key in enumerate(l_node_keys):                                                    #  Loop over the keys that are already in the graph
      l_new_cols[l_index] = str(l_node_props[i_properties[0]][l_key]).upper()                       #  Derive a new column property value

    
   l_keyed_cols = KeyedColumnNode(l_new_cols, l_node_props[i_properties[0]], i_new_colname)         #  Building what we need to send to add_property()
      #
   i_graph.nodes.add_property(l_keyed_cols)


   return

l_cntr   += 1
l_newcol =  "newcol_" + str(l_cntr).zfill(4)

   #
l_result = my_graph.run(lambda g: my_func(g, "Airport", ["airport_name"], l_newcol))


print("--")


          0/? [?op/s]

--


In [110]:

#  View the graph; verify results
#
#  Based on our return list, not easily certain if new column was added to Restaurant
#

l_query  = """
   MATCH (n)
   RETURN n.restaurant_name, n.airport_name, n.{0}
   """.format(l_newcol)

l_result = my_graph.query(l_query)

print(tabulate(l_result, headers='keys', tablefmt='psql'))

#  Sample output,
#
#     +----+-----------------------------+------------------+-----------------+
#     |    | n.restaurant_name           | n.airport_name   | n.newcol_0021   |
#     |----+-----------------------------+------------------+-----------------|
#     |  0 |                             | San Jose         | SAN JOSE        |
#     |  1 |                             | Chicago O-Hare   | CHICAGO O-HARE  |
#     |  2 |                             | Milwaukee        | MILWAUKEE       |
#     |  3 | Pappadeauxs Seafood Kitchen |                  | NONE            |
#     |  4 |                             | Denver           | DENVER          |
#     +----+-----------------------------+------------------+-----------------+


          0/? [?op/s]

+----+-----------------------------+------------------+-----------------+
|    | n.restaurant_name           | n.airport_name   | n.newcol_0113   |
|----+-----------------------------+------------------+-----------------|
|  0 |                             | Chicago O-Hare   | CHICAGO O-HARE  |
|  1 |                             | Denver           | DENVER          |
|  2 | Pappadeauxs Seafood Kitchen |                  | NONE            |
|  3 |                             | Milwaukee        | MILWAUKEE       |
|  4 |                             | San Jose         | SAN JOSE        |
+----+-----------------------------+------------------+-----------------+


In [None]:

#  View the graph; verify results
#
#  Confirming new column was added to Restaurant
#

l_query  = """
   MATCH (n: Restaurant)
   // RETURN n.restaurant_name, n.{0}
   RETURN n
   """.format(l_newcol)

l_result = my_graph.query(l_query)

print(tabulate(l_result, headers='keys', tablefmt='psql'))

#  Sample output,
#
#     +----+-----------------+----------------+--------+-----------------+-----------------------------+----------+
#     |    |   n.internal_id | n.labels       | n.id   | n.newcol_0021   | n.restaurant_name           | n.type   |
#     |----+-----------------+----------------+--------+-----------------+-----------------------------+----------|
#     |  0 | 562949953421313 | ['Restaurant'] | PAP    | NONE            | Pappadeauxs Seafood Kitchen | node     |
#     +----+-----------------+----------------+--------+-----------------+-----------------------------+----------+


In [119]:



def my_func(i_graph, i_label, i_properties, i_new_colname):
    
   from katana.distributed import KeyedColumnNode
   import numpy as np
    
    
   l_node_props = {each: i_graph.nodes.get_property(each) for each in i_properties}                 #  An existing column, we will upshift this existing value
      #
   l_node_keys  = l_node_props[i_properties[0]].keys()                                              #  keys() works against any column in the node.
      #
   l_new_cols   = np.zeros(len(l_node_props[i_properties[0]]), dtype="object")                      #  An empty NumPy array, same length as l_node_props


   for l_index, l_key in enumerate(l_node_keys):                                                    #  Loop over the keys that are already in the graph
      l_new_cols[l_index] = str(l_node_props[i_properties[0]][l_key]).upper()                       #  Derive a new column property value

    
    
   aaa = np.zeros(0, dtype="object")
                  
   for l_each in l_node_props[i_properties[0]]:
      aaa = np.append(aaa, l_each)
    
    
   #  l_keyed_cols = KeyedColumnNode(l_new_cols, l_node_props[i_properties[0]], i_new_colname)         #  Building what we need to send to add_property()
   l_keyed_cols = KeyedColumnNode(l_new_cols, aaa                          , i_new_colname)         #  Building what we need to send to add_property()
      #
   #  i_graph.nodes.add_property(l_keyed_cols)


   return

l_cntr   += 1
l_newcol =  "newcol_" + str(l_cntr).zfill(4)

   #
l_result = my_graph.run(lambda g: my_func(g, "Airport", ["airport_name"], l_newcol))







          0/? [?op/s]


Host 0 errors:
Traceback (most recent call last):
  File "/opt/miniconda/lib/python3.8/site-packages/katana_enterprise/worker/worker.py", line 86, in execute
    value = function(graph)
  File "/tmp/ipykernel_3864/559108063.py", line 37, in <lambda>
  File "/tmp/ipykernel_3864/559108063.py", line 26, in my_func
TypeError: __init__(): incompatible constructor arguments. The following argument types are supported:
    1. katana_enterprise.distributed_native.lspg.KeyedColumnNode(data: numpy.ndarray, like: katana_enterprise.distributed_native.lspg.KeyedColumnNode, name: Optional[str] = None)
    2. katana_enterprise.distributed_native.lspg.KeyedColumnNode(data: numpy.ndarray, like: katana::python::PseudoKeyedColumn<katana::GraphTypes::Node>, name: str)

Invoked with: array(['CHICAGO O-HARE', 'MILWAUKEE', 'DENVER'], dtype=object), array(['Chicago O-Hare', 'Milwaukee', 'Denver'], dtype=object), 'newcol_0121'

Host 1 errors:
Traceback (most recent call last):
  File "/opt/miniconda/lib/pytho

TypeError: __init__(): incompatible constructor arguments. The following argument types are supported:
    1. katana_enterprise.distributed_native.lspg.KeyedColumnNode(data: numpy.ndarray, like: katana_enterprise.distributed_native.lspg.KeyedColumnNode, name: Optional[str] = None)
    2. katana_enterprise.distributed_native.lspg.KeyedColumnNode(data: numpy.ndarray, like: katana::python::PseudoKeyedColumn<katana::GraphTypes::Node>, name: str)

Invoked with: array(['CHICAGO O-HARE', 'MILWAUKEE', 'DENVER'], dtype=object), array(['Chicago O-Hare', 'Milwaukee', 'Denver'], dtype=object), 'newcol_0121'

In [108]:

#  Same as above; approach 3
#
#     .  Do not add the property to node types other than our target (Airport)
#     .  Move to using upsert_property()    (slower, safer)
#



def my_func(i_graph, i_label, i_properties, i_new_colname):
    
   from katana.distributed import KeyedColumnNode
   import numpy as np
    
    
   l_node_props = {each: i_graph.nodes.get_property(each) for each in i_properties}                              #  An existing column, we will upshift this existing value
      #
   l_node_keys  = l_node_props[i_properties[0]].keys()                                                           #  keys() works against any column in the node.
      #
   l_new_cols   = np.zeros(0, dtype="object")                                                                    #  An empty NumPy array
   l_new_keys   = np.zeros(0, dtype="object")                                                                    #  An empty NumPy array
    

   for l_index, l_key in enumerate(l_node_keys):                                                                 #  Loop over the keys that are already in the graph
    
      l_new_cols = np.append(l_new_cols, str(l_node_props[i_properties[0]][l_key]).upper(), axis=None)           #  Derive a new column property value, append to array
        
      #  l_new_keys = np.append(l_new_keys, np.array(l_key)                                  , axis=None)           #  Append matching key value to array
      #  l_new_keys = np.append(l_new_keys, np.array(l_node_props["id"][l_key])              , axis=None)           #  Append matching key value to array
      #  l_new_keys = np.append(l_new_keys, l_node_props["id"][l_key]                        , axis=None)           #  Append matching key value to array
    
          
        
        
          
   l_keyed_cols = KeyedColumnNode(l_new_cols, l_new_keys, i_new_colname)                                         #  Building what we need to send to add_property()
      #
#  i_graph.nodes.add_property(l_keyed_cols)


   return

l_cntr   += 1
l_newcol =  "newcol_" + str(l_cntr).zfill(4)

   #
l_result = my_graph.run(lambda g: my_func(g, "Airport", ["airport_name", "id"], l_newcol))


print("--")



# katana.distributed.KeyedColumnNode



          0/? [?op/s]

TypeError: cannot pickle 'katana_enterprise.distributed_native.lspg.Node' object

#  UDFs, Part 04: Data enrichment/derivation ..

In [None]:

#  Same as above, but add a derived column


def my_func_III(i_graph, i_properties):
    
    
   from katana_enterprise.distributed import single_host
      #
   import numpy  as np
   import pandas as pd
    

   ##################################################################

   #  Nested function, load a Python DataFrame from the KG graph
    
   def l_load_df(i_graph, i_properties):
      l_nodes = []
    
      for l_property in i_properties:
         l_node = i_graph.get_node_property(l_property).to_numpy().reshape(-1, 1)
         l_nodes.append(l_node)
       
      l_array     = np.hstack(l_nodes)
      l_dataframe = pd.DataFrame(l_array, columns=i_properties)
         #
    
      return l_dataframe
    
   ##################################################################

   #  Nested function, add a new, derived column to the DataFrame
    
   def l_enrich_df(i_dataframe, i_source_property, i_new_property):
    
      def l_to_upper(i_str):
         if i_str.get(i_source_property) is not None:
            return str(i_str[i_source_property]).upper()
         else:
            return
    
      i_dataframe[i_new_property] = i_dataframe.apply(l_to_upper, axis=1)
    
      return

   ##################################################################
    
    
   l_dataframe = l_load_df(i_graph, i_properties)
      #
   l_enrich_df(l_dataframe, "fname", "fname_upper")

        
   return single_host(host=0, result=l_dataframe)


l_result = my_graph.run(lambda g: my_func_III(g, ["id", "fname", "lname"]))
   #
print(l_result)


#  Sample output,
#
#                         id   fname    lname fname_upper
#     0  1111-1111-1111-1111  Justin     Fine      JUSTIN
#     1  2222-2222-2222-2222  Thomas     Cook      THOMAS
#     2  3333-3333-3333-3333  Sameer  Iyengar      SAMEER
#     3  4444-4444-4444-4444   Brian  Spencer       BRIAN
#     4                  101    None     None        None
#     5                  102    None     None        None
#     6                  103    None     None        None
#     7                  104    None     None        None
#     8                  105    None     None        None
#     9                  106    None     None        None


# UDFs, Part 05: Just writing to the graph ..

In [None]:

def my_func_JJJ(i_graph):

   import pandas as pd
       
   l_array     = [
      [ "7777-7777-7777-7777", "Bob"     , "Person" ],
      [ "8888-8888-8888-8888", "Mary"    , "Person" ],
      [ "9999-9999-9999-9999", "Earl"    , "Person" ],
      ]
   l_dataframe = pd.DataFrame(l_array, columns=["id", "newcol", "type"])
    
   i_graph.upsert_node_property(l_dataframe)

    
my_graph.run(lambda g: my_func_JJJ(g))


#     Host 0 errors:
#           ...
#     ValueError: expected 10 rows found 3 instead (PropertyGraph.cpp:1282): invalid argument
#     
#     Host 1 errors:
#           ...
#     ValueError: expected 6 rows found 3 instead (PropertyGraph.cpp:1282): invalid argument
#     
#     Host 2 errors:
#           ...
#     ValueError: expected 0 rows found 3 instead (PropertyGraph.cpp:1282): invalid argument



In [None]:

#  Attempt 01 to match what is expected by upsert_node_property()

def my_func_KKK(i_graph):

   l_node_ids = i_graph.get_node_property("id")


   for l_node in i_graph.master_nodes():
      print(l_node)

    
my_graph.run(lambda g: my_func_KKK(g))


#     Host 0 output:
#     0
#     1
#     2
#     3
#     
#     Host 1 output:
#     0
#     1
#     2
#     3
#     4
#     5



In [None]:

#  This technique works differently than that above because we fetch the property titled, "id", which is present everywhere.
#
#  **  Arthur

def my_func_MMM(i_graph, i_properties):
    
   import numpy  as np
   import pandas as pd
    
   pd.set_option("display.max_columns", None)
   pd.set_option("max_colwidth", None)
    
   l_nodes = []
 
   for l_property in i_properties:
      l_node = i_graph.get_node_property(l_property).to_numpy().reshape(-1, 1)
      l_nodes.append(l_node)
    
   l_array     = np.hstack(l_nodes)
   l_dataframe = pd.DataFrame(l_array, columns=i_properties)
    
   
   #  todo-   Want better example
   #          Also, one that only affects given rows
   #
   # xxx = l_dataframe.assign(new_column=lambda x: (None if x.fname is None else "Mr. " + x.fname))

   xxx = l_dataframe.assign(new_column=lambda x: (x.fname))
    
   print(xxx)

   #  This statement returns, but I see no changes to the graph.
   #
   i_graph.upsert_node_property(xxx)
    
   #  This statement hangs
   #
   i_graph.write()
    
    
my_graph.run(lambda g: my_func_MMM(g, ["id", "fname"]))
    
    
    
    

In [None]:

#  View the graph; verify results

l_result = my_graph.query("""

   MATCH (n)
   RETURN n
   
   """, contextualize=True)

l_result.view()


In [None]:

def my_func(i_graph, i_properties):
    
   import numpy as np
   from katana_enterprise.distributed import single_host


   l_return = np.random.randint(1, 101, 4)                #  Generate an array[4] of random numbers
   print(l_return)
    
   return single_host(host=0, result=l_return)



my_return = my_graph.run(lambda g: my_func(g, ["xxx", "yyy"]))
   #
display("--")
display(my_return)


#  Sample output,
#
#     Host 0 output:
#     [ 5 14 50 87]
#     
#     Host 1 output:
#     [15 50 81 79]
#     
#     Host 2 output:
#     [48 36 97 66]
#     
#     '--'
#     
#     array([ 5, 14, 50, 87])


In [None]:

def my_func_JJJ(i_graph, i_properties):
    
    
   from katana_enterprise.distributed import single_host
      #
   import numpy  as np
   import pandas as pd
    

   ##################################################################

   #  Nested function, load a Python DataFrame from the KG graph
    
   def l_load_df(i_graph, i_properties):
      l_nodes = []
    
      for l_property in i_properties:
         l_node = i_graph.get_node_property(l_property).to_numpy().reshape(-1, 1)
         l_nodes.append(l_node)
       
      l_array     = np.hstack(l_nodes)
      l_dataframe = pd.DataFrame(l_array, columns=i_properties)
         #
    
      return l_dataframe
    
   ##################################################################

   #  Nested function, add a new, derived column to the DataFrame
    
   def l_enrich_df(i_dataframe, i_source_property, i_new_property):
    
      def l_to_upper(i_str):
         if i_str.get(i_source_property) is not None:
            return str(i_str[i_source_property]).upper()
         else:
            return
    
      i_dataframe[i_new_property] = i_dataframe.apply(l_to_upper, axis=1)
    
      return

   ##################################################################
    
   def l_save_df(i_graph, i_dataframe):
    
      # i_graph.upsert_node_property(i_dataframe)

    
    
   ##################################################################
    
   l_dataframe = l_load_df(i_graph, i_properties)
      #
   l_enrich_df(l_dataframe, "fname", "fname_upper")
      #
   l_save_df(i_graph, l_dataframe)

        
   return single_host(host=0, result=l_dataframe)


l_result = my_graph.run(lambda g: my_func_JJJ(g, ["id", "fname", "lname"]))
   #
print(l_result)

